Compare commits

...

44 Commits

Author SHA1 Message Date
Alex Gorrod
83b880922b If getting a handle lock only - don't propagate WT_NOTFOUND.
It's expected after the background drop changes.
Refs SERVER-18014
2015-04-27 17:42:46 +10:00
Michael Cahill
7ca9da48ef Merge pull request #1894 from wiredtiger/tree-discard-background
Discard trees from cache in the background
Conflicts:
	src/conn/conn_dhandle.c
2015-04-27 17:41:21 +10:00
Michael Cahill
26fd30aa13 Updated NEWS.MONGODB with additional changes. 2015-04-27 17:23:36 +10:00
Keith Bostic
04f9af1c4f If a file is marked cache-resident, it can never be evicted,
this fixes SERVER-18192.
2015-04-27 17:13:35 +10:00
Keith Bostic
1980475337 Rename WT_BTREE_NO_HAZARD to be WT_BTREE_IN_MEMORY, to better reflect
that it means permanently cache-resident.
2015-04-27 17:11:38 +10:00
Keith Bostic
96f85a0987 Always clear WT_BTREE_NO_HAZARD when cache_resident is not configured,
otherwise we can (1) create a file with cache_resident configured
(setting both no-eviction and no-hazard), (2) drop the file, then
(3) re-create the file without cache_resident configured (clearing only
no-eviction), and the result will be a file that can be evicted and
where we aren't maintaining hazard pointers, and nothing good is going
to happen after that.
2015-04-27 17:11:38 +10:00
Michael Cahill
7de835c634 Cherry-pick Windows fixes to the mongodb-3.0 branch. 2015-04-27 10:30:52 +10:00
Michael Cahill
06a7cc94c3 src/lsm/lsm_cursor.c:666:5: error: 'locked' may be used uninitialized in
this function
2015-04-26 08:04:59 +10:00
Michael Cahill
a0857dd1d1 Update NEWS.MONGODB with 3.0.3 changelog 2015-04-25 13:04:36 +10:00
Michael Cahill
54614a5162 Fixes after cherry-picking. 2015-04-25 13:02:42 +10:00
Alex Gorrod
80226f628c Fix a deadlock related to handle locking.
If one thread does a __wt_session_release_btree of a handle at the same time
as another thread does a __wt_session_get_btree both wanting exclusive access
to the file. It was possible for one thread to get the dhandle list lock and
wait on the handle lock, while another thread held the handle lock waiting for
the handle list lock.

Temporarily fix by doing a try-lock on the __wt_session_get_btree path, long term
the solution is to get rid of the __conn_dhandle_open_lock method, and split get
and lock into two passes.

Refs BF-716
2015-04-25 12:50:52 +10:00
Alex Gorrod
785fab985b Fix a deadlock related to LSM. There are cases where closing a file with
an existing checkpoint could self deadlock.

Check in the meta tracking whether we've already visited a checkpoint handle.

Refs WT-716
2015-04-25 12:50:52 +10:00
Keith Bostic
842dc0c582 Create a "clear a single walk" function and call it from __evict_clear_walks
and __evict_clear_all_walks, that way we can use the WT_WITH_DHANDLE macro,
and the comment about clearing the eviction reference before releasing the
page appears everywhere it should.
2015-04-25 12:50:52 +10:00
Michael Cahill
4a80c29d5f Clear session->dhandle so that future error messages don't dereference freed memory. 2015-04-25 12:50:52 +10:00
Michael Cahill
14f51cac04 Clear eviction walks in all trees before the eviction server exits. 2015-04-25 12:50:52 +10:00
Michael Cahill
0c93519d07 Move the "cache is empty" check to where the cache is destroyed, not when the eviction server is exiting. 2015-04-25 12:50:52 +10:00
Michael Cahill
6f09dd9dad Shut down the eviction server before closing file handles to avoid a race.
refs WT-1893
2015-04-25 12:50:52 +10:00
Susan LoVerso
1558eca283 Look for any number of non-data-changing log records to determine if we
can skip recovery. WT-1892
2015-04-25 12:50:52 +10:00
Michael Cahill
f141e80438 If an LSM search-near operation lands on a deleted item, make a copy of the key before stepping to the next record.
refs WT-1891
2015-04-25 12:50:52 +10:00
Susan LoVerso
25e3e44bd0 Remove use of unneeded tmp_fh. WT-1872 2015-04-25 12:50:52 +10:00
Alex Gorrod
04cb0cf13d Fix a deadlock in LSM with schema operations.
There is special code in LSM to co-ordinate schema operations on
tables (drop, rename, etc). The code does a dance dropping and
acquiring locks, to allow utility operations to drain for the tree
while waiting for it to close.

We were doing the dance with the schema and dhandle list locks. We
needed to include the table lock, or parallel cursor opens could block:

The cursor open is waiting for the table lock:
__wt_spin_lock src/include/mutex.i:175
__schema_add_table src/schema/schema_list.c:26
__wt_schema_get_table src/schema/schema_list.c:98
__wt_curtable_open src/third_party/wiredtiger/src/cursor/cur_table.c:875
__wt_open_cursor src/session/session_api.c:240

The LSM table drop is waiting for the schema lock:
__wt_spin_lock src/include/mutex.i:175
__lsm_tree_close src/lsm/lsm_tree.c:107
__wt_lsm_tree_drop src/lsm/lsm_tree.c:943
__wt_schema_drop src/schema/schema_drop.c:174
__drop_table src/schema/schema_drop.c:124
__wt_schema_drop src/schema/schema_drop.c:176
__session_drop src/session/session_api.c:528
2015-04-25 12:50:52 +10:00
Alex Gorrod
4d78cd2953 Fix a bug in LSM where updates with overwrite could be skipped.
References JIRA BF-829

The issue was that we were not looking in all chunks of an LSM
tree before deciding whether to apply an update (insert or remove).
2015-04-25 12:50:52 +10:00
Michael Cahill
ba56581ea8 Merge bulk cursor close with regular file cursor close: we were missing a decrement that kept bulk-loaded files pinned. 2015-04-25 12:50:52 +10:00
daveh86
6bc0d1c5a4 Allow forced eviction of pages already queued for eviction 2015-04-25 12:50:52 +10:00
Mark Benvenuto
231118be3d Disable fallocate on Windows since SetEndofFile does not
ignore truncation requests like POSIX fallocate.

Conflicts:
	src/os_win/os_fallocate.c
2015-04-25 12:50:50 +10:00
Keith Bostic
1029984691 When using ftruncate as the file-extension call, we must use WT_FH.size
as the starting point of the extension (not offset), and we have to read
the size value after acquiring the lock that prevents racing with writers.

Split the extension functionality out into a separate function and
try to make it a little simpler to understand.

Reference #1871.
2015-04-25 12:49:43 +10:00
Keith Bostic
c8ef70b364 The OS layer ftruncate() code sets the WT_FH file size, so when
ftruncate is used to extend the file, we skip over the bytes added to
the file during block allocation, and verify eventually fails because
there are unverified blocks in the file.

Reference #1871.
2015-04-25 12:49:43 +10:00
Alex Gorrod
96cef67303 Update file_manager=(close_idle_time=,close_scan_interval=) max values.
The old maximum for both was 1000, the new value is 100 thousand. Setting
such large values is not recommended, but there is no internal limitation
on them.

Conflicts:
	src/config/config_def.c
2015-04-25 12:49:41 +10:00
Keith Bostic
1749a9b293 Don't ignore sweeping entirely when we reach the open-file-count limit,
just ignore the in-use files.

Conflicts:
	src/conn/conn_sweep.c
2015-04-25 12:47:24 +10:00
Keith Bostic
018dd2bdbb Don't keep sweeping once we reach the minimum number of handles. 2015-04-25 12:46:08 +10:00
Keith Bostic
77094e91de const: At condition ret == -1, the value of ret must be equal to -1.
CID 72082 (#1 of 1): Redundant test (DEADCODE)
dead_error_condition: The condition ret == -1 must be true.
2015-04-25 12:46:08 +10:00
Keith Bostic
7c8b1da6a1 Remove unnecessary atomic operation, fixing CID 69810 along the way.
CID 69810 (#1 of 1): Parse warning (PW.CONVERSION_TO_POINTER_ADDS_BITS)
1. conversion_to_pointer_adds_bits: conversion from smaller integer to pointer
2015-04-25 12:46:08 +10:00
Keith Bostic
7f68002fbd CID 72073 (#1 of 1): Redundant test (DEADCODE)
dead_error_condition: The condition session != NULL must be true.
2015-04-25 12:46:08 +10:00
Keith Bostic
7253454660 Remove unnecessary error labels, fixing SERVER-17948 along the way.
Coverity analysis defect 72088: Redundant test:
File: /src/third_party/wiredtiger/src/log/log.c
Function __log_decompress
/src/third_party/wiredtiger/src/log/log.c, line: 363
At condition "ret != 0", the value of "ret" must be equal to 0.
2015-04-25 12:46:08 +10:00
Keith Bostic
87479274cd SERVER-17954: Coverity analysis defect 72085: Redundant test
At condition "ret != 0", the value of "ret" cannot be equal to 0.
2015-04-25 12:46:08 +10:00
Keith Bostic
1a32cfeaf3 Fix places where we were using the wrong link for traversing hash buckets. 2015-04-25 12:46:08 +10:00
Mark Benvenuto
4f3da019e8 Enable test/fops for Windows, and add to CI
- Added Windows shim for gettimeofday
2015-04-25 12:46:08 +10:00
Mark Benvenuto
7017411290 test/format for Windows
- Fixed an issue where fallocate was setting fh->size (incorrectly copied from ftruncate implementation)
2015-04-25 12:46:08 +10:00
Keith Bostic
3574a631f1 Rename file_manager.open_handles to file_manager.close_handle_minimum,
try and match existing naming for file_manager configuration options.

Conflicts:
	src/config/config_def.c
2015-04-25 12:46:05 +10:00
Keith Bostic
0c667cb31e Don't sweep unless there's a reason we need to close file handles.
Add a new configuration option, file_manager.open_handles that sets
a minimum number of file handles that must be open before sweep runs,
default is 250. Reference #1856, SERVER-17907.

Conflicts:
	src/config/config_def.c
2015-04-25 12:35:38 +10:00
Keith Bostic
76e1a33f0e If the underlying split buffer grows, existing boundary references may
point into freed memory, switch the boundary "first byte" field from a
pointer to a buffer offset to avoid the problem, reference #1852.
2015-04-25 12:34:01 +10:00
Susan LoVerso
dceb8b2086 Reset eol if we continue. Return NOTFOUND if checksum mismatch. #1840 2015-04-25 12:34:01 +10:00
Susan LoVerso
fccd9f8cbc Modify log_scan callback args to send in next LSN. #1837 2015-04-25 12:34:01 +10:00
Susan LoVerso
de0272bc87 Set checkpoint LSN to existing log record. #1700 2015-04-25 12:31:48 +10:00
63 changed files with 1468 additions and 531 deletions

View File

@@ -1,3 +1,336 @@
3.0.3, Apr 25 2015
------------------
commit 04f9af1c4fb9a9287786948a818c88eef68cb8a4
Author: Keith Bostic <keith@wiredtiger.com>
Date: Thu Apr 23 17:52:42 2015 -0400
If a file is marked cache-resident, it can never be evicted,
this fixes SERVER-18192.
commit 96f85a0987217074dd91de689a499cd0a4c16bbb
Author: Keith Bostic <keith@wiredtiger.com>
Date: Thu Apr 23 15:08:06 2015 -0400
Always clear WT_BTREE_NO_HAZARD when cache_resident is not configured,
otherwise we can (1) create a file with cache_resident configured
(setting both no-eviction and no-hazard), (2) drop the file, then
(3) re-create the file without cache_resident configured (clearing only
no-eviction), and the result will be a file that can be evicted and
where we aren't maintaining hazard pointers, and nothing good is going
to happen after that.
commit 9166bca3d07d6592c1426c2c33bd56b6be0667e0
Author: Alex Gorrod <alexg@wiredtiger.com>
Date: Thu Apr 23 05:43:35 2015 +0000
Fix a deadlock related to handle locking.
If one thread does a __wt_session_release_btree of a handle at the same time
as another thread does a __wt_session_get_btree both wanting exclusive access
to the file. It was possible for one thread to get the dhandle list lock and
wait on the handle lock, while another thread held the handle lock waiting for
the handle list lock.
Temporarily fix by doing a try-lock on the __wt_session_get_btree path, long term
the solution is to get rid of the __conn_dhandle_open_lock method, and split get
and lock into two passes.
Refs BF-716
commit 3e254079484ce35a3cb70c48478c69defdb8f012
Author: Alex Gorrod <alexg@wiredtiger.com>
Date: Thu Apr 23 05:42:08 2015 +0000
Fix a deadlock related to LSM. There are cases where closing a file with
an existing checkpoint could self deadlock.
Check in the meta tracking whether we've already visited a checkpoint handle.
Refs WT-716
commit 610f629949726b16f938ded85188bb6a21820f7e
Author: Keith Bostic <keith@wiredtiger.com>
Date: Mon Apr 20 10:40:54 2015 -0400
Create a "clear a single walk" function and call it from __evict_clear_walks
and __evict_clear_all_walks, that way we can use the WT_WITH_DHANDLE macro,
and the comment about clearing the eviction reference before releasing the
page appears everywhere it should.
commit 3eceb85ce623dcce9273f7b722bb64f509dbe24d
Author: Michael Cahill <michael.cahill@mongodb.com>
Date: Mon Apr 20 16:15:41 2015 +1000
Clear session->dhandle so that future error messages don't dereference freed memory.
commit 23ce8bae4d6507d6b6d599cb73a26a6c856cce98
Author: Michael Cahill <michael.cahill@mongodb.com>
Date: Mon Apr 20 15:48:29 2015 +1000
Clear eviction walks in all trees before the eviction server exits.
commit a4bce0e0bc05d528f118b645d0d1915db00cdcf3
Author: Michael Cahill <michael.cahill@mongodb.com>
Date: Mon Apr 20 14:11:57 2015 +1000
Move the "cache is empty" check to where the cache is destroyed, not when the eviction server is exiting.
commit c8fdd9c676c4a24bee6328a56cf7fd074cd045e0
Author: Michael Cahill <michael.cahill@mongodb.com>
Date: Mon Apr 20 13:25:36 2015 +1000
Shut down the eviction server before closing file handles to avoid a race.
refs WT-1893
commit abb0bb80cc6dce29b8db61c6747c228c2701ae5a
Author: Susan LoVerso <sue@wiredtiger.com>
Date: Fri Apr 17 10:49:41 2015 -0400
Look for any number of non-data-changing log records to determine if we
can skip recovery. WT-1892
commit ee02428d1fdf1118c482688ec870a9da69bee45a
Author: Michael Cahill <michael.cahill@mongodb.com>
Date: Fri Apr 17 12:28:52 2015 +1000
If an LSM search-near operation lands on a deleted item, make a copy of the key before stepping to the next record.
refs WT-1891
commit 54e856d57da291c5f84da6d0d0ab56280d9956dc
Author: Susan LoVerso <sue@wiredtiger.com>
Date: Thu Apr 16 12:22:31 2015 -0400
Remove use of unneeded tmp_fh. WT-1872
commit 6a32905c397e57643b15e5a3038dbcb99a8a8dc8
Author: Alex Gorrod <alexg@wiredtiger.com>
Date: Thu Apr 16 06:18:31 2015 +0000
Fix a deadlock in LSM with schema operations.
There is special code in LSM to co-ordinate schema operations on
tables (drop, rename, etc). The code does a dance dropping and
acquiring locks, to allow utility operations to drain for the tree
while waiting for it to close.
We were doing the dance with the schema and dhandle list locks. We
needed to include the table lock, or parallel cursor opens could block:
The cursor open is waiting for the table lock:
__wt_spin_lock src/include/mutex.i:175
__schema_add_table src/schema/schema_list.c:26
__wt_schema_get_table src/schema/schema_list.c:98
__wt_curtable_open src/third_party/wiredtiger/src/cursor/cur_table.c:875
__wt_open_cursor src/session/session_api.c:240
The LSM table drop is waiting for the schema lock:
__wt_spin_lock src/include/mutex.i:175
__lsm_tree_close src/lsm/lsm_tree.c:107
__wt_lsm_tree_drop src/lsm/lsm_tree.c:943
__wt_schema_drop src/schema/schema_drop.c:174
__drop_table src/schema/schema_drop.c:124
__wt_schema_drop src/schema/schema_drop.c:176
__session_drop src/session/session_api.c:528
commit 790646183cc5dd056bbf95c4563c20c51602a808
Author: Alex Gorrod <alexg@wiredtiger.com>
Date: Thu Apr 16 04:11:36 2015 +0000
Fix a bug in LSM where updates with overwrite could be skipped.
References JIRA BF-829
The issue was that we were not looking in all chunks of an LSM
tree before deciding whether to apply an update (insert or remove).
commit b9e2e76511a24505014369aaf0e1ec286e9c473d
Author: Michael Cahill <michael.cahill@mongodb.com>
Date: Tue Apr 14 16:30:31 2015 +1000
Merge bulk cursor close with regular file cursor close: we were missing a decrement that kept bulk-loaded files pinned.
commit ed3158e71f0bd2716269a5771fd162b60b9a1cc0
Author: daveh86 <howsdav@gmail.com>
Date: Mon Apr 13 12:59:29 2015 +1000
Allow forced eviction of pages already queued for eviction
commit 9c83351f63afc2e032e492e3030df4f3b1cd6883
Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
Date: Sun Apr 12 19:02:32 2015 -0400
Disable fallocate on Windows since SetEndofFile does not
ignore truncation requests like POSIX fallocate.
commit 61a7d81ad26db1f2bfb65258d9b8ae4a4ca25b34
Author: Keith Bostic <keith@wiredtiger.com>
Date: Sun Apr 12 12:44:05 2015 -0400
When using ftruncate as the file-extension call, we must use WT_FH.size
as the starting point of the extension (not offset), and we have to read
the size value after acquiring the lock that prevents racing with writers.
Split the extension functionality out into a separate function and
try to make it a little simpler to understand.
Reference #1871.
commit f26f1c1c59d5cbbc8f5f543215d8fc636e7175d2
Author: Keith Bostic <keith@wiredtiger.com>
Date: Sun Apr 12 10:16:34 2015 -0400
The OS layer ftruncate() code sets the WT_FH file size, so when
ftruncate is used to extend the file, we skip over the bytes added to
the file during block allocation, and verify eventually fails because
there are unverified blocks in the file.
Reference #1871.
commit d20f20f1ac324030986b3ee23e1edf96486c92b4
Author: Alex Gorrod <alexg@wiredtiger.com>
Date: Fri Apr 10 05:59:04 2015 +0000
Update file_manager=(close_idle_time=,close_scan_interval=) max values.
The old maximum for both was 1000, the new value is 100 thousand. Setting
such large values is not recommended, but there is no internal limitation
on them.
commit c36a3308f685d3b85efe9ac6ee0835f0974574b4
Author: Keith Bostic <keith@wiredtiger.com>
Date: Thu Apr 9 14:32:40 2015 -0400
Don't ignore sweeping entirely when we reach the open-file-count limit,
just ignore the in-use files.
commit 46ef2555bbc51ce6453536e72202782be4949855
Author: Keith Bostic <keith@wiredtiger.com>
Date: Thu Apr 9 11:06:50 2015 -0400
Don't keep sweeping once we reach the minimum number of handles.
commit 1fdfcc62726d25a7cceeeefff174a8e1bf9f9e67
Author: Keith Bostic <keith@wiredtiger.com>
Date: Thu Apr 9 12:39:03 2015 -0400
const: At condition ret == -1, the value of ret must be equal to -1.
CID 72082 (#1 of 1): Redundant test (DEADCODE)
dead_error_condition: The condition ret == -1 must be true.
commit d04e3c25d46a5c4426e1c6d4881cd9e250014931
Author: Keith Bostic <keith@wiredtiger.com>
Date: Thu Apr 9 12:19:24 2015 -0400
Remove unnecessary atomic operation, fixing CID 69810 along the way.
CID 69810 (#1 of 1): Parse warning (PW.CONVERSION_TO_POINTER_ADDS_BITS)
1. conversion_to_pointer_adds_bits: conversion from smaller integer to pointer
commit d585bdab980508e590cf70508f053182c556d6f2
Author: Keith Bostic <keith@wiredtiger.com>
Date: Thu Apr 9 12:00:04 2015 -0400
CID 72073 (#1 of 1): Redundant test (DEADCODE)
dead_error_condition: The condition session != NULL must be true.
commit 21907f9193e30f51a59fcbaddfbc46cb7732d3b7
Author: Keith Bostic <keith@wiredtiger.com>
Date: Thu Apr 9 11:52:13 2015 -0400
Remove unnecessary error labels, fixing SERVER-17948 along the way.
Coverity analysis defect 72088: Redundant test:
File: /src/third_party/wiredtiger/src/log/log.c
Function __log_decompress
/src/third_party/wiredtiger/src/log/log.c, line: 363
At condition "ret != 0", the value of "ret" must be equal to 0.
commit a29f4d2f40eee784950147af848fdbf277328b7f
Author: Keith Bostic <keith@wiredtiger.com>
Date: Thu Apr 9 11:31:11 2015 -0400
SERVER-17954: Coverity analysis defect 72085: Redundant test
At condition "ret != 0", the value of "ret" cannot be equal to 0.
commit 1298fd6db4f0c1d6133b3e547b2e2db51ec4a708
Author: Keith Bostic <keith@wiredtiger.com>
Date: Wed Apr 8 14:38:49 2015 -0400
Fix places where we were using the wrong link for traversing hash buckets.
commit 7d0e5fe3dfa39f7ff377a1d4660bc2bc36dc0df8
Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
Date: Wed Apr 8 10:19:33 2015 -0400
Enable test/fops for Windows, and add to CI
- Added Windows shim for gettimeofday
commit c6270b677499525067d5d729a6fbdce6ad2f533a
Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
Date: Wed Apr 8 13:28:41 2015 -0400
test/format for Windows
- Fixed an issue where fallocate was setting fh->size (incorrectly copied from ftruncate implementation)
commit fac74b4665d6dfa3aebecf741c914fd1678fa628
Author: Keith Bostic <keith@wiredtiger.com>
Date: Wed Apr 8 08:47:06 2015 -0400
Rename file_manager.open_handles to file_manager.close_handle_minimum,
try and match existing naming for file_manager configuration options.
commit 709cc8d7ac85d31aeae2387b192092910e6cf854
Author: Keith Bostic <keith@wiredtiger.com>
Date: Wed Apr 8 08:09:23 2015 -0400
Don't sweep unless there's a reason we need to close file handles.
Add a new configuration option, file_manager.open_handles that sets
a minimum number of file handles that must be open before sweep runs,
default is 250. Reference #1856, SERVER-17907.
commit b79dcdebf0b1987b59fa70e50c8c61e5e0a64e64
Author: Keith Bostic <keith@wiredtiger.com>
Date: Tue Apr 7 11:52:12 2015 -0400
If the underlying split buffer grows, existing boundary references may
point into freed memory, switch the boundary "first byte" field from a
pointer to a buffer offset to avoid the problem, reference #1852.
commit 684fd71475cbc6b15290945af0160fac0313ad6b
Author: Susan LoVerso <sue@wiredtiger.com>
Date: Wed Apr 1 15:44:42 2015 -0400
Reset eol if we continue. Return NOTFOUND if checksum mismatch. #1840
commit 9c29e0f13268c03038704372c069353c81357791
Author: Susan LoVerso <sue@wiredtiger.com>
Date: Tue Mar 31 16:49:47 2015 -0400
Modify log_scan callback args to send in next LSN. #1837
commit 003e6c3598408c7670f65a8720622c38fdaf148d
Author: Mark Benvenuto <mark.benvenuto@mongodb.com>
Date: Thu Mar 26 11:17:57 2015 -0400
Use Standard C type uint64_t for zlib
commit e12b9e0b005da7364330f4d3409256ded26ba90d
Author: Keith Bostic <keith@wiredtiger.com>
Date: Thu Mar 26 18:31:15 2015 -0400
Add checks for Windows builds: _M_AMD64 and _MSC_VER.
commit a609c82bfa6fbf80cd5fb853e1d97c16878a0180
Author: Susan LoVerso <sue@wiredtiger.com>
Date: Mon Mar 23 15:25:23 2015 -0400
Set checkpoint LSN to existing log record. #1700
3.0.2, Mar 26 2015
------------------

View File

@@ -338,11 +338,12 @@ t = env.Program("t_huge",
LIBS=[wtlib] + wtlibs)
Default(t)
#env.Program("t_fops",
#["test/fops/file.c",
#"test/fops/fops.c",
#"test/fops/t.c"],
#LIBS=[wtlib])
t = env.Program("t_fops",
["test/fops/file.c",
"test/fops/fops.c",
"test/fops/t.c"],
LIBS=[wtlib, shim] + wtlibs)
Default(t)
if useBdb:
benv = env.Clone()

7
dist/api_data.py vendored
View File

@@ -374,10 +374,13 @@ connection_runtime_config = [
type='category', subconfig=[
Config('close_idle_time', '30', r'''
amount of time in seconds a file handle needs to be idle
before attempting to close it''', min=1, max=1000),
before attempting to close it''', min=1, max=100000),
Config('close_handle_minimum', '250', r'''
number of handles open before the file manager will look for handles
to close'''),
Config('close_scan_interval', '10', r'''
interval in seconds at which to check for files that are
inactive and close them''', min=1, max=1000)
inactive and close them''', min=1, max=100000),
]),
Config('lsm_manager', '', r'''
configure database wide options for LSM tree management''',

View File

@@ -75,8 +75,8 @@ retry:
*/
my_slot = my_consume % async->async_qsize;
prev_slot = last_consume % async->async_qsize;
*op = (WT_ASYNC_OP_IMPL*)WT_ATOMIC_STORE8(
async->async_queue[my_slot], NULL);
*op = async->async_queue[my_slot];
async->async_queue[my_slot] = NULL;
WT_ASSERT(session, async->cur_queue > 0);
WT_ASSERT(session, *op != NULL);
@@ -105,12 +105,10 @@ retry:
static int
__async_flush_wait(WT_SESSION_IMPL *session, WT_ASYNC *async, uint64_t my_gen)
{
WT_DECL_RET;
while (async->flush_state == WT_ASYNC_FLUSHING &&
async->flush_gen == my_gen)
WT_ERR(__wt_cond_wait(session, async->flush_cond, 10000));
err: return (ret);
WT_RET(__wt_cond_wait(session, async->flush_cond, 10000));
return (0);
}
/*

View File

@@ -137,7 +137,7 @@ __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block,
WT_ERR(__wt_verbose(session, WT_VERB_CHECKPOINT,
"truncate file to %" PRIuMAX, (uintmax_t)ci->file_size));
WT_ERR_BUSY_OK(
__wt_ftruncate(session, block->fh, ci->file_size));
__wt_block_truncate(session, block->fh, ci->file_size));
}
if (0) {
@@ -185,7 +185,7 @@ __wt_block_checkpoint_unload(
* an open checkpoint on the file), that's OK.
*/
WT_TRET_BUSY_OK(
__wt_ftruncate(session, block->fh, block->fh->size));
__wt_block_truncate(session, block->fh, block->fh->size));
__wt_spin_lock(session, &block->live_lock);
__wt_block_ckpt_destroy(session, &block->live);

View File

@@ -1342,7 +1342,7 @@ __wt_block_extlist_truncate(
WT_RET(__wt_verbose(session, WT_VERB_BLOCK,
"truncate file from %" PRIdMAX " to %" PRIdMAX,
(intmax_t)orig, (intmax_t)size));
WT_RET_BUSY_OK(__wt_ftruncate(session, block->fh, size));
WT_RET_BUSY_OK(__wt_block_truncate(session, block->fh, size));
return (0);
}

View File

@@ -25,7 +25,7 @@ __wt_block_manager_truncate(
WT_RET(__wt_open(session, filename, 0, 0, WT_FILE_TYPE_DATA, &fh));
/* Truncate the file. */
WT_ERR(__wt_ftruncate(session, fh, (wt_off_t)0));
WT_ERR(__wt_block_truncate(session, fh, (wt_off_t)0));
/* Write out the file's meta-data. */
WT_ERR(__wt_desc_init(session, fh, allocsize));

View File

@@ -36,7 +36,7 @@ __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block)
if (block->fh->size > allocsize) {
len = (block->fh->size / allocsize) * allocsize;
if (len != block->fh->size)
WT_RET(__wt_ftruncate(session, block->fh, len));
WT_RET(__wt_block_truncate(session, block->fh, len));
} else
len = allocsize;
block->live.file_size = len;

View File

@@ -146,7 +146,7 @@ __verify_last_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckpt)
ci = &_ci;
WT_RET(__wt_block_ckpt_init(session, ci, ckpt->name));
WT_ERR(__wt_block_buffer_to_ckpt(session, block, ckpt->raw.data, ci));
WT_ERR(__wt_ftruncate(session, block->fh, ci->file_size));
WT_ERR(__wt_block_truncate(session, block->fh, ci->file_size));
err: __wt_block_ckpt_destroy(session, ci);
return (ret);

View File

@@ -20,6 +20,124 @@ __wt_block_header(WT_BLOCK *block)
return ((u_int)WT_BLOCK_HEADER_SIZE);
}
/*
 * __wt_block_truncate --
 *	Truncate the underlying file to the requested length, then record that
 *	length in the file handle as both the file size and the extended size.
 */
int
__wt_block_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
{
	/* Any failure of the underlying truncate is returned to the caller. */
	WT_RET(__wt_ftruncate(session, fh, len));

	/* Only update the handle's bookkeeping once the truncate succeeded. */
	fh->extend_size = len;
	fh->size = len;

	return (0);
}
/*
* __wt_block_extend --
* Extend the file.
*
* Optionally grows the file past its current size, using __wt_fallocate
* when it is available and falling back to __wt_ftruncate otherwise.
*
* session: the session, passed through to the lock and file calls.
* block: the block handle; its live_lock is released/re-acquired here.
* fh: the file handle; extend_len, extend_size, size and the fallocate
* configuration fields are read, and extend_size is updated.
* offset: the file offset compared against the current extend_size
* (presumably the offset just allocated for the write -- confirm
* against the caller).
* align_size: size added to the offset when checking whether the
* extension boundary is being crossed.
* release_lockp: in/out. Non-zero on entry means the live lock may be
* released by this function; cleared if this function unlocks it,
* and set to 1 if this function re-acquires it.
*
* Returns 0 if no extension is configured or needed, or if the extension
* succeeds; otherwise the error from __wt_fallocate (other than ENOTSUP)
* or from __wt_ftruncate (EBUSY is treated as success).
*/
static inline int
__wt_block_extend(WT_SESSION_IMPL *session, WT_BLOCK *block,
WT_FH *fh, wt_off_t offset, size_t align_size, int *release_lockp)
{
WT_DECL_RET;
int locked;
/*
* The locking in this function is messy: the live system is locked when
* we're called, by definition, but that lock may have been acquired by
* our caller or our caller's caller. If it's our caller's lock and
* we can unlock it before returning (either before extending the file
* or afterward, depending on the call used), then *release_lockp is set.
*
* If we unlock, but then find out we need a lock after all, re-acquire
* the lock (and set *release_lockp so our caller knows to release it).
*/
/* Track whether the live lock is currently held; it is on entry. */
locked = 1;
/* If not configured to extend the file, we're done. */
if (fh->extend_len == 0)
return (0);
/*
* Extend the file in chunks. We want to limit the number of threads
* extending the file at the same time, so choose the one thread that's
* crossing the extended boundary. We don't extend newly created files,
* and it's theoretically possible we might wait so long our extension
* of the file is passed by another thread writing single blocks, that's
* why there's a check in case the extended file size becomes too small:
* if the file size catches up, every thread tries to extend it.
*/
if (fh->extend_size > fh->size &&
(offset > fh->extend_size ||
offset + fh->extend_len + (wt_off_t)align_size < fh->extend_size))
return (0);
/*
* File extension may require locking: some variants of the system call
* used to extend the file initialize the extended space. If a writing
* thread races with the extending thread, the extending thread might
* overwrite already written data, and that would be very, very bad.
*
* Some variants of the system call to extend the file fail at run-time
* based on the filesystem type, fall back to ftruncate in that case,
* and remember that ftruncate requires locking.
*/
if (fh->fallocate_available != WT_FALLOCATE_NOT_AVAILABLE) {
/*
* Release any locally acquired lock if not needed to extend the
* file, extending the file may require updating on-disk file's
* metadata, which can be slow. (It may be a bad idea to
* configure for file extension on systems that require locking
* over the extend call.)
*/
if (!fh->fallocate_requires_locking && *release_lockp) {
*release_lockp = locked = 0;
__wt_spin_unlock(session, &block->live_lock);
}
/*
* Extend the file: there's a race between setting the value of
* extend_size and doing the extension, but it should err on the
* side of extend_size being smaller than the actual file size,
* and that's OK, we simply may do another extension sooner than
* otherwise.
*/
fh->extend_size = fh->size + fh->extend_len * 2;
if ((ret = __wt_fallocate(
session, fh, fh->size, fh->extend_len * 2)) == 0)
return (0);
/* Only ENOTSUP falls through to the ftruncate path below. */
if (ret != ENOTSUP)
return (ret);
}
/*
* We may have a caller lock or a locally acquired lock, but we need a
* lock to call ftruncate.
*/
if (!locked) {
__wt_spin_lock(session, &block->live_lock);
*release_lockp = 1;
}
/*
* The underlying truncate call initializes allocated space, reset the
* extend length after locking so we don't overwrite already-written
* blocks.
*/
fh->extend_size = fh->size + fh->extend_len * 2;
/*
* The truncate might fail if there's a mapped file (in other words, if
* there's an open checkpoint on the file), that's OK.
*/
if ((ret = __wt_ftruncate(session, fh, fh->extend_size)) == EBUSY)
ret = 0;
return (ret);
}
/*
* __wt_block_write_size --
* Return the buffer size required to write a block.
@@ -86,7 +204,6 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
blk = WT_BLOCK_HEADER_REF(buf->mem);
fh = block->fh;
local_locked = 0;
/* Buffers should be aligned for writing. */
if (!F_ISSET(buf, WT_ITEM_ALIGNED)) {
@@ -143,81 +260,26 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
blk->cksum = __wt_cksum(
buf->mem, data_cksum ? align_size : WT_BLOCK_COMPRESS_SKIP);
/* Pre-allocate some number of extension structures. */
WT_RET(__wt_block_ext_prealloc(session, 5));
/*
* Acquire a lock, if we don't already hold one.
* Allocate space for the write, and optionally extend the file (note
* the block-extend function may release the lock).
* Release any locally acquired lock.
*/
local_locked = 0;
if (!caller_locked) {
WT_RET(__wt_block_ext_prealloc(session, 5));
__wt_spin_lock(session, &block->live_lock);
local_locked = 1;
}
ret = __wt_block_alloc(session, block, &offset, (wt_off_t)align_size);
/*
* Extend the file in chunks. We want to limit the number of threads
* extending the file at the same time, so choose the one thread that's
* crossing the extended boundary. We don't extend newly created files,
* and it's theoretically possible we might wait so long our extension
* of the file is passed by another thread writing single blocks, that's
* why there's a check in case the extended file size becomes too small:
* if the file size catches up, every thread tries to extend it.
*
* File extension may require locking: some variants of the system call
* used to extend the file initialize the extended space. If a writing
* thread races with the extending thread, the extending thread might
* overwrite already written data, and that would be very, very bad.
*
* Some variants of the system call to extend the file fail at run-time
* based on the filesystem type, fall back to ftruncate in that case,
* and remember that ftruncate requires locking.
*/
if (ret == 0 &&
fh->extend_len != 0 &&
(fh->extend_size <= fh->size ||
(offset + fh->extend_len <= fh->extend_size &&
offset +
fh->extend_len + (wt_off_t)align_size >= fh->extend_size))) {
fh->extend_size = offset + fh->extend_len * 2;
if (fh->fallocate_available != WT_FALLOCATE_NOT_AVAILABLE) {
/*
* Release any locally acquired lock if it's not needed
* to extend the file, extending the file might require
* updating file metadata, which can be slow. (It may be
* a bad idea to configure for file extension on systems
* that require locking over the extend call.)
*/
if (!fh->fallocate_requires_locking && local_locked) {
__wt_spin_unlock(session, &block->live_lock);
local_locked = 0;
}
/* Extend the file. */
if ((ret = __wt_fallocate(session,
fh, offset, fh->extend_len * 2)) == ENOTSUP) {
ret = 0;
goto extend_truncate;
}
} else {
extend_truncate: /*
* We may have a caller lock or a locally acquired lock,
* but we need a lock to call ftruncate.
*/
if (!caller_locked && local_locked == 0) {
__wt_spin_lock(session, &block->live_lock);
local_locked = 1;
}
/*
* The truncate might fail if there's a file mapping
* (if there's an open checkpoint on the file), that's
* OK.
*/
if ((ret = __wt_ftruncate(
session, fh, offset + fh->extend_len * 2)) == EBUSY)
ret = 0;
}
}
/* Release any locally acquired lock. */
if (local_locked) {
if (ret == 0)
ret = __wt_block_extend(
session, block, fh, offset, align_size, &local_locked);
if (local_locked)
__wt_spin_unlock(session, &block->live_lock);
local_locked = 0;
}
WT_RET(ret);
/* Write the block. */

View File

@@ -254,13 +254,13 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
/* Eviction; the metadata file is never evicted. */
if (WT_IS_METADATA(btree->dhandle))
F_SET(btree, WT_BTREE_NO_EVICTION | WT_BTREE_NO_HAZARD);
F_SET(btree, WT_BTREE_IN_MEMORY | WT_BTREE_NO_EVICTION);
else {
WT_RET(__wt_config_gets(session, cfg, "cache_resident", &cval));
if (cval.val)
F_SET(btree, WT_BTREE_NO_EVICTION | WT_BTREE_NO_HAZARD);
F_SET(btree, WT_BTREE_IN_MEMORY | WT_BTREE_NO_EVICTION);
else
F_CLR(btree, WT_BTREE_NO_EVICTION);
F_CLR(btree, WT_BTREE_IN_MEMORY | WT_BTREE_NO_EVICTION);
}
/* Checksums */
@@ -507,8 +507,11 @@ __wt_btree_evictable(WT_SESSION_IMPL *session, int on)
btree = S2BT(session);
/* The metadata file is never evicted. */
if (on && !WT_IS_METADATA(btree->dhandle))
/* Permanently cache-resident files can never be evicted. */
if (F_ISSET(btree, WT_BTREE_IN_MEMORY))
return;
if (on)
F_CLR(btree, WT_BTREE_NO_EVICTION);
else
F_SET(btree, WT_BTREE_NO_EVICTION);

View File

@@ -359,9 +359,8 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
tp->symbol = (uint32_t)symbol;
tp->frequency = (uint32_t)frequency;
}
ret = ferror(fp) ? WT_ERROR : 0;
if (ret == EOF)
ret = 0;
*entriesp = lineno - 1;
*tablep = table;

View File

@@ -60,8 +60,11 @@ static const WT_CONFIG_CHECK confchk_eviction_subconfigs[] = {
};
static const WT_CONFIG_CHECK confchk_file_manager_subconfigs[] = {
{ "close_idle_time", "int", NULL, "min=1,max=1000", NULL },
{ "close_scan_interval", "int", NULL, "min=1,max=1000", NULL },
{ "close_handle_minimum", "string", NULL, NULL, NULL },
{ "close_idle_time", "int", NULL, "min=1,max=100000", NULL },
{ "close_scan_interval", "int",
NULL, "min=1,max=100000",
NULL },
{ NULL, NULL, NULL, NULL, NULL }
};
@@ -662,10 +665,10 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"name=\"WiredTigerCheckpoint\",wait=0),error_prefix=,"
"eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80,"
"eviction_target=80,eviction_trigger=95,"
"file_manager=(close_idle_time=30,close_scan_interval=10),"
"lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,"
"shared_cache=(chunk=10MB,name=,reserve=0,size=500MB),"
"statistics=none,statistics_log=(on_close=0,"
"file_manager=(close_handle_minimum=250,close_idle_time=30,"
"close_scan_interval=10),lsm_manager=(merge=,worker_thread_max=4)"
",lsm_merge=,shared_cache=(chunk=10MB,name=,reserve=0,size=500MB)"
",statistics=none,statistics_log=(on_close=0,"
"path=\"WiredTigerStat.%d.%H\",sources=,"
"timestamp=\"%b %d %H:%M:%S\",wait=0),verbose=",
confchk_connection_reconfigure
@@ -792,13 +795,13 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"config_base=,create=0,direct_io=,error_prefix=,"
"eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80,"
"eviction_target=80,eviction_trigger=95,exclusive=0,extensions=,"
"file_extend=,file_manager=(close_idle_time=30,"
"close_scan_interval=10),hazard_max=1000,log=(archive=,"
"compressor=,enabled=0,file_max=100MB,path=,prealloc=,recover=on)"
",lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,mmap=,"
"multiprocess=0,session_max=100,session_scratch_max=2MB,"
"shared_cache=(chunk=10MB,name=,reserve=0,size=500MB),"
"statistics=none,statistics_log=(on_close=0,"
"file_extend=,file_manager=(close_handle_minimum=250,"
"close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
"log=(archive=,compressor=,enabled=0,file_max=100MB,path=,"
"prealloc=,recover=on),lsm_manager=(merge=,worker_thread_max=4),"
"lsm_merge=,mmap=,multiprocess=0,session_max=100,"
"session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,reserve=0"
",size=500MB),statistics=none,statistics_log=(on_close=0,"
"path=\"WiredTigerStat.%d.%H\",sources=,"
"timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0"
",method=fsync),use_environment_priv=0,verbose=",
@@ -811,13 +814,13 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"config_base=,create=0,direct_io=,error_prefix=,"
"eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80,"
"eviction_target=80,eviction_trigger=95,exclusive=0,extensions=,"
"file_extend=,file_manager=(close_idle_time=30,"
"close_scan_interval=10),hazard_max=1000,log=(archive=,"
"compressor=,enabled=0,file_max=100MB,path=,prealloc=,recover=on)"
",lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,mmap=,"
"multiprocess=0,session_max=100,session_scratch_max=2MB,"
"shared_cache=(chunk=10MB,name=,reserve=0,size=500MB),"
"statistics=none,statistics_log=(on_close=0,"
"file_extend=,file_manager=(close_handle_minimum=250,"
"close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
"log=(archive=,compressor=,enabled=0,file_max=100MB,path=,"
"prealloc=,recover=on),lsm_manager=(merge=,worker_thread_max=4),"
"lsm_merge=,mmap=,multiprocess=0,session_max=100,"
"session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,reserve=0"
",size=500MB),statistics=none,statistics_log=(on_close=0,"
"path=\"WiredTigerStat.%d.%H\",sources=,"
"timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0"
",method=fsync),use_environment_priv=0,verbose=,version=(major=0,"
@@ -830,13 +833,13 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=,"
"direct_io=,error_prefix=,eviction=(threads_max=1,threads_min=1),"
"eviction_dirty_target=80,eviction_target=80,eviction_trigger=95,"
"extensions=,file_extend=,file_manager=(close_idle_time=30,"
"close_scan_interval=10),hazard_max=1000,log=(archive=,"
"compressor=,enabled=0,file_max=100MB,path=,prealloc=,recover=on)"
",lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,mmap=,"
"multiprocess=0,session_max=100,session_scratch_max=2MB,"
"shared_cache=(chunk=10MB,name=,reserve=0,size=500MB),"
"statistics=none,statistics_log=(on_close=0,"
"extensions=,file_extend=,file_manager=(close_handle_minimum=250,"
"close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
"log=(archive=,compressor=,enabled=0,file_max=100MB,path=,"
"prealloc=,recover=on),lsm_manager=(merge=,worker_thread_max=4),"
"lsm_merge=,mmap=,multiprocess=0,session_max=100,"
"session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,reserve=0"
",size=500MB),statistics=none,statistics_log=(on_close=0,"
"path=\"WiredTigerStat.%d.%H\",sources=,"
"timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0"
",method=fsync),verbose=,version=(major=0,minor=0)",
@@ -848,13 +851,13 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=,"
"direct_io=,error_prefix=,eviction=(threads_max=1,threads_min=1),"
"eviction_dirty_target=80,eviction_target=80,eviction_trigger=95,"
"extensions=,file_extend=,file_manager=(close_idle_time=30,"
"close_scan_interval=10),hazard_max=1000,log=(archive=,"
"compressor=,enabled=0,file_max=100MB,path=,prealloc=,recover=on)"
",lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,mmap=,"
"multiprocess=0,session_max=100,session_scratch_max=2MB,"
"shared_cache=(chunk=10MB,name=,reserve=0,size=500MB),"
"statistics=none,statistics_log=(on_close=0,"
"extensions=,file_extend=,file_manager=(close_handle_minimum=250,"
"close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
"log=(archive=,compressor=,enabled=0,file_max=100MB,path=,"
"prealloc=,recover=on),lsm_manager=(merge=,worker_thread_max=4),"
"lsm_merge=,mmap=,multiprocess=0,session_max=100,"
"session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,reserve=0"
",size=500MB),statistics=none,statistics_log=(on_close=0,"
"path=\"WiredTigerStat.%d.%H\",sources=,"
"timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0"
",method=fsync),verbose=",

View File

@@ -215,6 +215,22 @@ __wt_cache_destroy(WT_SESSION_IMPL *session)
if (cache == NULL)
return (0);
/* The cache should be empty at this point. Complain if not. */
if (cache->pages_inmem != cache->pages_evict)
__wt_errx(session,
"cache server: exiting with %" PRIu64 " pages in "
"memory and %" PRIu64 " pages evicted",
cache->pages_inmem, cache->pages_evict);
if (cache->bytes_inmem != 0)
__wt_errx(session,
"cache server: exiting with %" PRIu64 " bytes in memory",
cache->bytes_inmem);
if (cache->bytes_dirty != 0 || cache->pages_dirty != 0)
__wt_errx(session,
"cache server: exiting with %" PRIu64
" bytes dirty and %" PRIu64 " pages dirty",
cache->bytes_dirty, cache->pages_dirty);
WT_TRET(__wt_cond_destroy(session, &cache->evict_cond));
WT_TRET(__wt_cond_destroy(session, &cache->evict_waiter_cond));
__wt_spin_destroy(session, &cache->evict_lock);

View File

@@ -74,16 +74,19 @@ __conn_dhandle_open_lock(
*
* Wait for a read lock if we want exclusive access and failed
* to get it: the sweep server may be closing this handle, and
* we need to wait for it to complete. If we want exclusive
* access and find the handle open once we get the read lock,
* give up: some other thread has it locked for real.
* we need to wait for it to release its lock. If we want
* exclusive access and find the handle open once we get the
* read lock, give up: some other thread has it locked for real.
*/
if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
(!want_exclusive || lock_busy)) {
WT_RET(__wt_readlock(session, dhandle->rwlock));
is_open = F_ISSET(dhandle, WT_DHANDLE_OPEN) ? 1 : 0;
if (is_open && !want_exclusive)
if (is_open && !want_exclusive) {
WT_ASSERT(session,
!F_ISSET(dhandle, WT_DHANDLE_DEAD));
return (0);
}
WT_RET(__wt_readunlock(session, dhandle->rwlock));
} else
is_open = 0;
@@ -109,6 +112,7 @@ __conn_dhandle_open_lock(
/* We have an exclusive lock, we're done. */
F_SET(dhandle, WT_DHANDLE_EXCLUSIVE);
WT_ASSERT(session, !F_ISSET(dhandle, WT_DHANDLE_DEAD));
return (0);
} else if (ret != EBUSY || (is_open && want_exclusive))
return (ret);
@@ -141,13 +145,26 @@ __wt_conn_dhandle_find(WT_SESSION_IMPL *session,
/* Increment the reference count if we already have the btree open. */
bucket = __wt_hash_city64(name, strlen(name)) % WT_HASH_ARRAY_SIZE;
SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl)
if (strcmp(name, dhandle->name) == 0 &&
((ckpt == NULL && dhandle->checkpoint == NULL) ||
(ckpt != NULL && dhandle->checkpoint != NULL &&
strcmp(ckpt, dhandle->checkpoint) == 0))) {
session->dhandle = dhandle;
return (0);
if (ckpt == NULL) {
SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl) {
if (F_ISSET(dhandle, WT_DHANDLE_DEAD))
continue;
if (dhandle->checkpoint == NULL &&
strcmp(name, dhandle->name) == 0) {
session->dhandle = dhandle;
return (0);
}
}
} else
SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl) {
if (F_ISSET(dhandle, WT_DHANDLE_DEAD))
continue;
if (dhandle->checkpoint != NULL &&
strcmp(name, dhandle->name) == 0 &&
strcmp(ckpt, dhandle->checkpoint) == 0) {
session->dhandle = dhandle;
return (0);
}
}
return (WT_NOTFOUND);
@@ -229,6 +246,30 @@ err: WT_TRET(__wt_rwlock_destroy(session, &dhandle->rwlock));
return (ret);
}
/*
 * __conn_dhandle_mark_dead --
 *	Flag the session's current data handle as dead.
 */
static int
__conn_dhandle_mark_dead(WT_SESSION_IMPL *session)
{
	int reset_eviction;

	/* The session must have the handle-list-locked flag set. */
	WT_ASSERT(session, F_ISSET(session, WT_SESSION_HANDLE_LIST_LOCKED));

	/*
	 * This is the forced-discard path (for example, dropping a file).
	 *
	 * Exclusive access to the file is required first: ordinary eviction
	 * is disabled and any blocks already queued are drained.
	 */
	WT_RET(__wt_evict_file_exclusive_on(session, &reset_eviction));

	F_SET(session->dhandle, WT_DHANDLE_DEAD);

	/* Undo the exclusive state if the call above asked for a reset. */
	if (reset_eviction != 0)
		__wt_evict_file_exclusive_off(session);

	return (0);
}
/*
* __wt_conn_btree_sync_and_close --
* Sync and close the underlying btree handle.
@@ -270,16 +311,25 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int final, int force)
/*
* The close can fail if an update cannot be written, return the EBUSY
* error to our caller for eventual retry.
*
* If we are forcing the close, just mark the handle dead and the tree
* will be discarded later. Don't do this for memory-mapped trees: we
* have to close the file handle to allow the file to be removed, but
* memory mapped trees contain pointers into memory that will become
* invalid if the mapping is closed.
*/
if (!F_ISSET(btree,
WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY))
WT_ERR(__wt_checkpoint_close(session, final, force));
if (dhandle->checkpoint == NULL)
--S2C(session)->open_btree_count;
WT_ERR(force && (btree->bm == NULL || btree->bm->map == NULL) ?
__conn_dhandle_mark_dead(session) :
__wt_checkpoint_close(session, final));
WT_TRET(__wt_btree_close(session));
F_CLR(dhandle, WT_DHANDLE_OPEN);
if (!force || final) {
F_CLR(dhandle, WT_DHANDLE_OPEN);
if (dhandle->checkpoint == NULL)
--S2C(session)->open_btree_count;
}
F_CLR(btree, WT_BTREE_SPECIAL_FLAGS);
err: __wt_spin_unlock(session, &dhandle->close_lock);
@@ -521,6 +571,7 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session,
__wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl)
if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
!F_ISSET(dhandle, WT_DHANDLE_DEAD) &&
strcmp(uri, dhandle->name) == 0 &&
(apply_checkpoints || dhandle->checkpoint == NULL))
WT_RET(__conn_btree_apply_internal(
@@ -528,6 +579,7 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session,
} else {
SLIST_FOREACH(dhandle, &conn->dhlh, l)
if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
!F_ISSET(dhandle, WT_DHANDLE_DEAD) &&
(apply_checkpoints ||
dhandle->checkpoint == NULL) &&
WT_PREFIX_MATCH(dhandle->name, "file:") &&
@@ -649,8 +701,9 @@ __wt_conn_dhandle_close_all(
WT_ASSERT(session, session->dhandle == NULL);
bucket = __wt_hash_city64(name, strlen(name)) % WT_HASH_ARRAY_SIZE;
SLIST_FOREACH(dhandle, &conn->dhhash[bucket], l) {
if (strcmp(dhandle->name, name) != 0)
SLIST_FOREACH(dhandle, &conn->dhhash[bucket], hashl) {
if (strcmp(dhandle->name, name) != 0 ||
F_ISSET(dhandle, WT_DHANDLE_DEAD))
continue;
session->dhandle = dhandle;
@@ -722,7 +775,7 @@ __conn_dhandle_remove(WT_SESSION_IMPL *session, int final)
* Close/discard a single data handle.
*/
int
__wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final)
__wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final, int force)
{
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
@@ -730,8 +783,9 @@ __wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final)
dhandle = session->dhandle;
if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
tret = __wt_conn_btree_sync_and_close(session, final, 0);
if (F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
(final && F_ISSET(dhandle, WT_DHANDLE_DEAD))) {
tret = __wt_conn_btree_sync_and_close(session, final, force);
if (final && tret != 0) {
__wt_err(session, tret,
"Final close of %s failed", dhandle->name);
@@ -795,7 +849,7 @@ restart:
continue;
WT_WITH_DHANDLE(session, dhandle,
WT_TRET(__wt_conn_dhandle_discard_single(session, 1)));
WT_TRET(__wt_conn_dhandle_discard_single(session, 1, 0)));
goto restart;
}
@@ -811,7 +865,7 @@ restart:
/* Close the metadata file handle. */
while ((dhandle = SLIST_FIRST(&conn->dhlh)) != NULL)
WT_WITH_DHANDLE(session, dhandle,
WT_TRET(__wt_conn_dhandle_discard_single(session, 1)));
WT_TRET(__wt_conn_dhandle_discard_single(session, 1, 0)));
return (ret);
}

View File

@@ -422,6 +422,7 @@ __log_wrlsn_server(void *arg)
slot = &log->slot_pool[written[i].slot_index];
WT_ASSERT(session, LOG_CMP(&written[i].lsn,
&slot->slot_release_lsn) == 0);
log->write_start_lsn = slot->slot_start_lsn;
log->write_lsn = slot->slot_end_lsn;
WT_ERR(__wt_cond_signal(session,
log->log_write_cond));
@@ -552,6 +553,7 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[])
WT_ZERO_LSN(&log->sync_dir_lsn);
WT_INIT_LSN(&log->trunc_lsn);
WT_INIT_LSN(&log->write_lsn);
WT_INIT_LSN(&log->write_start_lsn);
log->fileid = 0;
WT_RET(__wt_cond_alloc(session, "log sync", 0, &log->log_sync_cond));
WT_RET(__wt_cond_alloc(session, "log write", 0, &log->log_write_cond));

View File

@@ -116,6 +116,7 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
WT_TRET(__wt_checkpoint_server_destroy(session));
WT_TRET(__wt_statlog_destroy(session, 1));
WT_TRET(__wt_sweep_destroy(session));
WT_TRET(__wt_evict_destroy(session));
/* Close open data handles. */
WT_TRET(__wt_conn_dhandle_discard(session));
@@ -153,9 +154,6 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
fh = SLIST_FIRST(&conn->fhlh);
}
/* Shut down the eviction server thread. */
WT_TRET(__wt_evict_destroy(session));
/* Disconnect from shared cache - must be before cache destroy. */
WT_TRET(__wt_conn_cache_pool_destroy(session));

View File

@@ -8,6 +8,159 @@
#include "wt_internal.h"
/*
 * __sweep_mark --
 *	Walk the connection's handle list, stamping idle handles with a time
 *	of death and counting the handles that are dead or ready to close.
 *
 *	On success, *dead_handlesp holds the number of handles that are either
 *	flagged WT_DHANDLE_DEAD or have been idle past the sweep idle time.
 */
static int
__sweep_mark(WT_SESSION_IMPL *session, int *dead_handlesp)
{
	WT_CONNECTION_IMPL *conn;
	WT_DATA_HANDLE *dhandle;
	time_t now;
	int candidates;

	conn = S2C(session);
	candidates = 0;

	/* Report zero handles if we fail before walking the list. */
	*dead_handlesp = 0;

	/* Idle time is measured against the current time. */
	WT_RET(__wt_seconds(session, &now));

	WT_STAT_FAST_CONN_INCR(session, dh_conn_sweeps);
	SLIST_FOREACH(dhandle, &conn->dhlh, l) {
		/* The metadata handle is never considered. */
		if (WT_IS_METADATA(dhandle))
			continue;

		/* Handles already marked dead are counted as-is. */
		if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) {
			++candidates;
			continue;
		}

		/* Only unused handles past the idle time are of interest. */
		if (dhandle->session_inuse == 0 &&
		    now > dhandle->timeofdeath + conn->sweep_idle_time) {
			if (dhandle->timeofdeath != 0)
				/* Stamped on an earlier pass: a candidate. */
				++candidates;
			else {
				/* First time seen idle: start the clock. */
				dhandle->timeofdeath = now;
				WT_STAT_FAST_CONN_INCR(session, dh_conn_tod);
			}
		}
	}

	*dead_handlesp = candidates;
	return (0);
}
/*
 * __sweep_expire --
 *	While the open file count is at or above the configured minimum, mark
 *	clean trees that have not been accessed recently as dead.
 */
static int
__sweep_expire(WT_SESSION_IMPL *session)
{
	WT_BTREE *btree;
	WT_CONNECTION_IMPL *conn;
	WT_DATA_HANDLE *dhandle;
	WT_DECL_RET;
	time_t now;

	conn = S2C(session);

	/* Recently used handles are kept: idleness is relative to "now". */
	WT_RET(__wt_seconds(session, &now));

	WT_STAT_FAST_CONN_INCR(session, dh_conn_sweeps);
	SLIST_FOREACH(dhandle, &conn->dhlh, l) {
		/*
		 * Stop as soon as the open file count has dropped below the
		 * minimum number of handles.
		 */
		if (conn->open_file_count < conn->sweep_handles_min)
			break;

		/*
		 * Skip the metadata handle, handles that are not open or are
		 * already dead, and handles that are in use or have not yet
		 * been idle for long enough.
		 */
		if (WT_IS_METADATA(dhandle) ||
		    !F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
		    F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
		    dhandle->session_inuse != 0 ||
		    now <= dhandle->timeofdeath + conn->sweep_idle_time)
			continue;

		/*
		 * This handle is a candidate: take an exclusive lock on it
		 * and mark it dead.
		 *
		 * Closing would need I/O if an update cannot be written
		 * (updates in a no-longer-referenced file might not yet be
		 * globally visible if sessions have disjoint sets of files
		 * open). Such handles are skipped and retried on a later
		 * pass, once the transaction state has moved on.
		 *
		 * WT_DHANDLE_EXCLUSIVE is deliberately not set: opens should
		 * block on us and then retry, not return EBUSY to the
		 * application. This runs with the handle list lock held so
		 * connection-level handle searches never need to retry.
		 */
		ret = __wt_try_writelock(session, dhandle->rwlock);
		if (ret == EBUSY)
			continue;
		WT_RET(ret);

		/*
		 * Only clean trees whose updates are all visible are swept.
		 *
		 * Marking the handle dead also closes the underlying file
		 * handle, which decrements the open file count, so this loop
		 * will not overrun the configured minimum.
		 */
		btree = dhandle->handle;
		if (!btree->modified &&
		    __wt_txn_visible_all(session, btree->rec_max_txn)) {
			WT_WITH_DHANDLE(session, dhandle, ret =
			    __wt_conn_btree_sync_and_close(session, 0, 1));
		}

		/* The handle lock is always dropped before checking errors. */
		WT_TRET(__wt_writeunlock(session, dhandle->rwlock));
		WT_RET_BUSY_OK(ret);
	}

	return (0);
}
/*
 * __sweep_flush --
 *	Sync and close every handle that is still open but marked dead.
 */
static int
__sweep_flush(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DATA_HANDLE *dhandle;
	WT_DECL_RET;

	conn = S2C(session);

	WT_STAT_FAST_CONN_INCR(session, dh_conn_sweeps);
	SLIST_FOREACH(dhandle, &conn->dhlh, l) {
		/* Only handles that are both open and dead need flushing. */
		if (F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
		    F_ISSET(dhandle, WT_DHANDLE_DEAD)) {
			/* Flush the dead handle from cache. */
			WT_WITH_DHANDLE(session, dhandle, ret =
			    __wt_conn_btree_sync_and_close(session, 0, 0));

			/* Count the btree handles we actually closed. */
			if (ret == 0)
				WT_STAT_FAST_CONN_INCR(
				    session, dh_conn_handles);

			WT_RET_BUSY_OK(ret);
		}
	}

	return (0);
}
/*
* __sweep_remove_handles --
* Remove closed dhandles from the connection list.
@@ -26,7 +179,9 @@ __sweep_remove_handles(WT_SESSION_IMPL *session)
dhandle_next = SLIST_NEXT(dhandle, l);
if (WT_IS_METADATA(dhandle))
continue;
if (F_ISSET(dhandle, WT_DHANDLE_OPEN))
if (F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
dhandle->session_inuse != 0 ||
dhandle->session_ref != 0)
continue;
/* Make sure we get exclusive access. */
@@ -46,7 +201,7 @@ __sweep_remove_handles(WT_SESSION_IMPL *session)
}
WT_WITH_DHANDLE(session, dhandle,
ret = __wt_conn_dhandle_discard_single(session, 0));
ret = __wt_conn_dhandle_discard_single(session, 0, 1));
/* If the handle was not successfully discarded, unlock it. */
if (ret != 0)
@@ -58,96 +213,6 @@ __sweep_remove_handles(WT_SESSION_IMPL *session)
return (ret == EBUSY ? 0 : ret);
}
/*
* __sweep --
* Close unused dhandles on the connection dhandle list.
*
* Walks conn->dhlh once. Handles that are not open and have no session
* references are counted; idle handles are first stamped with a time of
* death, and on a later pass (once idle past conn->sweep_idle_time) an
* attempt is made to close them if their tree is clean. If any closed,
* unreferenced handles were counted, __sweep_remove_handles is called
* under WT_WITH_DHANDLE_LOCK.
*
* Returns 0 on success or the error from a failing step.
* NOTE(review): EBUSY from the close appears to be tolerated via
* WT_RET_BUSY_OK -- confirm against the macro definition.
*/
static int
__sweep(WT_SESSION_IMPL *session)
{
WT_BTREE *btree;
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
time_t now;
int closed_handles;
conn = S2C(session);
closed_handles = 0;
/* Don't discard handles that have been open recently. */
WT_RET(__wt_seconds(session, &now));
WT_STAT_FAST_CONN_INCR(session, dh_conn_sweeps);
SLIST_FOREACH(dhandle, &conn->dhlh, l) {
/* The metadata handle is never swept. */
if (WT_IS_METADATA(dhandle))
continue;
/* Already closed and unreferenced: count it for removal below. */
if (!F_ISSET(dhandle, WT_DHANDLE_OPEN) &&
dhandle->session_inuse == 0 && dhandle->session_ref == 0) {
++closed_handles;
continue;
}
/* Skip handles that are in use or not yet idle for long enough. */
if (dhandle->session_inuse != 0 ||
now <= dhandle->timeofdeath + conn->sweep_idle_time)
continue;
/* First time seen idle: record the time and revisit on a later pass. */
if (dhandle->timeofdeath == 0) {
dhandle->timeofdeath = now;
WT_STAT_FAST_CONN_INCR(session, dh_conn_tod);
continue;
}
/*
* We have a candidate for closing; if it's open, acquire an
* exclusive lock on the handle and close it.
*
* The close would require I/O if an update cannot be written
* (updates in a no-longer-referenced file might not yet be
* globally visible if sessions have disjoint sets of files
* open). In that case, skip it: we'll retry the close the
* next time, after the transaction state has progressed.
*
* We don't set WT_DHANDLE_EXCLUSIVE deliberately, we want
* opens to block on us rather than returning an EBUSY error to
* the application.
*/
if ((ret =
__wt_try_writelock(session, dhandle->rwlock)) == EBUSY)
continue;
WT_RET(ret);
/* Only sweep clean trees where all updates are visible. */
btree = dhandle->handle;
if (btree->modified ||
!__wt_txn_visible_all(session, btree->rec_max_txn))
goto unlock;
/* If the handle is open, try to close it. */
if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
WT_WITH_DHANDLE(session, dhandle, ret =
__wt_conn_btree_sync_and_close(session, 0, 0));
/* We closed the btree handle, bump the statistic. */
if (ret == 0)
WT_STAT_FAST_CONN_INCR(
session, dh_conn_handles);
}
/* With no remaining session references, it can be removed below. */
if (dhandle->session_inuse == 0 && dhandle->session_ref == 0)
++closed_handles;
/* The handle's write lock is released on both the skip and close paths. */
unlock: WT_TRET(__wt_writeunlock(session, dhandle->rwlock));
WT_RET_BUSY_OK(ret);
}
/* Remove the counted handles while holding the handle list lock. */
if (closed_handles) {
WT_WITH_DHANDLE_LOCK(session,
ret = __sweep_remove_handles(session));
WT_RET(ret);
}
return (0);
}
/*
* __sweep_server --
* The handle sweep server thread.
@@ -158,12 +223,13 @@ __sweep_server(void *arg)
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_SESSION_IMPL *session;
int dead_handles;
session = arg;
conn = S2C(session);
/*
* Sweep for dead handles.
* Sweep for dead and excess handles.
*/
while (F_ISSET(conn, WT_CONN_SERVER_RUN) &&
F_ISSET(conn, WT_CONN_SERVER_SWEEP)) {
@@ -171,8 +237,28 @@ __sweep_server(void *arg)
WT_ERR(__wt_cond_wait(session, conn->sweep_cond,
(uint64_t)conn->sweep_interval * WT_MILLION));
/* Sweep the handles. */
WT_ERR(__sweep(session));
/*
* Mark handles with a time of death, and report whether any
* handles are marked dead.
*/
WT_ERR(__sweep_mark(session, &dead_handles));
if (dead_handles == 0 &&
conn->open_file_count < conn->sweep_handles_min)
continue;
/* Close handles if we have reached the configured limit. */
if (conn->open_file_count >= conn->sweep_handles_min) {
WT_WITH_DHANDLE_LOCK(session,
ret = __sweep_expire(session));
WT_ERR(ret);
}
WT_ERR(__sweep_flush(session));
WT_WITH_DHANDLE_LOCK(session,
ret = __sweep_remove_handles(session));
WT_ERR(ret);
}
if (0) {
@@ -202,6 +288,10 @@ __wt_sweep_config(WT_SESSION_IMPL *session, const char *cfg[])
cfg, "file_manager.close_scan_interval", &cval));
conn->sweep_interval = (time_t)cval.val;
WT_RET(__wt_config_gets(session,
cfg, "file_manager.close_handle_minimum", &cval));
conn->sweep_handles_min = (u_int)cval.val;
return (0);
}

View File

@@ -219,36 +219,6 @@ __curbulk_insert_row_skip_check(WT_CURSOR *cursor)
err: API_END_RET(session, ret);
}
/*
* __curbulk_close --
* WT_CURSOR->close for the bulk cursor type.
*
* Wraps up the bulk load, frees the cursor's "last" buffer, releases the
* btree handle, and then closes the cursor itself.
* NOTE(review): each step is wrapped in WT_TRET, presumably so later
* cleanup still runs after an earlier failure -- confirm against the
* macro definition.
*/
static int
__curbulk_close(WT_CURSOR *cursor)
{
WT_BTREE *btree;
WT_CURSOR_BULK *cbulk;
WT_DECL_RET;
WT_SESSION_IMPL *session;
cbulk = (WT_CURSOR_BULK *)cursor;
btree = cbulk->cbt.btree;
/* The "err" label used below is not declared in this function. */
CURSOR_API_CALL(cursor, session, close, btree);
/* Finish the bulk load and free the bulk-specific buffer. */
WT_TRET(__wt_bulk_wrapup(session, cbulk));
__wt_buf_free(session, &cbulk->last);
WT_TRET(__wt_session_release_btree(session));
/* The URI is owned by the btree handle. */
cursor->internal_uri = NULL;
WT_TRET(__wt_cursor_close(cursor));
err: API_END_RET(session, ret);
}
/*
* __wt_curbulk_init --
* Initialize a bulk cursor.
@@ -278,7 +248,6 @@ __wt_curbulk_init(WT_SESSION_IMPL *session,
break;
WT_ILLEGAL_VALUE(session);
}
c->close = __curbulk_close;
cbulk->bitmap = bitmap;
if (bitmap)

View File

@@ -356,11 +356,19 @@ static int
__curfile_close(WT_CURSOR *cursor)
{
WT_CURSOR_BTREE *cbt;
WT_CURSOR_BULK *cbulk;
WT_DECL_RET;
WT_SESSION_IMPL *session;
cbt = (WT_CURSOR_BTREE *)cursor;
CURSOR_API_CALL(cursor, session, close, cbt->btree);
if (F_ISSET(cursor, WT_CURSTD_BULK)) {
/* Free the bulk-specific resources. */
cbulk = (WT_CURSOR_BULK *)cbt;
WT_TRET(__wt_bulk_wrapup(session, cbulk));
__wt_buf_free(session, &cbulk->last);
}
WT_TRET(__wt_btcur_close(cbt));
if (cbt->btree != NULL) {
/* Increment the data-source's in-use counter. */

View File

@@ -14,7 +14,8 @@
*/
static int
__curlog_logrec(WT_SESSION_IMPL *session,
WT_ITEM *logrec, WT_LSN *lsnp, void *cookie, int firstrecord)
WT_ITEM *logrec, WT_LSN *lsnp, WT_LSN *next_lsnp,
void *cookie, int firstrecord)
{
WT_CURSOR_LOG *cl;
@@ -23,8 +24,7 @@ __curlog_logrec(WT_SESSION_IMPL *session,
/* Set up the LSNs and take a copy of the log record for the cursor. */
*cl->cur_lsn = *lsnp;
*cl->next_lsn = *lsnp;
cl->next_lsn->offset += (wt_off_t)logrec->size;
*cl->next_lsn = *next_lsnp;
WT_RET(__wt_buf_set(session, cl->logrec, logrec->data, logrec->size));
/*

View File

@@ -8,6 +8,7 @@
#include "wt_internal.h"
static int __evict_clear_all_walks(WT_SESSION_IMPL *);
static int __evict_clear_walks(WT_SESSION_IMPL *);
static int __evict_has_work(WT_SESSION_IMPL *, uint32_t *);
static int WT_CDECL __evict_lru_cmp(const void *, const void *);
@@ -211,24 +212,15 @@ __evict_server(void *arg)
WT_ERR(__wt_verbose(session, WT_VERB_EVICTSERVER, "waking"));
}
/*
* The eviction server is shutting down: in case any trees are still
* open, clear all walks now so that they can be closed.
*/
WT_ERR(__evict_clear_all_walks(session));
WT_ERR(__wt_verbose(
session, WT_VERB_EVICTSERVER, "cache eviction server exiting"));
if (cache->pages_inmem != cache->pages_evict)
__wt_errx(session,
"cache server: exiting with %" PRIu64 " pages in "
"memory and %" PRIu64 " pages evicted",
cache->pages_inmem, cache->pages_evict);
if (cache->bytes_inmem != 0)
__wt_errx(session,
"cache server: exiting with %" PRIu64 " bytes in memory",
cache->bytes_inmem);
if (cache->bytes_dirty != 0 || cache->pages_dirty != 0)
__wt_errx(session,
"cache server: exiting with %" PRIu64
" bytes dirty and %" PRIu64 " pages dirty",
cache->bytes_dirty, cache->pages_dirty);
if (0) {
err: WT_PANIC_MSG(session, ret, "cache eviction server error");
}
@@ -322,7 +314,7 @@ __wt_evict_create(WT_SESSION_IMPL *session)
/*
* __wt_evict_destroy --
* Destroy the eviction server thread.
* Destroy the eviction threads.
*/
int
__wt_evict_destroy(WT_SESSION_IMPL *session)
@@ -570,6 +562,29 @@ __evict_pass(WT_SESSION_IMPL *session)
return (0);
}
/*
 * __evict_clear_walk --
 *	Drop the eviction walk point, if any, of the session's current btree.
 */
static int
__evict_clear_walk(WT_SESSION_IMPL *session)
{
	WT_BTREE *btree;
	WT_REF *walk_ref;

	btree = S2BT(session);
	walk_ref = btree->evict_ref;

	if (walk_ref != NULL) {
		/*
		 * Reset evict_ref before releasing the page, in case the
		 * release forces eviction (we assert we never try to evict
		 * the current eviction walk point).
		 */
		btree->evict_ref = NULL;
		return (__wt_page_release(session, walk_ref, 0));
	}

	/* No walk point is set, nothing to do. */
	return (0);
}
/*
* __evict_clear_walks --
* Clear the eviction walk points for any file a session is waiting on.
@@ -577,11 +592,9 @@ __evict_pass(WT_SESSION_IMPL *session)
static int
__evict_clear_walks(WT_SESSION_IMPL *session)
{
WT_BTREE *btree;
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_REF *ref;
WT_SESSION_IMPL *s;
u_int i, session_cnt;
@@ -594,30 +607,19 @@ __evict_clear_walks(WT_SESSION_IMPL *session)
continue;
if (s->dhandle == cache->evict_file_next)
cache->evict_file_next = NULL;
session->dhandle = s->dhandle;
btree = s->dhandle->handle;
if ((ref = btree->evict_ref) != NULL) {
/*
* Clear evict_ref first, in case releasing it forces
* eviction (we assert that we never try to evict the
* current eviction walk point).
*/
btree->evict_ref = NULL;
WT_TRET(__wt_page_release(session, ref, 0));
}
session->dhandle = NULL;
WT_WITH_DHANDLE(
session, s->dhandle, WT_TRET(__evict_clear_walk(session)));
}
return (ret);
}
/*
* __evict_tree_walk_clear --
* Clear the tree's current eviction point, acquiring the eviction lock.
* __evict_request_walk_clear --
* Request that the eviction server clear the tree's current eviction
* point.
*/
static int
__evict_tree_walk_clear(WT_SESSION_IMPL *session)
__evict_request_walk_clear(WT_SESSION_IMPL *session)
{
WT_BTREE *btree;
WT_CACHE *cache;
@@ -639,6 +641,26 @@ __evict_tree_walk_clear(WT_SESSION_IMPL *session)
return (ret);
}
/*
 * __evict_clear_all_walks --
 *	Clear the eviction walk point in every "file:" handle found on the
 *	connection's data handle list.
 *
 *	A failure on one handle does not stop the walk; the result
 *	accumulated by WT_TRET is returned once the list has been traversed.
 */
static int
__evict_clear_all_walks(WT_SESSION_IMPL *session)
{
	WT_CONNECTION_IMPL *conn;
	WT_DATA_HANDLE *handle;
	WT_DECL_RET;

	conn = S2C(session);

	SLIST_FOREACH(handle, &conn->dhlh, l) {
		/* Only handles named with the "file:" prefix are visited. */
		if (!WT_PREFIX_MATCH(handle->name, "file:"))
			continue;

		/* Make the handle current while its walk is cleared. */
		WT_WITH_DHANDLE(session,
		    handle, WT_TRET(__evict_clear_walk(session)));
	}

	return (ret);
}
/*
* __wt_evict_page --
* Evict a given page.
@@ -711,7 +733,7 @@ __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session, int *evict_resetp)
__wt_spin_unlock(session, &cache->evict_walk_lock);
/* Clear any existing LRU eviction walk for the file. */
WT_RET(__evict_tree_walk_clear(session));
WT_RET(__evict_request_walk_clear(session));
/* Hold the evict lock to remove any queued pages from this file. */
__wt_spin_lock(session, &cache->evict_lock);
@@ -1404,6 +1426,19 @@ __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_server)
if (page->read_gen != WT_READGEN_OLDEST)
page->read_gen = __wt_cache_read_gen_set(session);
/*
* If we are evicting in a dead tree, don't write dirty pages.
*
* Force pages clean to keep statistics correct and to let the
* page-discard function assert that no dirty pages are ever
* discarded.
*/
if (F_ISSET(btree->dhandle, WT_DHANDLE_DEAD) &&
__wt_page_is_modified(page)) {
page->modify->write_gen = 0;
__wt_cache_dirty_decr(session, page);
}
WT_WITH_BTREE(session, btree, ret = __wt_evict_page(session, ref));
(void)WT_ATOMIC_SUB4(btree->evict_busy, 1);

View File

@@ -144,8 +144,8 @@ struct __wt_btree {
/* Flags values up to 0xff are reserved for WT_DHANDLE_* */
#define WT_BTREE_BULK 0x00100 /* Bulk-load handle */
#define WT_BTREE_NO_EVICTION 0x00200 /* Disable eviction */
#define WT_BTREE_NO_HAZARD 0x00400 /* Disable hazard pointers */
#define WT_BTREE_IN_MEMORY 0x00200 /* Cache-resident object */
#define WT_BTREE_NO_EVICTION 0x00400 /* Disable eviction */
#define WT_BTREE_SALVAGE 0x00800 /* Handle is for salvage */
#define WT_BTREE_UPGRADE 0x01000 /* Handle is for upgrade */
#define WT_BTREE_VERIFY 0x02000 /* Handle is for verify */

View File

@@ -967,10 +967,6 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_PAGE *page, int check_splits)
if (mod == NULL)
return (1);
/* Skip pages that are already being evicted. */
if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU))
return (0);
/*
* If the tree was deepened, there's a requirement that newly created
* internal pages not be evicted until all threads are known to have

View File

@@ -202,6 +202,7 @@ struct __wt_connection_impl {
u_int open_btree_count; /* Locked: open writable btree count */
uint32_t next_file_id; /* Locked: file ID counter */
uint32_t open_file_count; /* Atomic: open file handle count */
/*
* WiredTiger allocates space for 50 simultaneous sessions (threads of
@@ -337,6 +338,7 @@ struct __wt_connection_impl {
WT_CONDVAR *sweep_cond; /* Handle sweep wait mutex */
time_t sweep_idle_time;/* Handle sweep idle time */
time_t sweep_interval;/* Handle sweep interval */
u_int sweep_handles_min;/* Handle sweep minimum open */
/* Locked: collator list */
TAILQ_HEAD(__wt_coll_qh, __wt_named_collator) collqh;

View File

@@ -65,11 +65,12 @@ struct __wt_data_handle {
WT_DSRC_STATS stats; /* Data-source statistics */
/* Flags values over 0xff are reserved for WT_BTREE_* */
#define WT_DHANDLE_DISCARD 0x01 /* Discard on release */
#define WT_DHANDLE_DISCARD_CLOSE 0x02 /* Close on release */
#define WT_DHANDLE_EXCLUSIVE 0x04 /* Need exclusive access */
#define WT_DHANDLE_HAVE_REF 0x08 /* Already have ref */
#define WT_DHANDLE_LOCK_ONLY 0x10 /* Handle only used as a lock */
#define WT_DHANDLE_OPEN 0x20 /* Handle is open */
#define WT_DHANDLE_DEAD 0x01 /* Dead, awaiting discard */
#define WT_DHANDLE_DISCARD 0x02 /* Discard on release */
#define WT_DHANDLE_DISCARD_FORCE 0x04 /* Force discard on release */
#define WT_DHANDLE_EXCLUSIVE 0x08 /* Need exclusive access */
#define WT_DHANDLE_HAVE_REF 0x10 /* Already have ref */
#define WT_DHANDLE_LOCK_ONLY 0x20 /* Handle only used as a lock */
#define WT_DHANDLE_OPEN 0x40 /* Handle is open */
uint32_t flags;
};

View File

@@ -71,6 +71,7 @@ extern int __wt_verify_ckpt_load( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_
extern int __wt_verify_ckpt_unload(WT_SESSION_IMPL *session, WT_BLOCK *block);
extern int __wt_block_verify_addr(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size);
extern u_int __wt_block_header(WT_BLOCK *block);
extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len);
extern int __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep);
extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, int data_cksum);
extern int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump, int data_cksum, int caller_locked);
@@ -231,7 +232,7 @@ extern int __wt_conn_btree_apply(WT_SESSION_IMPL *session, int apply_checkpoints
extern int __wt_conn_btree_apply_single_ckpt(WT_SESSION_IMPL *session, const char *uri, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
extern int __wt_conn_btree_apply_single(WT_SESSION_IMPL *session, const char *uri, const char *checkpoint, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
extern int __wt_conn_dhandle_close_all( WT_SESSION_IMPL *session, const char *name, int force);
extern int __wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final);
extern int __wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final, int force);
extern int __wt_conn_dhandle_discard(WT_SESSION_IMPL *session);
extern int __wt_connection_init(WT_CONNECTION_IMPL *conn);
extern int __wt_connection_destroy(WT_CONNECTION_IMPL *conn);
@@ -326,7 +327,7 @@ extern int __wt_log_open(WT_SESSION_IMPL *session);
extern int __wt_log_close(WT_SESSION_IMPL *session);
extern int __wt_log_newfile(WT_SESSION_IMPL *session, int conn_create, int *created);
extern int __wt_log_read(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, uint32_t flags);
extern int __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, int (*func)(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, void *cookie, int firstrecord), void *cookie);
extern int __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags, int (*func)(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, WT_LSN *next_lsnp, void *cookie, int firstrecord), void *cookie);
extern int __wt_log_write(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, uint32_t flags);
extern int __wt_log_vprintf(WT_SESSION_IMPL *session, const char *fmt, va_list ap);
extern int __wt_logrec_alloc(WT_SESSION_IMPL *session, size_t size, WT_ITEM **logrecp);
@@ -425,6 +426,7 @@ extern int __wt_metadata_remove(WT_SESSION_IMPL *session, const char *key);
extern int __wt_metadata_search( WT_SESSION_IMPL *session, const char *key, char **valuep);
extern void __wt_meta_track_discard(WT_SESSION_IMPL *session);
extern int __wt_meta_track_on(WT_SESSION_IMPL *session);
extern int __wt_meta_track_find_handle( WT_SESSION_IMPL *session, const char *name, const char *checkpoint);
extern int __wt_meta_track_off(WT_SESSION_IMPL *session, int need_sync, int unroll);
extern int __wt_meta_track_sub_on(WT_SESSION_IMPL *session);
extern int __wt_meta_track_sub_off(WT_SESSION_IMPL *session);
@@ -574,7 +576,7 @@ extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name
extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config, WT_SESSION_IMPL **sessionp);
extern int __wt_compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, int *skip);
extern int __wt_session_compact( WT_SESSION *wt_session, const char *uri, const char *config);
extern int __wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags);
extern int __wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags, int *deadp);
extern int __wt_session_release_btree(WT_SESSION_IMPL *session);
extern int __wt_session_get_btree_ckpt(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], uint32_t flags);
extern void __wt_session_close_cache(WT_SESSION_IMPL *session);
@@ -672,7 +674,7 @@ extern int __wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, int final, int force);
extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, int final);
extern uint64_t __wt_ext_transaction_id(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session);
extern int __wt_ext_transaction_isolation_level( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session);
extern int __wt_ext_transaction_notify( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_TXN_NOTIFY *notify);

View File

@@ -128,7 +128,8 @@ typedef struct {
WT_LSN sync_dir_lsn; /* LSN of the last directory sync */
WT_LSN sync_lsn; /* LSN of the last sync */
WT_LSN trunc_lsn; /* End LSN for recovery truncation */
WT_LSN write_lsn; /* Last LSN written to log file */
WT_LSN write_lsn; /* End of last LSN written */
WT_LSN write_start_lsn;/* Beginning of last LSN written */
/*
* Synchronization resources

View File

@@ -125,12 +125,18 @@ struct __wt_table {
WT_CONNECTION_IMPL *__conn = S2C(session); \
int __handle_locked = \
F_ISSET(session, WT_SESSION_HANDLE_LIST_LOCKED);\
int __table_locked = \
F_ISSET(session, WT_SESSION_TABLE_LOCKED); \
int __schema_locked = \
F_ISSET(session, WT_SESSION_SCHEMA_LOCKED); \
if (__handle_locked) { \
F_CLR(session, WT_SESSION_HANDLE_LIST_LOCKED); \
__wt_spin_unlock(session, &__conn->dhandle_lock);\
} \
if (__table_locked) { \
F_CLR(session, WT_SESSION_TABLE_LOCKED); \
__wt_spin_unlock(session, &__conn->table_lock);\
} \
if (__schema_locked) { \
F_CLR(session, WT_SESSION_SCHEMA_LOCKED); \
__wt_spin_unlock(session, &__conn->schema_lock);\
@@ -140,6 +146,10 @@ struct __wt_table {
__wt_spin_lock(session, &__conn->schema_lock); \
F_SET(session, WT_SESSION_SCHEMA_LOCKED); \
} \
if (__table_locked) { \
__wt_spin_lock(session, &__conn->table_lock); \
F_SET(session, WT_SESSION_TABLE_LOCKED); \
} \
if (__handle_locked) { \
__wt_spin_lock(session, &__conn->dhandle_lock); \
F_SET(session, WT_SESSION_HANDLE_LIST_LOCKED); \

View File

@@ -23,7 +23,7 @@ struct __wt_stats {
#define WT_STAT_ATOMIC_INCRV(stats, fld, value) do { \
(void)WT_ATOMIC_ADD8(WT_STAT(stats, fld), (value)); \
} while (0)
#define WT_STAT_ATOMIC_INCR(stats, fld) WT_ATOMIC_ADD(WT_STAT(stats, fld), 1)
#define WT_STAT_ATOMIC_INCR(stats, fld) WT_ATOMIC_ADD8(WT_STAT(stats, fld), 1)
#define WT_STAT_DECRV(stats, fld, value) do { \
(stats)->fld.v -= (value); \
} while (0)

View File

@@ -1609,12 +1609,15 @@ struct __wt_connection {
* integer between 10 and 99; default \c 95.}
* @config{file_manager = (, control how file handles are managed., a
* set of related configuration options defined below.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;close_handle_minimum, number of
* handles open before the file manager will look for handles to close.,
* a string; default \c 250.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;close_idle_time, amount of time in
* seconds a file handle needs to be idle before attempting to close
* it., an integer between 1 and 1000; default \c 30.}
* it., an integer between 1 and 100000; default \c 30.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;close_scan_interval, interval in
* seconds at which to check for files that are inactive and close
* them., an integer between 1 and 1000; default \c 10.}
* them., an integer between 1 and 100000; default \c 10.}
* @config{ ),,}
* @config{lsm_manager = (, configure database wide options for LSM tree
* management., a set of related configuration options defined below.}
@@ -1980,12 +1983,15 @@ struct __wt_connection {
* following options: \c "data"\, \c "log"; default empty.}
* @config{file_manager = (, control how file handles are managed., a set of
* related configuration options defined below.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;close_idle_time, amount of time in seconds a
* file handle needs to be idle before attempting to close it., an integer
* between 1 and 1000; default \c 30.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;close_handle_minimum, number of handles open
* before the file manager will look for handles to close., a string; default \c
* 250.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;close_idle_time, amount of time in
* seconds a file handle needs to be idle before attempting to close it., an
* integer between 1 and 100000; default \c 30.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;close_scan_interval, interval in seconds at
* which to check for files that are inactive and close them., an integer
* between 1 and 1000; default \c 10.}
* between 1 and 100000; default \c 10.}
* @config{ ),,}
* @config{hazard_max, maximum number of simultaneous hazard pointers per
* session handle., an integer greater than or equal to 15; default \c 1000.}

View File

@@ -46,7 +46,10 @@ __wt_log_needs_recovery(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn, int *rec)
WT_CONNECTION_IMPL *conn;
WT_CURSOR *c;
WT_DECL_RET;
WT_ITEM dummy_key, dummy_value;
WT_LOG *log;
uint64_t dummy_txnid;
uint32_t dummy_fileid, dummy_optype, rectype;
conn = S2C(session);
log = conn->log;
@@ -59,21 +62,37 @@ __wt_log_needs_recovery(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn, int *rec)
if (log == NULL)
return (0);
/*
* See if there are any data modification records between the
* checkpoint LSN and the end of the log. If there are none then
* we can skip recovery.
*/
WT_RET(__wt_curlog_open(session, "log:", NULL, &c));
c->set_key(c, ckp_lsn->file, ckp_lsn->offset, 0);
if ((ret = c->search(c)) == 0) {
while ((ret = c->next(c)) == 0) {
/*
* The only thing we care about is the rectype.
*/
WT_ERR(c->get_value(c, &dummy_txnid, &rectype,
&dummy_optype, &dummy_fileid,
&dummy_key, &dummy_value));
if (rectype == WT_LOGREC_COMMIT)
break;
}
/*
* If the checkpoint LSN we're given is the last record,
* then recovery is not needed.
* If we get to the end of the log, we can skip recovery.
*/
if ((ret = c->next(c)) == WT_NOTFOUND) {
if (ret == WT_NOTFOUND) {
*rec = 0;
ret = 0;
}
} else if (ret == WT_NOTFOUND)
/*
* If we didn't find that LSN, we need to run recovery,
* but not return any error.
* We should always find the checkpoint LSN as it now points
* to the beginning of a written log record. But if we're
* running recovery on an earlier database we may not. In
* that case, we need to run recovery, don't return an error.
*/
ret = 0;
else
@@ -332,7 +351,6 @@ __log_decompress(WT_SESSION_IMPL *session, WT_ITEM *in, WT_ITEM **out)
{
WT_COMPRESSOR *compressor;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_LOG_RECORD *logrec;
size_t result_len, skip;
uint32_t uncompressed_size;
@@ -342,14 +360,14 @@ __log_decompress(WT_SESSION_IMPL *session, WT_ITEM *in, WT_ITEM **out)
skip = WT_LOG_COMPRESS_SKIP;
compressor = conn->log_compressor;
if (compressor == NULL || compressor->decompress == NULL)
WT_ERR_MSG(session, WT_ERROR,
WT_RET_MSG(session, WT_ERROR,
"log_read: Compressed record with "
"no configured compressor");
uncompressed_size = logrec->mem_len;
WT_ERR(__wt_scr_alloc(session, 0, out));
WT_ERR(__wt_buf_initsize(session, *out, uncompressed_size));
WT_RET(__wt_scr_alloc(session, 0, out));
WT_RET(__wt_buf_initsize(session, *out, uncompressed_size));
memcpy((*out)->mem, in->mem, skip);
WT_ERR(compressor->decompress(compressor, &session->iface,
WT_RET(compressor->decompress(compressor, &session->iface,
(uint8_t *)in->mem + skip, in->size - skip,
(uint8_t *)(*out)->mem + skip,
uncompressed_size - skip, &result_len));
@@ -360,9 +378,10 @@ __log_decompress(WT_SESSION_IMPL *session, WT_ITEM *in, WT_ITEM **out)
* here after corruption happens. If we're salvaging the file,
* it's OK, otherwise it's really, really bad.
*/
if (ret != 0 || result_len != uncompressed_size - WT_LOG_COMPRESS_SKIP)
WT_ERR(WT_ERROR);
err: return (ret);
if (result_len != uncompressed_size - WT_LOG_COMPRESS_SKIP)
return (WT_ERROR);
return (0);
}
/*
@@ -564,7 +583,7 @@ __log_truncate(WT_SESSION_IMPL *session,
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_FH *log_fh, *tmp_fh;
WT_FH *log_fh;
WT_LOG *log;
uint32_t lognum;
u_int i, logcount;
@@ -581,10 +600,8 @@ __log_truncate(WT_SESSION_IMPL *session,
*/
WT_ERR(__log_openfile(session, 0, &log_fh, file_prefix, lsn->file));
WT_ERR(__wt_ftruncate(session, log_fh, lsn->offset));
tmp_fh = log_fh;
log_fh = NULL;
WT_ERR(__wt_fsync(session, tmp_fh));
WT_ERR(__wt_close(session, &tmp_fh));
WT_ERR(__wt_fsync(session, log_fh));
WT_ERR(__wt_close(session, &log_fh));
/*
* If we just want to truncate the current log, return and skip
@@ -605,10 +622,8 @@ __log_truncate(WT_SESSION_IMPL *session,
*/
WT_ERR(__wt_ftruncate(session,
log_fh, LOG_FIRST_RECORD));
tmp_fh = log_fh;
log_fh = NULL;
WT_ERR(__wt_fsync(session, tmp_fh));
WT_ERR(__wt_close(session, &tmp_fh));
WT_ERR(__wt_fsync(session, log_fh));
WT_ERR(__wt_close(session, &log_fh));
}
}
err: WT_TRET(__wt_close(session, &log_fh));
@@ -630,7 +645,7 @@ __wt_log_allocfile(
WT_DECL_ITEM(from_path);
WT_DECL_ITEM(to_path);
WT_DECL_RET;
WT_FH *log_fh, *tmp_fh;
WT_FH *log_fh;
WT_LOG *log;
conn = S2C(session);
@@ -655,10 +670,8 @@ __wt_log_allocfile(
WT_ERR(__wt_ftruncate(session, log_fh, LOG_FIRST_RECORD));
if (prealloc)
WT_ERR(__log_prealloc(session, log_fh));
tmp_fh = log_fh;
log_fh = NULL;
WT_ERR(__wt_fsync(session, tmp_fh));
WT_ERR(__wt_close(session, &tmp_fh));
WT_ERR(__wt_fsync(session, log_fh));
WT_ERR(__wt_close(session, &log_fh));
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
"log_prealloc: rename %s to %s",
(char *)from_path->data, (char *)to_path->data));
@@ -968,6 +981,7 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, int *freep)
WT_ERR(__wt_cond_wait(
session, log->log_write_cond, 200));
}
log->write_start_lsn = slot->slot_start_lsn;
log->write_lsn = slot->slot_end_lsn;
WT_ERR(__wt_cond_signal(session, log->log_write_cond));
@@ -1123,6 +1137,7 @@ __wt_log_newfile(WT_SESSION_IMPL *session, int conn_create, int *created)
WT_RET(__wt_fsync(session, log->log_fh));
log->sync_lsn = end_lsn;
log->write_lsn = end_lsn;
log->write_start_lsn = end_lsn;
}
if (created != NULL)
*created = create_log;
@@ -1238,7 +1253,8 @@ err:
int
__wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags,
int (*func)(WT_SESSION_IMPL *session,
WT_ITEM *record, WT_LSN *lsnp, void *cookie, int firstrecord), void *cookie)
WT_ITEM *record, WT_LSN *lsnp, WT_LSN *next_lsnp,
void *cookie, int firstrecord), void *cookie)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_ITEM(uncitem);
@@ -1247,7 +1263,7 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags,
WT_ITEM buf;
WT_LOG *log;
WT_LOG_RECORD *logrec;
WT_LSN end_lsn, rd_lsn, start_lsn;
WT_LSN end_lsn, next_lsn, rd_lsn, start_lsn;
wt_off_t log_size;
uint32_t allocsize, cksum, firstlog, lastlog, lognum, rdup_len, reclen;
u_int i, logcount;
@@ -1370,6 +1386,7 @@ advance:
WT_ERR(__log_openfile(
session, 0, &log_fh, WT_LOG_FILENAME, rd_lsn.file));
WT_ERR(__log_filesize(session, log_fh, &log_size));
eol = 0;
continue;
}
/*
@@ -1432,6 +1449,12 @@ advance:
*/
if (log != NULL)
log->trunc_lsn = rd_lsn;
/*
* If the user asked for a specific LSN and it is not
* a valid LSN, return WT_NOTFOUND.
*/
if (LF_ISSET(WT_LOGSCAN_ONE))
ret = WT_NOTFOUND;
break;
}
@@ -1440,23 +1463,25 @@ advance:
* header, invoke the callback.
*/
WT_STAT_FAST_CONN_INCR(session, log_scan_records);
next_lsn = rd_lsn;
next_lsn.offset += (wt_off_t)rdup_len;
if (rd_lsn.offset != 0) {
if (F_ISSET(logrec, WT_LOG_RECORD_COMPRESSED)) {
WT_ERR(__log_decompress(session, &buf,
&uncitem));
WT_ERR((*func)(session, uncitem, &rd_lsn,
cookie, firstrecord));
&next_lsn, cookie, firstrecord));
__wt_scr_free(session, &uncitem);
} else
WT_ERR((*func)(session, &buf, &rd_lsn, cookie,
firstrecord));
WT_ERR((*func)(session, &buf, &rd_lsn,
&next_lsn, cookie, firstrecord));
firstrecord = 0;
if (LF_ISSET(WT_LOGSCAN_ONE))
break;
}
rd_lsn.offset += (wt_off_t)rdup_len;
rd_lsn = next_lsn;
}
/* Truncate if we're in recovery. */

View File

@@ -391,13 +391,22 @@ __clsm_open_cursors(
c = &clsm->iface;
session = (WT_SESSION_IMPL *)c->session;
txn = &session->txn;
lsm_tree = clsm->lsm_tree;
chunk = NULL;
locked = 0;
lsm_tree = clsm->lsm_tree;
if (update) {
if (txn->isolation == TXN_ISO_SNAPSHOT)
F_SET(clsm, WT_CLSM_OPEN_SNAPSHOT);
} else
/*
* Ensure that any snapshot update has cursors on the right set of
* chunks to guarantee visibility is correct.
*/
if (update && txn->isolation == TXN_ISO_SNAPSHOT)
F_SET(clsm, WT_CLSM_OPEN_SNAPSHOT);
/*
* Query operations need a full set of cursors. Overwrite cursors
* do queries in service of updates.
*/
if (!update || !F_ISSET(c, WT_CURSTD_OVERWRITE))
F_SET(clsm, WT_CLSM_OPEN_READ);
if (lsm_tree->nchunks == 0)
@@ -407,10 +416,12 @@ __clsm_open_cursors(
ckpt_cfg[1] = "checkpoint=" WT_CHECKPOINT ",raw";
ckpt_cfg[2] = NULL;
/* Copy the key, so we don't lose the cursor position. */
if (F_ISSET(c, WT_CURSTD_KEY_INT) && !WT_DATA_IN_ITEM(&c->key))
WT_RET(__wt_buf_set(
session, &c->key, c->key.data, c->key.size));
/*
* If the key is pointing to memory that is pinned by a chunk
* cursor, take a copy before closing cursors.
*/
if (F_ISSET(c, WT_CURSTD_KEY_INT))
WT_CURSOR_NEEDKEY(c);
F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV);
@@ -1201,9 +1212,21 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp)
deleted = __clsm_deleted(clsm, &cursor->value);
if (!deleted)
__clsm_deleted_decode(clsm, &cursor->value);
else if ((ret = cursor->next(cursor)) == 0) {
cmp = 1;
deleted = 0;
else {
/*
* We have a key pointing at memory that is
* pinned by the current chunk cursor. In the
* unlikely event that we have to reopen cursors
* to move to the next record, make sure the cursor
* flags are set so a copy is made before the current
* chunk cursor releases its position.
*/
F_CLR(cursor, WT_CURSTD_KEY_SET);
F_SET(cursor, WT_CURSTD_KEY_INT);
if ((ret = cursor->next(cursor)) == 0) {
cmp = 1;
deleted = 0;
}
}
WT_ERR_NOTFOUND_OK(ret);
}

View File

@@ -344,7 +344,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
WT_RET_MSG(session, ret, "LSM metadata write");
/*
* Clear the "cache resident" flag so the primary can be evicted and
* Clear the no-eviction flag so the primary can be evicted and
* eventually closed. Only do this once the checkpoint has succeeded:
* otherwise, accessing the leaf page during the checkpoint can trigger
* forced eviction.
@@ -457,7 +457,7 @@ __lsm_discard_handle(
WT_RET(__wt_session_get_btree(session, uri, checkpoint, NULL,
WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY));
F_SET(session->dhandle, WT_DHANDLE_DISCARD);
F_SET(session->dhandle, WT_DHANDLE_DISCARD_FORCE);
return (__wt_session_release_btree(session));
}

View File

@@ -71,7 +71,7 @@ __wt_metadata_cursor(
* We use the metadata a lot, so we have a handle cached; lock it and
* increment the in-use counter once the cursor is open.
*/
WT_ERR(__wt_session_lock_dhandle(session, 0));
WT_ERR(__wt_session_lock_dhandle(session, 0, NULL));
WT_ERR(__wt_curfile_create(session, NULL, cfg, 0, 0, cursorp));
__wt_cursor_dhandle_incr_use(session);

View File

@@ -183,6 +183,35 @@ free: trk->op = WT_ST_EMPTY;
return (ret);
}
/*
 * __wt_meta_track_find_handle --
 *	Check if we have already seen a handle.
 *
 *	Walk the session's metadata tracking entries from the most recent
 *	back to the first, looking for a WT_ST_LOCK entry whose data handle
 *	has the given name and checkpoint. A NULL checkpoint only matches a
 *	handle whose checkpoint is also NULL.
 *
 *	Returns 0 if a matching entry is found, WT_NOTFOUND otherwise.
 */
int
__wt_meta_track_find_handle(
    WT_SESSION_IMPL *session, const char *name, const char *checkpoint)
{
	WT_META_TRACK *trk, *trk_orig;

	WT_ASSERT(session,
	    WT_META_TRACKING(session) && session->meta_track_nest > 0);

	trk_orig = session->meta_track;
	trk = session->meta_track_next;

	while (--trk >= trk_orig) {
		/* Only handle-lock entries reference a data handle. */
		if (trk->op != WT_ST_LOCK)
			continue;

		/*
		 * The checkpoint names must either both be absent or both be
		 * present and equal. Check both pointers before comparing:
		 * passing NULL to strcmp is undefined behavior, and a tracked
		 * checkpoint handle must not match a request for the live
		 * tree (or vice versa).
		 */
		if (strcmp(trk->dhandle->name, name) == 0 &&
		    ((trk->dhandle->checkpoint == NULL && checkpoint == NULL) ||
		    (trk->dhandle->checkpoint != NULL && checkpoint != NULL &&
		    strcmp(trk->dhandle->checkpoint, checkpoint) == 0)))
			return (0);
	}

	return (WT_NOTFOUND);
}
/*
* __wt_meta_track_off --
* Turn off metadata operation tracking, unrolling on error.

View File

@@ -145,8 +145,7 @@ __wt_realloc_aligned(WT_SESSION_IMPL *session,
bytes_to_allocate =
WT_ALIGN(bytes_to_allocate, S2C(session)->buffer_alignment);
if (session != NULL)
WT_STAT_FAST_CONN_INCR(session, memory_allocation);
WT_STAT_FAST_CONN_INCR(session, memory_allocation);
if ((ret = posix_memalign(&newp,
S2C(session)->buffer_alignment,

View File

@@ -18,10 +18,8 @@ __wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
WT_DECL_RET;
WT_SYSCALL_RETRY(ftruncate(fh->fd, len), ret);
if (ret == 0) {
fh->size = fh->extend_size = len;
if (ret == 0)
return (0);
}
WT_RET_MSG(session, ret, "%s ftruncate error", fh->name);
}

View File

@@ -60,7 +60,7 @@ __wt_open(WT_SESSION_IMPL *session,
hash = __wt_hash_city64(name, strlen(name));
bucket = hash % WT_HASH_ARRAY_SIZE;
__wt_spin_lock(session, &conn->fh_lock);
SLIST_FOREACH(tfh, &conn->fhhash[bucket], l) {
SLIST_FOREACH(tfh, &conn->fhhash[bucket], hashl) {
if (strcmp(name, tfh->name) == 0) {
++tfh->ref;
*fhp = tfh;
@@ -174,7 +174,7 @@ setupfh:
*/
matched = 0;
__wt_spin_lock(session, &conn->fh_lock);
SLIST_FOREACH(tfh, &conn->fhhash[bucket], l) {
SLIST_FOREACH(tfh, &conn->fhhash[bucket], hashl) {
if (strcmp(name, tfh->name) == 0) {
++tfh->ref;
*fhp = tfh;
@@ -184,7 +184,7 @@ setupfh:
}
if (!matched) {
WT_CONN_FILE_INSERT(conn, fh, bucket);
WT_STAT_FAST_CONN_INCR(session, file_open);
WT_STAT_ATOMIC_INCR(&conn->stats, file_open);
*fhp = fh;
}
@@ -230,7 +230,7 @@ __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp)
/* Remove from the list. */
bucket = fh->name_hash % WT_HASH_ARRAY_SIZE;
WT_CONN_FILE_REMOVE(conn, fh, bucket);
WT_STAT_FAST_CONN_DECR(session, file_open);
WT_STAT_ATOMIC_DECR(&conn->stats, file_open);
__wt_spin_unlock(session, &conn->fh_lock);

View File

@@ -29,7 +29,7 @@ __remove_file_check(WT_SESSION_IMPL *session, const char *name)
* level should have closed it before removing.
*/
__wt_spin_lock(session, &conn->fh_lock);
SLIST_FOREACH(fh, &conn->fhhash[bucket], l)
SLIST_FOREACH(fh, &conn->fhhash[bucket], hashl)
if (strcmp(name, fh->name) == 0)
break;
__wt_spin_unlock(session, &conn->fh_lock);

View File

@@ -28,7 +28,7 @@ __wt_map_error_to_windows_error(int error) {
}
/*
* __wt_map_error_to_windows_error --
* __wt_map_windows_error_to_error --
* Return a positive integer, a decoded Windows error
*/
static int

View File

@@ -17,7 +17,12 @@ __wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh)
{
WT_UNUSED(session);
fh->fallocate_available = WT_FALLOCATE_AVAILABLE;
/*
* fallocate on Windows is implemented using SetEndOfFile which can
* also truncate the file. WiredTiger expects fallocate to ignore
* requests to truncate the file which Windows does not do.
*/
fh->fallocate_available = WT_FALLOCATE_NOT_AVAILABLE;
/*
* We use a separate handle for file size changes, so there's no need
@@ -34,23 +39,5 @@ int
__wt_fallocate(
WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len)
{
WT_DECL_RET;
LARGE_INTEGER largeint;
WT_RET(__wt_verbose(
session, WT_VERB_FILEOPS, "%s: fallocate", fh->name));
largeint.QuadPart = offset + len;
if ((ret = SetFilePointerEx(
fh->filehandle_secondary, largeint, NULL, FILE_BEGIN)) == FALSE)
WT_RET_MSG(session,
__wt_errno(), "%s SetFilePointerEx error", fh->name);
if ((ret = SetEndOfFile(fh->filehandle_secondary)) != FALSE) {
fh->size = fh->extend_size = len;
return (0);
}
WT_RET_MSG(session, __wt_errno(), "%s SetEndOfFile error", fh->name);
return (ENOTSUP);
}

View File

@@ -26,10 +26,8 @@ __wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
fh->name);
ret = SetEndOfFile(fh->filehandle_secondary);
if (ret != FALSE) {
fh->size = fh->extend_size = len;
if (ret != FALSE)
return (0);
}
if (GetLastError() == ERROR_USER_MAPPED_FILE)
return (EBUSY);

View File

@@ -39,7 +39,7 @@ __wt_open(WT_SESSION_IMPL *session,
/* Increment the reference count if we already have the file open. */
matched = 0;
__wt_spin_lock(session, &conn->fh_lock);
SLIST_FOREACH(tfh, &conn->fhhash[bucket], l)
SLIST_FOREACH(tfh, &conn->fhhash[bucket], hashl)
if (strcmp(name, tfh->name) == 0) {
++tfh->ref;
*fhp = tfh;
@@ -160,7 +160,7 @@ setupfh:
*/
matched = 0;
__wt_spin_lock(session, &conn->fh_lock);
SLIST_FOREACH(tfh, &conn->fhhash[bucket], l)
SLIST_FOREACH(tfh, &conn->fhhash[bucket], hashl)
if (strcmp(name, tfh->name) == 0) {
++tfh->ref;
*fhp = tfh;
@@ -169,7 +169,7 @@ setupfh:
}
if (!matched) {
WT_CONN_FILE_INSERT(conn, fh, bucket);
WT_STAT_FAST_CONN_INCR(session, file_open);
(void)WT_ATOMIC_ADD4(conn->open_file_count, 1);
*fhp = fh;
}
@@ -217,7 +217,7 @@ __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp)
/* Remove from the list. */
bucket = fh->name_hash % WT_HASH_ARRAY_SIZE;
WT_CONN_FILE_REMOVE(conn, fh, bucket);
WT_STAT_FAST_CONN_DECR(session, file_open);
(void)WT_ATOMIC_SUB4(conn->open_file_count, 1);
__wt_spin_unlock(session, &conn->fh_lock);

View File

@@ -29,7 +29,7 @@ __remove_file_check(WT_SESSION_IMPL *session, const char *name)
* level should have closed it before removing.
*/
__wt_spin_lock(session, &conn->fh_lock);
SLIST_FOREACH(fh, &conn->fhhash[bucket], l)
SLIST_FOREACH(fh, &conn->fhhash[bucket], hashl)
if (strcmp(name, fh->name) == 0)
break;
__wt_spin_unlock(session, &conn->fh_lock);

View File

@@ -127,15 +127,15 @@ typedef struct {
*/
struct __rec_boundary {
/*
* The start field records location in the initial split buffer,
* that is, the first byte of the split chunk recorded before we
* decide to split a page; the offset between the first byte of
* chunk[0] and the first byte of chunk[1] is chunk[0]'s length.
* Offset is the byte offset in the initial split buffer of the
* first byte of the split chunk, recorded before we decide to
* split the page; the difference between chunk[1]'s offset and
* chunk[0]'s offset is chunk[0]'s length.
*
* Once we split a page, we stop filling in the start field, as
* we're writing the split chunks as we find them.
* Once we split a page, we stop filling in offset values, we're
* writing the split chunks as we find them.
*/
uint8_t *start; /* Split's first byte */
size_t offset; /* Split's first byte */
/*
* The recno and entries fields are the starting record number
@@ -1512,8 +1512,7 @@ __rec_leaf_page_max(WT_SESSION_IMPL *session, WT_RECONCILE *r)
static void
__rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd)
{
bnd->start = NULL;
bnd->offset = 0;
bnd->recno = 0;
bnd->entries = 0;
@@ -1527,7 +1526,10 @@ __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd)
bnd->skip_next = 0;
bnd->skip_allocated = 0;
/* Ignore the key, we re-use that memory in each new reconciliation. */
/*
* Don't touch the key, we re-use that memory in each new
* reconciliation.
*/
bnd->already_compressed = 0;
}
@@ -1542,8 +1544,8 @@ __rec_split_bnd_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r)
/*
* Make sure there's enough room for another boundary. The calculation
* is +2, because when filling in the current boundary's information,
* we save the start point of the next boundary (for example, a record
* number or key), in the (current + 1) slot.
* we save start information for the next boundary (a byte offset and a
* record number or key), in the (current + 1) slot.
*
* For the same reason, we're always initializing one ahead.
*/
@@ -1692,7 +1694,7 @@ __rec_split_init(WT_SESSION_IMPL *session,
WT_RET(__rec_split_bnd_grow(session, r));
__rec_split_bnd_init(session, &r->bnd[0]);
r->bnd[0].recno = recno;
r->bnd[0].start = WT_PAGE_HEADER_BYTE(btree, dsk);
r->bnd[0].offset = WT_PAGE_HEADER_BYTE_SIZE(btree);
/*
* If the maximum page size is the same as the split page size, either
@@ -1984,10 +1986,10 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
session, r, &next->key, dsk->type));
/*
* Set the starting buffer address and clear the entries (the
* Set the starting buffer offset and clear the entries (the
* latter not required, but cleaner).
*/
next->start = r->first_free;
next->offset = WT_PTRDIFF(r->first_free, dsk);
next->entries = 0;
/* Set the space available to another split-size chunk. */
@@ -2643,8 +2645,8 @@ __rec_split_fixup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
WT_DECL_ITEM(tmp);
WT_DECL_RET;
WT_PAGE_HEADER *dsk;
uint32_t i, len;
uint8_t *dsk_start;
size_t i, len;
uint8_t *dsk_start, *p;
/*
* When we overflow physical limits of the page, we walk the list of
@@ -2672,8 +2674,8 @@ __rec_split_fixup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk);
for (i = 0, bnd = r->bnd; i < r->bnd_next; ++i, ++bnd) {
/* Copy the page contents to the temporary buffer. */
len = WT_PTRDIFF32((bnd + 1)->start, bnd->start);
memcpy(dsk_start, bnd->start, len);
len = (bnd + 1)->offset - bnd->offset;
memcpy(dsk_start, (uint8_t *)r->dsk.mem + bnd->offset, len);
/* Finalize the header information and write the page. */
dsk->recno = bnd->recno;
@@ -2698,13 +2700,14 @@ __rec_split_fixup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
* chunk, including header, because if there was room for that large a
* remnant, we wouldn't have switched from accumulating to a page end.
*/
len = WT_PTRDIFF32(r->first_free, bnd->start);
p = (uint8_t *)r->dsk.mem + bnd->offset;
len = WT_PTRDIFF(r->first_free, p);
if (len >= r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree))
WT_PANIC_ERR(session, EINVAL,
"Reconciliation remnant too large for the split buffer");
dsk = r->dsk.mem;
dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk);
(void)memmove(dsk_start, bnd->start, len);
(void)memmove(dsk_start, p, len);
/*
* Fix up our caller's information, including updating the starting

View File

@@ -187,13 +187,12 @@ __open_index(WT_SESSION_IMPL *session, WT_TABLE *table, WT_INDEX *idx)
/* Start with the declared index columns. */
WT_ERR(__wt_config_subinit(session, &colconf, &idx->colconf));
npublic_cols = 0;
while ((ret = __wt_config_next(&colconf, &ckey, &cval)) == 0) {
for (npublic_cols = 0;
(ret = __wt_config_next(&colconf, &ckey, &cval)) == 0;
++npublic_cols)
WT_ERR(__wt_buf_catfmt(
session, buf, "%.*s,", (int)ckey.len, ckey.str));
++npublic_cols;
}
if (ret != 0 && ret != WT_NOTFOUND)
if (ret != WT_NOTFOUND)
goto err;
/*

View File

@@ -47,7 +47,7 @@ __session_add_dhandle(
* the schema lock.
*/
int
__wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags)
__wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags, int *deadp)
{
enum { NOLOCK, READLOCK, WRITELOCK } locked;
WT_BTREE *btree;
@@ -57,6 +57,8 @@ __wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags)
btree = S2BT(session);
dhandle = session->dhandle;
locked = NOLOCK;
if (deadp != NULL)
*deadp = 0;
/*
* Special operation flags will cause the handle to be reopened.
@@ -95,7 +97,10 @@ __wt_session_lock_dhandle(WT_SESSION_IMPL *session, uint32_t flags)
* required, we're done. Otherwise, check that the handle is open and
* that no special flags are required.
*/
if (LF_ISSET(WT_DHANDLE_LOCK_ONLY) ||
if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) {
WT_ASSERT(session, deadp != NULL);
*deadp = 1;
} else if (LF_ISSET(WT_DHANDLE_LOCK_ONLY) ||
(F_ISSET(dhandle, WT_DHANDLE_OPEN) && special_flags == 0))
return (0);
@@ -135,46 +140,25 @@ __wt_session_release_btree(WT_SESSION_IMPL *session)
dhandle = session->dhandle;
locked = F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) ? WRITELOCK : READLOCK;
if (F_ISSET(dhandle, WT_DHANDLE_DISCARD_CLOSE)) {
/*
* If configured to discard on last close, trade any read lock
* for an exclusive lock. If the exchange succeeds, setup for
* discard. It is expected acquiring an exclusive lock will fail
* sometimes since the handle may still be in use: in that case
* we're done.
*/
if (locked == READLOCK) {
locked = NOLOCK;
WT_ERR(__wt_readunlock(session, dhandle->rwlock));
ret = __wt_try_writelock(session, dhandle->rwlock);
if (ret != 0) {
if (ret == EBUSY)
ret = 0;
goto err;
}
locked = WRITELOCK;
F_CLR(dhandle, WT_DHANDLE_DISCARD_CLOSE);
F_SET(dhandle,
WT_DHANDLE_DISCARD | WT_DHANDLE_EXCLUSIVE);
}
}
/*
* If we had special flags set, close the handle so that future access
* can get a handle without special flags.
*/
if (F_ISSET(dhandle, WT_DHANDLE_DISCARD) ||
if (F_ISSET(dhandle, WT_DHANDLE_DISCARD_FORCE)) {
WT_WITH_DHANDLE_LOCK(session,
ret = __wt_conn_btree_sync_and_close(session, 0, 1));
F_CLR(dhandle, WT_DHANDLE_DISCARD_FORCE);
} else if (F_ISSET(dhandle, WT_DHANDLE_DISCARD) ||
F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS)) {
WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE));
ret = __wt_conn_btree_sync_and_close(session, 0, 0);
F_CLR(dhandle, WT_DHANDLE_DISCARD);
WT_TRET(__wt_conn_btree_sync_and_close(session, 0, 0));
}
if (F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE))
F_CLR(dhandle, WT_DHANDLE_EXCLUSIVE);
err: switch (locked) {
switch (locked) {
case NOLOCK:
break;
case READLOCK:
@@ -312,7 +296,8 @@ __session_dhandle_sweep(WT_SESSION_IMPL *session)
dhandle = dhandle_cache->dhandle;
if (dhandle != session->dhandle &&
dhandle->session_inuse == 0 &&
now - dhandle->timeofdeath > conn->sweep_idle_time) {
(F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
now - dhandle->timeofdeath > conn->sweep_idle_time)) {
WT_STAT_FAST_CONN_INCR(session, dh_session_handles);
__session_discard_btree(session, dhandle_cache);
}
@@ -348,6 +333,7 @@ __wt_session_get_btree(WT_SESSION_IMPL *session,
WT_DATA_HANDLE_CACHE *dhandle_cache;
WT_DECL_RET;
uint64_t bucket;
int is_dead;
WT_ASSERT(session, !F_ISSET(session, WT_SESSION_NO_DATA_HANDLES));
WT_ASSERT(session, !LF_ISSET(WT_DHANDLE_HAVE_REF));
@@ -381,7 +367,8 @@ __wt_session_get_btree(WT_SESSION_IMPL *session,
if (dhandle != NULL) {
/* Try to lock the handle; if this succeeds, we're done. */
if ((ret = __wt_session_lock_dhandle(session, flags)) == 0)
if ((ret =
__wt_session_lock_dhandle(session, flags, &is_dead)) == 0)
goto done;
/* Propagate errors we don't expect. */
@@ -389,17 +376,23 @@ __wt_session_get_btree(WT_SESSION_IMPL *session,
return (ret);
/*
* Don't try harder to get the btree handle if our caller
* hasn't allowed us to take the schema lock - they do so on
* purpose and will handle error returns.
* Don't try harder to get the handle if we're only checking
* for locks or our caller hasn't allowed us to take the schema
* lock - they do so on purpose and will handle error returns.
*/
if (!F_ISSET(session, WT_SESSION_SCHEMA_LOCKED) &&
if ((LF_ISSET(WT_DHANDLE_LOCK_ONLY) && ret == EBUSY) ||
(!F_ISSET(session, WT_SESSION_SCHEMA_LOCKED) &&
F_ISSET(session,
WT_SESSION_HANDLE_LIST_LOCKED | WT_SESSION_TABLE_LOCKED))
WT_SESSION_HANDLE_LIST_LOCKED | WT_SESSION_TABLE_LOCKED)))
return (ret);
/* We found the data handle, don't try to get it again. */
LF_SET(WT_DHANDLE_HAVE_REF);
/* If we found the handle and it isn't dead, reopen it. */
if (is_dead) {
__session_discard_btree(session, dhandle_cache);
dhandle_cache = NULL;
session->dhandle = dhandle = NULL;
} else
LF_SET(WT_DHANDLE_HAVE_REF);
}
/*
@@ -419,11 +412,11 @@ __wt_session_get_btree(WT_SESSION_IMPL *session,
WT_RET(__session_add_dhandle(session, NULL));
WT_ASSERT(session, LF_ISSET(WT_DHANDLE_LOCK_ONLY) ||
F_ISSET(session->dhandle, WT_DHANDLE_OPEN));
(F_ISSET(session->dhandle, WT_DHANDLE_OPEN) &&
!F_ISSET(session->dhandle, WT_DHANDLE_DEAD)));
done: WT_ASSERT(session, LF_ISSET(WT_DHANDLE_EXCLUSIVE) ==
F_ISSET(session->dhandle, WT_DHANDLE_EXCLUSIVE));
F_SET(session->dhandle, LF_ISSET(WT_DHANDLE_DISCARD_CLOSE));
return (0);
}
@@ -438,8 +431,17 @@ __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint)
WT_DATA_HANDLE *dhandle, *saved_dhandle;
WT_DECL_RET;
WT_ASSERT(session, WT_META_TRACKING(session));
saved_dhandle = session->dhandle;
/*
* If we already have the checkpoint locked, don't attempt to lock
* it again.
*/
if ((ret = __wt_meta_track_find_handle(
session, saved_dhandle->name, checkpoint)) != WT_NOTFOUND)
return (ret);
/*
* Get the checkpoint handle exclusive, so no one else can access it
* while we are creating the new checkpoint.
@@ -463,7 +465,6 @@ __wt_session_lock_checkpoint(WT_SESSION_IMPL *session, const char *checkpoint)
dhandle = session->dhandle;
F_SET(dhandle, WT_DHANDLE_DISCARD);
WT_ASSERT(session, WT_META_TRACKING(session));
WT_ERR(__wt_meta_track_handle_lock(session, 0));
/* Restore the original btree in the session. */

View File

@@ -33,7 +33,7 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, int *busyp
*busyp = 0;
/* If a file can never be evicted, hazard pointers aren't required. */
if (F_ISSET(btree, WT_BTREE_NO_HAZARD))
if (F_ISSET(btree, WT_BTREE_IN_MEMORY))
return (0);
/*
@@ -142,7 +142,7 @@ __wt_hazard_clear(WT_SESSION_IMPL *session, WT_PAGE *page)
btree = S2BT(session);
/* If a file can never be evicted, hazard pointers aren't required. */
if (F_ISSET(btree, WT_BTREE_NO_HAZARD))
if (F_ISSET(btree, WT_BTREE_IN_MEMORY))
return (0);
/*

View File

@@ -1090,7 +1090,7 @@ __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[])
* Checkpoint a single file as part of closing the handle.
*/
int
__wt_checkpoint_close(WT_SESSION_IMPL *session, int final, int force)
__wt_checkpoint_close(WT_SESSION_IMPL *session, int final)
{
WT_BTREE *btree;
WT_DECL_RET;
@@ -1099,8 +1099,8 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, int final, int force)
btree = S2BT(session);
bulk = F_ISSET(btree, WT_BTREE_BULK) ? 1 : 0;
/* Handle forced discard (when dropping a file). */
if (force)
/* If the handle is already dead, force the discard. */
if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD))
return (__wt_cache_op(session, NULL, WT_SYNC_DISCARD_FORCE));
/*

View File

@@ -302,7 +302,7 @@ __wt_txn_checkpoint_log(
switch (flags) {
case WT_TXN_LOG_CKPT_PREPARE:
txn->full_ckpt = 1;
*ckpt_lsn = S2C(session)->log->alloc_lsn;
*ckpt_lsn = S2C(session)->log->write_start_lsn;
break;
case WT_TXN_LOG_CKPT_START:
@@ -327,7 +327,7 @@ __wt_txn_checkpoint_log(
txn->ckpt_nsnapshot = 0;
WT_CLEAR(empty);
ckpt_snapshot = &empty;
*ckpt_lsn = S2C(session)->log->alloc_lsn;
*ckpt_lsn = S2C(session)->log->write_start_lsn;
} else
ckpt_snapshot = txn->ckpt_snapshot;
@@ -440,7 +440,8 @@ __wt_txn_truncate_end(WT_SESSION_IMPL *session)
*/
static int
__txn_printlog(WT_SESSION_IMPL *session,
WT_ITEM *rawrec, WT_LSN *lsnp, void *cookie, int firstrecord)
WT_ITEM *rawrec, WT_LSN *lsnp, WT_LSN *next_lsnp,
void *cookie, int firstrecord)
{
FILE *out;
WT_LOG_RECORD *logrec;
@@ -452,6 +453,7 @@ __txn_printlog(WT_SESSION_IMPL *session,
const uint8_t *end, *p;
const char *msg;
WT_UNUSED(next_lsnp);
out = cookie;
p = LOG_SKIP_HEADER(rawrec->data);

View File

@@ -263,13 +263,15 @@ __txn_commit_apply(
*/
static int
__txn_log_recover(WT_SESSION_IMPL *session,
WT_ITEM *logrec, WT_LSN *lsnp, void *cookie, int firstrecord)
WT_ITEM *logrec, WT_LSN *lsnp, WT_LSN *next_lsnp,
void *cookie, int firstrecord)
{
WT_RECOVERY *r;
const uint8_t *end, *p;
uint64_t txnid;
uint32_t rectype;
WT_UNUSED(next_lsnp);
r = cookie;
p = LOG_SKIP_HEADER(logrec->data);
end = (const uint8_t *)logrec->data + logrec->size;
@@ -374,32 +376,30 @@ __recovery_free(WT_RECOVERY *r)
static int
__recovery_file_scan(WT_RECOVERY *r)
{
WT_DECL_RET;
WT_CURSOR *c;
const char *uri, *config;
WT_DECL_RET;
int cmp;
const char *uri, *config;
/* Scan through all files in the metadata. */
c = r->files[0].c;
c->set_key(c, "file:");
if ((ret = c->search_near(c, &cmp)) != 0) {
/* Is the metadata empty? */
if (ret == WT_NOTFOUND)
ret = 0;
goto err;
WT_RET_NOTFOUND_OK(ret);
return (0);
}
if (cmp < 0)
WT_ERR_NOTFOUND_OK(c->next(c));
WT_RET_NOTFOUND_OK(c->next(c));
for (; ret == 0; ret = c->next(c)) {
WT_ERR(c->get_key(c, &uri));
WT_RET(c->get_key(c, &uri));
if (!WT_PREFIX_MATCH(uri, "file:"))
break;
WT_ERR(c->get_value(c, &config));
WT_ERR(__recovery_setup_file(r, uri, config));
WT_RET(c->get_value(c, &config));
WT_RET(__recovery_setup_file(r, uri, config));
}
WT_ERR_NOTFOUND_OK(ret);
err: return (ret);
WT_RET_NOTFOUND_OK(ret);
return (0);
}
/*

View File

@@ -113,7 +113,7 @@ main(int argc, char *argv[])
return (usage());
/* Use line buffering on stdout so status updates aren't buffered. */
(void)setvbuf(stdout, NULL, _IOLBF, 0);
(void)setvbuf(stdout, NULL, _IOLBF, 32);
/* Clean up on signal. */
(void)signal(SIGINT, onint);
@@ -156,7 +156,14 @@ wt_startup(char *config_open)
int ret;
char config_buf[128];
if ((ret = system("rm -rf WT_TEST && mkdir WT_TEST")) != 0)
#undef CMD
#ifdef _WIN32
#define CMD "rd /s /q WT_TEST & mkdir WT_TEST"
#else
#define CMD "rm -rf WT_TEST && mkdir WT_TEST"
#endif
if ((ret = system(CMD)) != 0)
die(ret, "directory cleanup call failed");
snprintf(config_buf, sizeof(config_buf),
@@ -192,7 +199,13 @@ shutdown(void)
{
int ret;
if ((ret = system("rm -rf WT_TEST")) != 0)
#undef CMD
#ifdef _WIN32
#define CMD "if exist WT_TEST rd /s /q WT_TEST"
#else
#define CMD "rm -rf WT_TEST"
#endif
if ((ret = system(CMD)) != 0)
die(ret, "directory cleanup call failed");
}

View File

@@ -27,16 +27,26 @@
*/
#include <sys/types.h>
#ifndef _WIN32
#include <sys/time.h>
#endif
#include <errno.h>
#include <inttypes.h>
#ifndef _WIN32
#include <pthread.h>
#endif
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef _WIN32
#include <unistd.h>
#endif
#ifdef _WIN32
#include "windows_shim.h"
#endif
#include <wiredtiger.h>

View File

@@ -397,7 +397,7 @@ void
config_single(const char *s, int perm)
{
CONFIG *cp;
u_long v;
uint64_t v;
char *p;
const char *ep;

View File

@@ -299,7 +299,7 @@ path_setup(const char *home)
*/
#undef CMD
#ifdef _WIN32
#define CMD "cd %s && del /s /q * && rd /s /q KVS"
#define CMD "cd %s && del /s /q * >:nul && rd /s /q KVS"
#else
#define CMD "cd %s > /dev/null && rm -rf `ls | sed /rand/d`"
#endif
@@ -311,7 +311,7 @@ path_setup(const char *home)
/* Backup directory initialize command, remove and re-create it. */
#undef CMD
#ifdef _WIN32
#define CMD "del /s && mkdir %s"
#define CMD "del /s /q >:nul && mkdir %s"
#else
#define CMD "rm -rf %s && mkdir %s"
#endif
@@ -330,9 +330,9 @@ path_setup(const char *home)
#undef CMD
#ifdef _WIN32
#define CMD \
"cd %s " \
"cd %s && " \
"rd /q /s slvg.copy & mkdir slvg.copy && " \
"copy WiredTiger* slvg.copy\\ && copy wt* slvg.copy\\"
"copy WiredTiger* slvg.copy\\ >:nul && copy wt* slvg.copy\\ >:nul"
#else
#define CMD \
"cd %s > /dev/null && " \

View File

@@ -424,6 +424,7 @@ wts_dump(const char *tag, int dump_bdb)
if (DATASOURCE("helium") || DATASOURCE("kvsbdb"))
return;
#ifndef _WIN32
track("dump files and compare", 0ULL, NULL);
len = strlen(g.home) + strlen(BERKELEY_DB_PATH) + strlen(g.uri) + 100;
@@ -441,6 +442,7 @@ wts_dump(const char *tag, int dump_bdb)
if ((ret = system(cmd)) != 0)
die(ret, "%s: dump comparison failed", tag);
free(cmd);
#endif
}
void

View File

@@ -72,6 +72,23 @@ tasks:
scons.bat ${smp_command|} "CFLAGS=/Gv /wd4090 /wd4996 /we4047 /we4024 /TC /we4100" wiredtiger.dll libwiredtiger.lib
- name: fops-windows
commands:
- func: "fetch source"
- command: git.apply_patch
params:
directory: wiredtiger
- command: shell.exec
params:
working_dir: "wiredtiger"
script: |
set -o errexit
set -o verbose
scons.bat --enable-python=c:\\swigwin-3.0.2\\swig.exe ${smp_command|}
cmd.exe /c t_fops.exe
buildvariants:
- name: ubuntu1404
display_name: Ubuntu 14.04
@@ -99,12 +116,13 @@ buildvariants:
- name: windows-64
display_name: Windows 64-bit
run_on:
- windows-64-vs2013-compile
- windows-64-vs2013-test
expansions:
smp_command: -j$(grep -c ^processor /proc/cpuinfo)
tasks:
- name: compile-windows
- name: compile-windows-alt
- name: fops-windows
- name: osx-108
display_name: OS X 10.8

84
test/suite/test_bug013.py Normal file
View File

@@ -0,0 +1,84 @@
#!/usr/bin/env python
#
# Public Domain 2014-2015 MongoDB, Inc.
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
# test_bug013.py
# Test data consistency in LSM with updates. Ensure that overwrite
# cursors see all entries in the tree (i.e: they open cursors on all
# chunks in the LSM tree).
# See JIRA BF-829
class test_bug013(wttest.WiredTigerTestCase):
    """
    Test LSM data consistency.

    An overwrite=false cursor has to look in every chunk of an LSM tree
    before deciding whether a key exists; this test inserts and removes a
    key across two chunks and checks the final table contents.
    """
    uri = 'table:test_bug013'

    def check_entries(self, keys):
        """
        Scan the whole table and check that its keys match the expected
        list exactly, in iteration order.

        keys -- list of [i1, i2, i3] key triples expected in the table.
        """
        cursor = self.session.open_cursor(self.uri, None, None)
        try:
            # Each iteration yields the three key columns followed by the
            # value column; only the key columns are compared.
            found = [[i1, i2, i3] for i1, i2, i3, v1 in cursor]
        finally:
            # Close the cursor even if the scan itself fails.
            cursor.close()

        # Compare the complete lists: extra or missing entries show up as
        # an assertion failure listing both sides, never as an IndexError.
        self.assertEqual(found, keys)

    def test_lsm_consistency(self):
        """
        Insert a key, force a second chunk, then add and remove another
        key through overwrite=false cursors; only the first key must
        remain.
        """
        self.session.reconfigure("isolation=snapshot")
        self.session.create(self.uri, 'key_format=iii,value_format=i,type=lsm')
        cursor = self.session.open_cursor(self.uri, None, None)
        cursor[(2, 6, 1)] = 0
        cursor.close()

        # Ensure the first chunk is flushed to disk, so the tree will have
        # at least two chunks. Wrapped in a try, since it sometimes gets
        # an EBUSY return.
        try:
            self.session.verify(self.uri, None)
        except wiredtiger.WiredTigerError:
            pass

        # Add a key.
        cursor = self.session.open_cursor(self.uri, None, 'overwrite=false')
        cursor[(1, 5, 1)] = 0
        cursor.close()

        # Remove the key we just added. If the LSM code is broken, the
        # search for the key we just inserted returns not found - so the
        # key isn't actually removed.
        cursor = self.session.open_cursor(self.uri, None, 'overwrite=false')
        cursor.set_key((1, 5, 1))
        cursor.remove()
        cursor.close()

        # Verify that the data is as we expect.
        self.check_entries([[2, 6, 1]])

if __name__ == '__main__':
    wttest.run()

View File

@@ -40,7 +40,9 @@ class test_cursor07(wttest.WiredTigerTestCase, suite_subprocess):
logmax = "100K"
tablename = 'test_cursor07'
uri = 'table:' + tablename
nkeys = 5
# A large number of keys will force a log file change which will
# test that scenario for log cursors.
nkeys = 7000
scenarios = check_scenarios([
('regular', dict(reopen=False)),

125
test/suite/test_cursor08.py Normal file
View File

@@ -0,0 +1,125 @@
#!/usr/bin/env python
#
# Public Domain 2014-2015 MongoDB, Inc.
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# test_cursor08.py
# Log cursors with compression
#
import fnmatch, os, shutil, run, time
from suite_subprocess import suite_subprocess
from wiredtiger import wiredtiger_open, stat, WiredTigerError
from wtscenario import multiply_scenarios, number_scenarios, check_scenarios
import wttest
class test_cursor08(wttest.WiredTigerTestCase, suite_subprocess):
    """
    Log cursors with compression: write a handful of records with logging
    enabled under each log compressor, then read them back through a
    "log:" cursor.
    """
    logmax = "100K"
    tablename = 'test_cursor08'
    uri = 'table:' + tablename
    nkeys = 5

    reopens = check_scenarios([
        ('regular', dict(reopen=False)),
        ('reopen', dict(reopen=True))
    ])
    compress = check_scenarios([
        ('bzip2', dict(compress='bzip2')),
        ('nop', dict(compress='nop')),
        ('snappy', dict(compress='snappy')),
        ('zlib', dict(compress='zlib')),
        ('none', dict(compress='')),
    ])

    scenarios = number_scenarios(multiply_scenarios('.', reopens, compress))

    # Return the wiredtiger_open extension argument for a shared library.
    def extensionArg(self, name):
        """
        Build the ',extensions=[...]' configuration fragment for the named
        compressor, or return '' when no compressor is configured.  The
        test is skipped if the compressor's shared library was not built.
        """
        # The uncompressed scenario passes an empty string, not None: no
        # extension is needed, so don't go looking for a library (which
        # would never exist and would skip the scenario).
        if not name:
            return ''

        extdir = os.path.join(run.wt_builddir, 'ext/compressors')
        extfile = os.path.join(
            extdir, name, '.libs', 'libwiredtiger_' + name + '.so')
        if not os.path.exists(extfile):
            self.skipTest('compression extension "' + extfile + '" not built')
        return ',extensions=["' + extfile + '"]'

    # Overrides WiredTigerTestCase - add logging
    def setUpConnectionOpen(self, dir):
        """
        Open the connection with logging enabled, the scenario's log
        compressor configured and its extension loaded; also opens a
        second session on the connection.  Returns the connection.
        """
        self.home = dir
        self.txn_sync = '(method=dsync,enabled)'
        conn_params = \
            'log=(archive=false,enabled,file_max=%s,' % self.logmax + \
            'compressor=%s)' % self.compress + \
            ',create,error_prefix="%s: ",' % self.shortid() + \
            'transaction_sync="%s",' % self.txn_sync + \
            self.extensionArg(self.compress)
        # print "Creating conn at '%s' with config '%s'" % (dir, conn_params)
        try:
            conn = wiredtiger_open(dir, conn_params)
        except WiredTigerError:
            print("Failed conn at '%s' with config '%s'" % (dir, conn_params))
            # Report the real failure; without this the code below would
            # fail on the unbound "conn" and hide the original error.
            raise
        self.pr(repr(conn))
        self.session2 = conn.open_session()
        return conn

    def test_log_cursor(self):
        """
        Insert nkeys records in one transaction, optionally reopen the
        connection, then count the log records whose value field contains
        the inserted value.
        """
        # print "Creating %s with config '%s'" % (self.uri, self.create_params)
        create_params = 'key_format=i,value_format=S'
        self.session.create(self.uri, create_params)
        c = self.session.open_cursor(self.uri, None)

        # A binary value.
        value = u'\u0001\u0002abcd\u0003\u0004'

        self.session.begin_transaction()
        for k in range(self.nkeys):
            c[k] = value
        self.session.commit_transaction()
        c.close()

        if self.reopen:
            self.reopen_conn()

        # Check for these values via a log cursor
        c = self.session.open_cursor("log:", None)
        count = 0
        while c.next() == 0:
            # lsn.file, lsn.offset, opcount
            keys = c.get_key()
            # txnid, rectype, optype, fileid, logrec_key, logrec_value
            values = c.get_value()
            try:
                if value in str(values[5]):     # logrec_value
                    count += 1
            except Exception:
                # Best effort: log records whose value can't be compared
                # against the inserted string simply don't count.
                pass
        c.close()
        self.assertEqual(count, self.nkeys)

if __name__ == '__main__':
    wttest.run()

View File

@@ -56,6 +56,23 @@ usleep(useconds_t useconds)
return (0);
}
/*
 * gettimeofday --
 *	Windows emulation of the POSIX call: fill in *tp with the current
 * system time split into seconds and microseconds.  The tzp argument is
 * accepted for signature compatibility and ignored.  Always returns 0.
 */
int gettimeofday(struct timeval* tp, void* tzp)
{
	uint64_t ns100;
	FILETIME time;

	/* Unused; reference it so unused-parameter warnings stay quiet. */
	(void)tzp;

	GetSystemTimeAsFileTime(&time);

	/*
	 * Combine the two 32-bit halves into one 64-bit count, using unsigned
	 * arithmetic throughout so the shift can never overflow a signed
	 * type.  FILETIME counts 100-nanosecond intervals from January 1,
	 * 1601; subtracting 116444736000000000 rebases the count to the Unix
	 * epoch of January 1, 1970.
	 */
	ns100 = (((uint64_t)time.dwHighDateTime << 32) + time.dwLowDateTime)
	    - 116444736000000000ULL;

	/* 10,000,000 100ns units per second, 10 per microsecond. */
	tp->tv_sec = ns100 / 10000000;
	tp->tv_usec = (long)((ns100 % 10000000) / 10);

	return (0);
}
int
pthread_rwlock_destroy(pthread_rwlock_t *lock)
{

View File

@@ -59,6 +59,16 @@ _Check_return_opt_ int __cdecl _wt_snprintf(
*/
#define mkdir(path, mode) _mkdir(path)
/*
* Emulate <sys/time.h>
*/
struct timeval {
/* Whole seconds. */
time_t tv_sec;
/* Sub-second remainder, in microseconds. */
int64_t tv_usec;
};
/*
 * Fill in *tp with the current time.  The tzp argument exists to match the
 * POSIX signature; NOTE(review): the implementation added alongside this
 * declaration appears to ignore it -- confirm before relying on it.
 */
int gettimeofday(struct timeval* tp, void* tzp);
/*
* Emulate <sched.h>
*/