Move compact start/stop into the session layer so all operations on the file during compaction (including checkpoints) use first-fit allocation.
154 lines
5.7 KiB
Python
154 lines
5.7 KiB
Python
#!/usr/bin/env python
|
|
#
|
|
# Public Domain 2014-2016 MongoDB, Inc.
|
|
# Public Domain 2008-2014 WiredTiger, Inc.
|
|
#
|
|
# This is free and unencumbered software released into the public domain.
|
|
#
|
|
# Anyone is free to copy, modify, publish, use, compile, sell, or
|
|
# distribute this software, either in source code form or as a compiled
|
|
# binary, for any purpose, commercial or non-commercial, and by any
|
|
# means.
|
|
#
|
|
# In jurisdictions that recognize copyright laws, the author or authors
|
|
# of this software dedicate any and all copyright interest in the
|
|
# software to the public domain. We make this dedication for the benefit
|
|
# of the public at large and to the detriment of our heirs and
|
|
# successors. We intend this dedication to be an overt act of
|
|
# relinquishment in perpetuity of all present and future rights to this
|
|
# software under copyright law.
|
|
#
|
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
|
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
|
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
# OTHER DEALINGS IN THE SOFTWARE.
|
|
#
|
|
# test_compact02.py
|
|
# Test that compact reduces the file size.
|
|
#
|
|
|
|
import wiredtiger, wttest
|
|
from wiredtiger import stat
|
|
from wtscenario import multiply_scenarios, number_scenarios
|
|
|
|
# Test basic compaction
|
|
class test_compact02(wttest.WiredTigerTestCase):

    # Scenario axes. Each entry is (scenario name, attributes set on the test
    # instance); the axes are multiplied together into 'scenarios' below.
    types = [
        ('file', dict(uri='file:test_compact02')),
    ]
    cacheSize = [
        ('default', dict(cacheSize='')),
        ('1mb', dict(cacheSize='cache_size=1MB')),
        ('10gb', dict(cacheSize='cache_size=10GB')),
    ]

    # There's a balance between the pages we create and the size of the
    # records being stored: compaction doesn't work on tables with many
    # overflow items because we don't rewrite them. Experimentally, 8KB is as
    # small as the test can go. Additionally, we can't set the maximum page
    # size too large because there won't be enough pages to rewrite.
    # Experimentally, 128KB works.
    fileConfig = [
        ('default', dict(fileConfig='')),
        ('8KB', dict(fileConfig='leaf_page_max=8kb')),
        ('64KB', dict(fileConfig='leaf_page_max=64KB')),
        ('128KB', dict(fileConfig='leaf_page_max=128KB')),
    ]
    scenarios = number_scenarios(
        multiply_scenarios('.', types, cacheSize, fileConfig))

    # We want about 22K records that total about 130Mb. That is an average
    # of 6196 bytes per record. Half the records should be smaller, about
    # 2700 bytes (about 30Mb) and the other half should be larger, 9666 bytes
    # per record (about 100Mb).
    #
    # Test flow is as follows.
    #
    # 1. Create a table with the data, alternating record size.
    # 2. Checkpoint and get stats on the table to confirm the size.
    # 3. Delete the half of the records with the larger record size.
    # 4. Call compact.
    # 5. Get stats on compacted table.
    #
    nrecords = 22000
    bigvalue = "abcdefghi" * 1074       # 9*1074 == 9666
    smallvalue = "ihgfedcba" * 303      # 9*303 == 2727

    # Total payload bytes written by the populate step. Floor division keeps
    # this an integer on both Python 2 and Python 3.
    fullsize = nrecords // 2 * len(bigvalue) + nrecords // 2 * len(smallvalue)

    # Return the size of the file, as reported by the block_size statistic
    # of the data source.
    def getSize(self):
        cstat = self.session.open_cursor(
            'statistics:' + self.uri, None, 'statistics=(size)')
        try:
            # Element 2 of the statistics entry is the value that is returned.
            return cstat[stat.dsrc.block_size][2]
        finally:
            # Close the cursor even if the statistic lookup raises.
            cstat.close()

    # This test varies the cache size and so needs to set up its own
    # connection. Override the standard methods so that they open nothing.
    def setUpConnectionOpen(self, dir):
        return None

    def setUpSessionOpen(self, conn):
        return None

    # Open a connection in the current directory using the given cache size
    # configuration string (may be empty), then open a session on it. The
    # results are stored in self.conn and self.session.
    def ConnectionOpen(self, cacheSize):
        self.home = '.'
        conn_params = 'create,' + \
            cacheSize + ',error_prefix="%s: ",' % self.shortid() + \
            'statistics=(fast)'
        try:
            self.conn = wiredtiger.wiredtiger_open(self.home, conn_params)
        except wiredtiger.WiredTigerError:
            # Report the directory and configuration that failed, then let
            # the error propagate: continuing without a connection would only
            # hide the real cause behind a failure in open_session below.
            print("Failed conn at '%s' with config '%s'" %
                (self.home, conn_params))
            raise
        self.session = self.conn.open_session(None)

    # Create a table, add keys with both big and small values, remove the
    # big ones, compact, and verify that the file shrank.
    def test_compact02(self):
        self.ConnectionOpen(self.cacheSize)

        mb = 1024 * 1024
        params = 'key_format=i,value_format=S,' + self.fileConfig

        # 1. Create a table with the data, alternating record size.
        self.session.create(self.uri, params)
        c = self.session.open_cursor(self.uri, None)
        for i in range(self.nrecords):
            # Even keys get the big value, odd keys get the small one.
            value = self.bigvalue if i % 2 == 0 else self.smallvalue
            c[i] = str(i) + value
        c.close()

        # 2. Checkpoint and get stats on the table to confirm the size.
        self.session.checkpoint()
        sz = self.getSize()
        self.pr('After populate ' + str(sz // mb) + 'MB')
        self.assertGreater(sz, self.fullsize)

        # 3. Delete the half of the records with the larger record size.
        # Those are exactly the even keys.
        c = self.session.open_cursor(self.uri, None)
        count = 0
        for i in range(0, self.nrecords, 2):
            count += 1
            c.set_key(i)
            c.remove()
        c.close()
        self.pr('Removed total ' +
            str((count * len(self.bigvalue)) // mb) + 'MB')

        # 4. Call compact.
        self.session.compact(self.uri, None)

        # 5. Get stats on compacted table.
        sz = self.getSize()
        self.pr('After compact ' + str(sz // mb) + 'MB')

        # After compact, the file size should be less than half the full size.
        self.assertLess(sz, self.fullsize // 2)
|
|
|
|
|
|
# When executed directly as a script (rather than imported), hand control
# to wttest.run().
if __name__ == '__main__':
    wttest.run()
|