Files
mongo/test/suite/test_compact02.py
Keith Bostic 029f0ae023 WT-2394: Have pages split as part of compact checkpoints use first-fit
Move compact start/stop into the session layer so all operations on the
file during compaction (including checkpoints) use first-fit allocation.
2016-02-22 13:04:59 -05:00

154 lines
5.7 KiB
Python

#!/usr/bin/env python
#
# Public Domain 2014-2016 MongoDB, Inc.
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
#
# test_compact02.py
# Test that compact reduces the file size.
#
import wiredtiger, wttest
from wiredtiger import stat
from wtscenario import multiply_scenarios, number_scenarios
# Test basic compaction
class test_compact02(wttest.WiredTigerTestCase):
    """Check that compacting a file after deleting its large records
    shrinks the file to less than half of the originally stored data size.

    The test is run once per combination of the cache size and leaf page
    size settings listed below.
    """

    types = [
        ('file', dict(uri='file:test_compact02')),
    ]
    cacheSize = [
        ('default', dict(cacheSize='')),
        ('1mb', dict(cacheSize='cache_size=1MB')),
        ('10gb', dict(cacheSize='cache_size=10GB')),
    ]

    # There's a balance between the pages we create and the size of the
    # records being stored: compaction doesn't work on tables with many
    # overflow items because we don't rewrite them. Experimentally, 8KB is
    # as small as the test can go. Additionally, we can't set the maximum
    # page size too large because there won't be enough pages to rewrite.
    # Experimentally, 128KB works.
    fileConfig = [
        ('default', dict(fileConfig='')),
        ('8KB', dict(fileConfig='leaf_page_max=8kb')),
        ('64KB', dict(fileConfig='leaf_page_max=64KB')),
        ('128KB', dict(fileConfig='leaf_page_max=128KB')),
    ]
    scenarios = number_scenarios(
        multiply_scenarios('.', types, cacheSize, fileConfig))

    # We want about 22K records that total about 130MB. That is an average
    # of 6196 bytes per record. Half the records should be smaller, about
    # 2700 bytes (about 30MB) and the other half should be larger, 9666
    # bytes per record (about 100MB).
    #
    # Test flow is as follows.
    #
    # 1. Create a table with the data, alternating record size.
    # 2. Checkpoint and get stats on the table to confirm the size.
    # 3. Delete the half of the records with the larger record size.
    # 4. Call compact.
    # 5. Get stats on compacted table.
    #
    nrecords = 22000
    bigvalue = "abcdefghi" * 1074          # 9*1074 == 9666
    smallvalue = "ihgfedcba" * 303         # 9*303 == 2727
    # Use floor division so the expected size stays an integer regardless of
    # the interpreter's "/" semantics.
    fullsize = (nrecords // 2) * len(bigvalue) + \
        (nrecords // 2) * len(smallvalue)

    def getSize(self):
        """Return the file's block-manager size from its statistics cursor."""
        cstat = self.session.open_cursor(
            'statistics:' + self.uri, None, 'statistics=(size)')
        try:
            # Element [2] of the statistics entry is used as the size;
            # presumably the numeric value -- see the statistics cursor docs.
            return cstat[stat.dsrc.block_size][2]
        finally:
            # Release the cursor even if the statistics lookup fails.
            cstat.close()

    # This test varies the cache size and so needs to set up its own
    # connection. Override the standard methods so they open nothing.
    def setUpConnectionOpen(self, dir):
        """Open no connection here; ConnectionOpen() does it instead."""
        return None

    def setUpSessionOpen(self, conn):
        """Open no session here; ConnectionOpen() does it instead."""
        return None

    def ConnectionOpen(self, cacheSize):
        """Open self.conn and self.session in the current directory.

        cacheSize is a connection configuration fragment (possibly empty)
        that is spliced into the wiredtiger_open configuration string.
        Raises wiredtiger.WiredTigerError if the connection cannot be opened.
        """
        self.home = '.'
        conn_params = 'create,' + \
            cacheSize + ',error_prefix="%s: ",' % self.shortid() + \
            'statistics=(fast)'
        try:
            self.conn = wiredtiger.wiredtiger_open(self.home, conn_params)
        except wiredtiger.WiredTigerError:
            # Report the directory actually used, then re-raise: continuing
            # without a connection would only fail later with a less useful
            # error when opening the session.
            print("Failed conn at '%s' with config '%s'" %
                  (self.home, conn_params))
            raise
        self.session = self.conn.open_session(None)

    # Create a table, add keys with both big and small values, remove the
    # big ones, compact, and verify the file shrank.
    def test_compact02(self):
        self.ConnectionOpen(self.cacheSize)

        mb = 1024 * 1024
        params = 'key_format=i,value_format=S,' + self.fileConfig

        # 1. Create a table with the data, alternating record size.
        self.session.create(self.uri, params)
        c = self.session.open_cursor(self.uri, None)
        for i in range(self.nrecords):
            if i % 2 == 0:
                c[i] = str(i) + self.bigvalue
            else:
                c[i] = str(i) + self.smallvalue
        c.close()

        # 2. Checkpoint and get stats on the table to confirm the size.
        self.session.checkpoint()
        sz = self.getSize()
        self.pr('After populate ' + str(sz // mb) + 'MB')
        self.assertGreater(sz, self.fullsize)

        # 3. Delete the half of the records with the larger record size
        #    (the even keys).
        c = self.session.open_cursor(self.uri, None)
        count = 0
        for i in range(0, self.nrecords, 2):
            count += 1
            c.set_key(i)
            c.remove()
        c.close()
        self.pr('Removed total ' +
                str((count * len(self.bigvalue)) // mb) + 'MB')

        # 4. Call compact.
        self.session.compact(self.uri, None)

        # 5. Get stats on compacted table.
        sz = self.getSize()
        self.pr('After compact ' + str(sz // mb) + 'MB')

        # After compact, the file size should be less than half the full
        # size.
        self.assertLess(sz, self.fullsize // 2)
# Run the suite through the wttest harness when executed as a script.
if __name__ == '__main__':
    wttest.run()