Files
mongo/test/suite/test_join01.py
Don Anderson a94aa935f0 WT-2571 In join tests, use scenarios instead of lots of nested loops.
Move miscellaneous tests that don't need scenarios to their own test class.
2016-04-22 13:38:50 -04:00

343 lines
13 KiB
Python

#!/usr/bin/env python
#
# Public Domain 2014-2016 MongoDB, Inc.
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
# test_join01.py
# Join operations
# Basic tests for join
class test_join01(wttest.WiredTigerTestCase):
    """Basic join cursor tests, driven by the scenario tables below.

    Every *_scen list holds (scenario name, attribute dict) pairs.  The
    lists are combined by multiply_scenarios() into `scenarios`; the test
    method reads the attributes back as self.ref, self.joincfg0,
    self.joincfg1, self.do_proj, self.do_nested, self.do_stats and
    self.join_order.
    """

    # Number of rows inserted into the test table.
    nentries = 100

    # Which kind of reference cursor backs the 'ge' join: the main table
    # or index0.
    type_scen = [
        ('table', {'ref': 'table'}),
        ('index', {'ref': 'index'}),
    ]

    # Extra join configuration appended to the 'ge' join.
    bloom0_scen = [
        ('bloom0=0', {'joincfg0': ''}),
        ('bloom0=1000', {'joincfg0': ',strategy=bloom,count=1000'}),
        ('bloom0=10000', {'joincfg0': ',strategy=bloom,count=10000'}),
    ]

    # Extra join configuration appended to the two index1 range joins.
    bloom1_scen = [
        ('bloom1=0', {'joincfg1': ''}),
        ('bloom1=1000', {'joincfg1': ',strategy=bloom,count=1000'}),
        ('bloom1=10000', {'joincfg1': ',strategy=bloom,count=10000'}),
    ]

    # Whether the join cursor is opened with a (reversing) projection.
    projection_scen = [
        ('no-projection', {'do_proj': False}),
        ('projection', {'do_proj': True}),
    ]

    # Whether an additional nested join is AND-ed into the main join.
    nested_scen = [
        ('simple', {'do_nested': False}),
        ('nested', {'do_nested': True}),
    ]

    # Whether join statistics are checked after each iteration.
    stats_scen = [
        ('no-stats', {'do_stats': False}),
        ('stats', {'do_stats': True}),
    ]

    # Which recorded join is played first (see session_play_joins()).
    order_scen = [
        ('order=0', {'join_order': 0}),
        ('order=1', {'join_order': 1}),
        ('order=2', {'join_order': 2}),
        ('order=3', {'join_order': 3}),
    ]

    scenarios = number_scenarios(
        multiply_scenarios(
            '.',
            type_scen,
            bloom0_scen,
            bloom1_scen,
            projection_scen,
            nested_scen,
            stats_scen,
            order_scen))

    # We need statistics for these tests.
    conn_config = 'statistics=(all)'
def gen_key(self, i):
    """Return the key (as a one-element list) for zero-based row i.

    Keys are one greater than the row number, so row 0 gets key 1.
    """
    recno = i + 1
    return [recno]
def gen_values(self, i):
    """Return the three column values [v0, v1, v2] for zero-based row i.

    v0 is the decimal string of i, v1 is that string reversed, and v2 is
    an integer that orders every multiple of 3 ahead of all other rows
    (non-multiples are offset by one or two times self.nentries).
    """
    text = str(i)
    reversed_text = text[::-1]
    # multiples of 3 sort first
    sort_key = i + self.nentries * (i % 3)
    return [text, reversed_text, sort_key]
# Common function for testing iteration of join cursors
def iter_common(self, jc, do_proj, do_nested, join_order):
    """Iterate the join cursor once and verify the rows and their order.

    jc          -- join cursor to walk; jc.next() is called until it
                   returns something other than 0.
    do_proj     -- True when jc was opened with the reversing projection,
                   in which case values come back as (v2, v1, v0).
    do_nested   -- True when the nested join was added (see test_join()).
    join_order  -- which recorded join was played first (0..3).

    The order that the results are seen depends on the ordering of the
    joins: the first join drives the order that results are seen.

    Fails the test if a row has unexpected values, arrives out of order,
    is not expected at all, or if expected rows are missing at the end.
    Raises KeyError for a join_order outside 0..3.
    """
    if do_nested:
        expected_by_order = {
            0: [73, 82, 83, 92],
            1: [73, 82, 83, 92],
            2: [82, 92, 73, 83],
            3: [92, 73, 82, 83],
        }
    else:
        expected_by_order = {
            0: [73, 82, 62, 83, 92],
            1: [62, 73, 82, 83, 92],
            2: [62, 82, 92, 73, 83],
            3: [73, 82, 62, 83, 92],
        }
    # Copy, since entries are consumed as rows are matched.
    expect = list(expected_by_order[join_order])

    while jc.next() == 0:
        [k] = jc.get_keys()
        i = k - 1       # keys are row number + 1, see gen_key()
        if do_proj:     # our projection test simply reverses the values
            [v2, v1, v0] = jc.get_values()
        else:
            [v0, v1, v2] = jc.get_values()
        self.assertEqual(self.gen_values(i), [v0, v1, v2])
        # Check for an empty list before looking at expect[0], so that a
        # surplus row is reported as a test failure, not an IndexError.
        if not expect or i != expect[0]:
            self.tty('ERROR: ' + str(i) + ' is not next in: ' +
                     str(expect))
            self.tty('JOIN ORDER=' + str(join_order) + ', NESTED=' +
                     str(do_nested))
            self.fail(str(i) + ' is not next in: ' + str(expect))
        expect.pop(0)
    self.assertEqual(0, len(expect))
# Stats are collected twice: after iterating
# through the join cursor once, and secondly after resetting
# the join cursor and iterating again.
def stats(self, jc, which):
    """Open a statistics cursor on the join cursor and check its contents.

    jc    -- the join cursor whose statistics are examined.
    which -- caller-supplied pass number; not used by this method.

    The expected statistics are checked twice, the second time after an
    explicit reset of the statistics cursor.
    """
    statcur = self.session.open_cursor('statistics:join', jc, None)
    # pick a stat we always expect to see
    statdesc = 'bloom filter false positives'
    expectstats = [
        'join: index:join01:index1: ' + statdesc,
        'join: index:join01:index2: ' + statdesc ]
    # The third entry depends on which reference cursor the scenario used
    # for the 'ge' join, and on whether a projection was applied.
    if self.ref == 'index':
        expectstats.append('join: index:join01:index0: ' + statdesc)
    elif self.do_proj:
        expectstats.append('join: table:join01(v2,v1,v0): ' + statdesc)
    else:
        expectstats.append('join: table:join01: ' + statdesc)
    # Hand each check its own copy: a checker that consumes the list it
    # is given would otherwise leave the second check with nothing to
    # verify.
    self.check_stats(statcur, list(expectstats))
    statcur.reset()
    self.check_stats(statcur, list(expectstats))
    statcur.close()
def statstr_to_int(self, str):
    """
    Convert a statistics value string to an integer.

    The string may be in either form: '12345' or '33M (33604836)'.
    When a parenthesized part is present, the number inside the last
    pair of parentheses is returned.
    """
    # rpartition() yields the whole string as the tail when there is no
    # '(' at all, so both forms go through the same conversion.
    _, _, tail = str.rpartition('(')
    digits = tail.rstrip(')')
    return int(digits)
# All of the expect strings should appear
def check_stats(self, statcursor, expectstats):
    """Verify every statistics entry and that all expected ones appear.

    statcursor  -- statistics cursor; it is reset first and then iterated,
                   yielding (id, description, value string, value) rows.
    expectstats -- descriptions that must each appear in the cursor.
                   The caller's list is left unmodified.

    For each row, checks that the description and value string are
    strings, that the value is an int, and that the value string converts
    (via statstr_to_int()) to the value.  Fails the test if any expected
    description was not seen.
    """
    stringclass = ''.__class__
    intclass = (0).__class__

    # Track what is still unseen in a private copy, so callers can reuse
    # their list for a later check.
    missing = list(expectstats)

    # Reset the cursor, we're called multiple times.
    statcursor.reset()

    self.printVerbose(3, 'statistics:')
    for _, desc, valstr, val in statcursor:
        self.assertEqual(type(desc), stringclass)
        self.assertEqual(type(valstr), stringclass)
        self.assertEqual(type(val), intclass)
        self.assertEqual(val, self.statstr_to_int(valstr))
        self.printVerbose(3, ' stat: \'' + desc + '\', \'' +
                          valstr + '\', ' + str(val))
        if desc in missing:
            missing.remove(desc)

    self.assertTrue(len(missing) == 0,
                    'missing expected values in stats: ' + str(missing))
def session_record_join(self, jc, refc, config, order, joins):
    """Queue a join for later replay instead of performing it now.

    Appends [order, [jc, refc, config]] to joins; the inner list holds
    the arguments later handed to session_play_one_join().
    """
    join_args = [jc, refc, config]
    joins.append([order, join_args])
def session_play_one_join(self, firsturi, jc, refc, config):
    """Perform one recorded join on the session.

    firsturi -- URI of the reference cursor that is joined first.
    jc       -- the join cursor.
    refc     -- the reference cursor being joined.
    config   -- join configuration string, or None.

    When refc has the same URI as the first join, the 'strategy=bloom'
    text is removed from the configuration (anything else, such as a
    count setting, is left in place).
    NOTE(review): presumably the first join cannot use a bloom filter
    because it drives the iteration -- confirm against the WiredTiger
    join documentation.
    """
    joins_first_uri = refc.uri == firsturi
    if joins_first_uri and config is not None:
        config = config.replace('strategy=bloom', '')
    self.session.join(jc, refc, config)
def session_play_joins(self, joins, join_order):
    """Replay the recorded joins, starting with those at join_order.

    joins      -- list of [order, [jc, refc, config]] entries, as built
                  by session_record_join().
    join_order -- entries whose order is >= this value are played first
                  (in recorded order), followed by the remaining entries.

    The URI of the first reference cursor played is passed along with
    every join, so session_play_one_join() can recognize joins on it.
    """
    # Rotate the recorded list: the 'late' entries move to the front.
    ordered = [entry for entry in joins if entry[0] >= join_order]
    ordered.extend(entry for entry in joins if entry[0] < join_order)

    firsturi = None
    for _, (jc, refc, config) in ordered:
        if firsturi is None:
            firsturi = refc.uri
        self.session_play_one_join(firsturi, jc, refc, config)
# Common function for testing the most basic functionality
# of joins
def test_join(self):
    """Build a table with three indices, join them, and verify iteration.

    The scenario attributes (joincfg0, joincfg1, do_proj, do_nested,
    do_stats, join_order, ref) select the join configuration.  The join
    cursor is iterated four times, with a reset between iterations, and
    join statistics are checked after each of the first three iterations
    when do_stats is set.
    """
    joincfg0 = self.joincfg0
    joincfg1 = self.joincfg1
    do_proj = self.do_proj
    do_nested = self.do_nested
    do_stats = self.do_stats
    join_order = self.join_order

    def open_at(uri, key):
        # Open a cursor on uri and position it exactly on key; the search
        # is required to succeed.
        cursor = self.session.open_cursor(uri, None, None)
        cursor.set_key(key)
        self.assertEqual(0, cursor.search())
        return cursor

    closeme = []
    joins = []          # cursors to be joined

    self.session.create('table:join01', 'key_format=r' +
                        ',value_format=SSi,columns=(k,v0,v1,v2)')
    self.session.create('index:join01:index0','columns=(v0)')
    self.session.create('index:join01:index1','columns=(v1)')
    self.session.create('index:join01:index2','columns=(v2)')

    c = self.session.open_cursor('table:join01', None, None)
    for i in range(0, self.nentries):
        c.set_key(*self.gen_key(i))
        c.set_value(*self.gen_values(i))
        c.insert()
    c.close()

    if do_proj:
        proj_suffix = '(v2,v1,v0)'  # Reversed values
    else:
        proj_suffix = ''            # Default projection (v0,v1,v2)

    # We join on index2 first, not using bloom indices.
    # This defines the order that items are returned.
    # index2 sorts multiples of 3 first (see gen_values())
    # and by using 'gt' and key 99, we'll skip multiples of 3,
    # and examine primary keys 2,5,8,...,95,98,1,4,7,...,94,97.
    jc = self.session.open_cursor('join:table:join01' + proj_suffix,
                                  None, None)
    # Adding a projection to a reference cursor should be allowed.
    # Key 99 skips all entries w/ primary key divisible by three.
    c2 = open_at('index:join01:index2(v1)', 99)
    self.session_record_join(jc, c2, 'compare=gt', 0, joins)

    # Then select all the numbers 0-99 whose string representation
    # sort >= '60'.
    if self.ref == 'index':
        c0 = open_at('index:join01:index0', '60')
    else:
        c0 = open_at('table:join01', 60)
    self.session_record_join(jc, c0, 'compare=ge' + joincfg0, 1, joins)

    # Then select all numbers whose reverse string representation
    # is strictly between '21' and '41'.
    c1a = open_at('index:join01:index1(v1)', '21')
    self.session_record_join(jc, c1a, 'compare=gt' + joincfg1, 2, joins)
    c1b = open_at('index:join01:index1(v1)', '41')
    self.session_record_join(jc, c1b, 'compare=lt' + joincfg1, 2, joins)

    # Numbers that satisfy these 3 conditions (with ordering implied by c2):
    #    [73, 82, 62, 83, 92].
    #
    # After iterating, we should be able to reset and iterate again.
    if do_nested:
        # To test nesting, we create two new levels of conditions:
        #
        #     x == 72 or x == 73 or x == 82 or x == 83 or
        #       (x >= 90 and x <= 99)
        #
        # that will get AND-ed into our existing join.  The expected
        # result is  [73, 82, 83, 92].
        #
        # We don't specify the projection here, it should be picked up
        # from the 'enclosing' join.
        nest1 = self.session.open_cursor('join:table:join01', None, None)
        nest2 = self.session.open_cursor('join:table:join01', None, None)

        nc = open_at('index:join01:index0', '90')
        self.session.join(nest2, nc, 'compare=ge')  # joincfg left out
        closeme.append(nc)

        nc = open_at('index:join01:index0', '99')
        self.session.join(nest2, nc, 'compare=le')
        closeme.append(nc)

        self.session.join(nest1, nest2, "operation=or")

        for val in [ '72', '73', '82', '83' ]:
            nc = open_at('index:join01:index0', val)
            self.session.join(nest1, nc, 'compare=eq,operation=or' +
                              joincfg0)
            closeme.append(nc)
        self.session_record_join(jc, nest1, None, 3, joins)

    self.session_play_joins(joins, join_order)

    # Iterate, optionally check statistics, and reset: three times over.
    for which in range(3):
        self.iter_common(jc, do_proj, do_nested, join_order)
        if do_stats:
            self.stats(jc, which)
        jc.reset()
    # One final pass after the last reset.
    self.iter_common(jc, do_proj, do_nested, join_order)

    jc.close()
    c2.close()
    c1a.close()
    c1b.close()
    c0.close()
    if do_nested:
        nest1.close()
        nest2.close()
    # closeme is only populated by the nested case.
    for c in closeme:
        c.close()
    self.session.drop('table:join01')
if __name__ == '__main__':
    # Executed as a script: hand control to the wttest runner.
    wttest.run()