mongo/test/suite/test_schema03.py

#!/usr/bin/env python
#
# Public Domain 2014-2016 MongoDB, Inc.
# Public Domain 2008-2014 WiredTiger, Inc.
#
# This is free and unencumbered software released into the public domain.
#
# Anyone is free to copy, modify, publish, use, compile, sell, or
# distribute this software, either in source code form or as a compiled
# binary, for any purpose, commercial or non-commercial, and by any
# means.
#
# In jurisdictions that recognize copyright laws, the author or authors
# of this software dedicate any and all copyright interest in the
# software to the public domain. We make this dedication for the benefit
# of the public at large and to the detriment of our heirs and
# successors. We intend this dedication to be an overt act of
# relinquishment in perpetuity of all present and future rights to this
# software under copyright law.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
# OTHER DEALINGS IN THE SOFTWARE.

import os
import suite_random
import wiredtiger, wtscenario, wttest
from wtscenario import check_scenarios

try:
    # Windows does not support getrlimit/setrlimit, so the resource
    # module may not be importable there.  Tolerate only the failed
    # import; setUp skips the test on Windows before resource is used.
    import resource
except ImportError:
    pass

# test_schema03.py
#    Bigger, more 'randomly generated' schemas and data.
#    This test is complex.  If it fails, rerun with modified values for
# SHOW_PYTHON* variables.
def extract_random_from_list(rand, list):
    """
    Pick one element of 'list' at a position chosen by
    rand.rand_range(0, len(list)).

    Returns a tuple (element, remainder) where remainder is a new
    sequence holding every other element in its original order.  The
    sequence passed in is not modified.
    """
    index = rand.rand_range(0, len(list))
    remainder = list[:index] + list[index + 1:]
    return (list[index], remainder)

class tabconfig:
    """
    Describes one table used in the test: its name and position, the
    column groups and indices attached to it, and the key/value column
    layout used to generate its rows.
    """
    def __init__(self):
        self.tablename = ''
        self.tableidx = -1
        self.nentries = 0
        self.nkeys = 0         # number of key columns
        self.nvalues = 0       # number of value columns
        self.keyformats = ''
        self.valueformats = ''
        self.cglist = []       # the cgconfig objects for this table
        self.idxlist = []      # the idxconfig objects for this table

    def gen_keys(self, i):
        """
        Return the list of key column values for row number i, one
        entry per character of self.keyformats.

        Keys should not be inserted in sorted order, so the digits of
        the row number are reversed (235 => 532).  Because 100 reversed
        would be 001, a non-zero digit is appended before reversing.
        """
        scrambled = int(str(i * 10 + (i % 7) + 1)[::-1])
        keys = []
        # ASSUME: each format is 1 char
        for fmt in self.keyformats:
            if fmt == 'r':
                keys.append(long(i+1))
            elif fmt == 'i':
                keys.append(scrambled)
            elif fmt == 'S':
                keys.append(str(scrambled))
        return keys

    def gen_values(self, i):
        """
        Return the list of value column values for row number i, one
        entry per 'S' or 'i' character of self.valueformats.
        """
        # ASSUME: each format is 1 char
        return [str(i) if fmt == 'S' else i
                for fmt in self.valueformats
                if fmt in ('S', 'i')]

    def columns_for_groups(self, collist):
        """
        Distribute every column number in collist over the column
        groups in self.cglist, appending to each group's 'columns'.

        Raises ValueError when there are fewer columns than groups.
        """
        ngroups = len(self.cglist)
        ncols = len(collist)
        rand = suite_random.suite_random(ncols, ngroups)

        # Every column group needs at least one column, so only the
        # surplus columns are free to land anywhere.
        surplus = ncols - ngroups
        if surplus < 0:
            raise ValueError(
                'columns_for_groups expects a column list (len=' +
                str(ncols) +
                ') larger than column group list (len=' +
                str(ngroups) + ')')

        # First pass: one column for each group
        remaining = collist
        for group in self.cglist:
            picked, remaining = extract_random_from_list(rand, remaining)
            group.columns.append(picked)

        # Second pass: hand each surplus column to a randomly chosen group
        for _ in range(0, surplus):
            group = self.cglist[rand.rand_range(0, ngroups)]
            picked, remaining = extract_random_from_list(rand, remaining)
            group.columns.append(picked)

        # Nothing may be left over
        if len(remaining) > 0:
            raise AssertionError('column list did not get emptied')

    def columns_for_indices(self, collist):
        """
        Choose columns from collist for every index in self.idxlist,
        appending to each index's 'columns' and extending its 'formats'
        with the format character of each chosen column.
        """
        ncols = len(collist)
        nindices = len(self.idxlist)
        firstcol = 0

        # KNOWN LIMITATION: Indices should not include primary keys
        # Remove this statement when the limitation is fixed.
        #firstcol = self.nkeys
        # END KNOWN LIMITATION.

        rand = suite_random.suite_random(ncols, nindices)

        # Each index tries for a first column with probability 1.0;
        # the chance of trying for another one halves after every
        # attempt.  Different indices may share columns, but a column
        # is never added twice to the same index.
        for index in self.idxlist:
            chance = 1.0
            for _ in range(0, ncols - firstcol):
                if rand.rand_float() > chance:
                    break
                column = collist[rand.rand_range(firstcol, ncols)]
                if column not in index.columns:
                    index.columns.append(column)
                    # ASSUME: each format is 1 char
                    if column >= self.nkeys:
                        index.formats += self.valueformats[column - self.nkeys]
                    else:
                        index.formats += self.keyformats[column]
                chance *= 0.5

class cgconfig:
    """
    Describes one column group used in the test.
    A tabconfig keeps a list of these in its cglist.
    """
    def __init__(self):
        # Which creation batch this group belongs to: 0 or 1.
        self.createset = 0
        # Column numbers assigned to this group.
        self.columns = []
        self.cgname = ''

class idxconfig:
    """
    Describes one index used in the test.
    A tabconfig keeps a list of these in its idxlist.
    """
    def __init__(self):
        self.tab = None       # the tabconfig this index belongs to
        self.idxname = ''
        self.formats = ''     # one format character per indexed column
        self.columns = []     # column numbers covered by the index
        self.createset = 0    # 0 or 1 depending on which set to create them.

    def gen_keys(self, i):
        """
        Return the list of index key values for row number i, one
        entry per character of self.formats.

        A column that is a key in the primary table contributes the
        digit-reversed row number; a column that is a value in the
        primary table contributes i itself.  'S' columns are
        converted to strings.
        """
        scrambled = int(str(i * 10 + (i % 7) + 1)[::-1])
        keys = []
        for colpos, fmt in enumerate(self.formats):
            is_value_column = self.columns[colpos] >= self.tab.nkeys
            key = i if is_value_column else scrambled
            keys.append(str(key) if fmt == 'S' else key)
        return keys

class test_schema03(wttest.WiredTigerTestCase):
    """
    Test schemas - a 'predictably random' assortment of columns,
    column groups and indices are created within tables, and are
    created in various orders as much as the API allows.  On some runs
    the connection will be closed and reopened at a particular point
    to test that the schemas (and data) are saved and read correctly.

    The test is run multiple times, using scenarios.
    The test always follows these steps:
    - table:      create tables
    - colgroup0:  create (some) colgroups
    - index0:     create (some) indices
    - colgroup1:  create (more) colgroups
    - index1:     create (more) indices
    - populate0:  populate 1st time
    - index2:     create (more) indices
    - populate1:  populate 2nd time (more key/values)
    - check:      check key/values

    The variations represented by scenarios are:
    - how many tables to create
    - how many colgroups to create at each step (may be 0)
    - how many indices to create at each step (may be 0)
    - between each step, whether to close/reopen the connection

    Scenario attributes read by the test code:
    - s_restart:          list of step names (see the steps above)
    - s_ntable:           number of tables
    - s_colgroup:         [count for colgroup0, count for colgroup1]
    - s_index:            [count for index0, index1, index2]
    - s_index_args:       text appended to the index create config
    - s_extra_table_args: text appended to the table create config
    """

    # Boost cache size and number of sessions for this test
    conn_config = 'cache_size=100m,session_max=1000'

    ################################################################
    # These three variables can be altered to help generate
    # and pare down failing test cases.

    # Set to true to get python test program fragment on stdout,
    # used by show_python() below.
    SHOW_PYTHON = False

    # When SHOW_PYTHON is set, we print an enormous amount of output.
    # To only print for a given scenario, set this
    SHOW_PYTHON_ONLY_SCEN = None  # could be e.g. [2] or [0,1]

    # To print verbosely for only a given table, set this
    SHOW_PYTHON_ONLY_TABLE = None # could be e.g. [2] or [0,1]

    ################################################################

    # Set whenever we are working with a table
    current_table = None

    # Upper bound on the row numbers inserted into each table
    # (see the two populate steps in test_schema).
    nentries = 50

    # We need to have a large number of open files available
    # to run this test.  We probably don't need quite this many,
    # but boost it up to this limit anyway.
    OPEN_FILE_LIMIT = 1000

    # Each entry names the step(s) at which a connection reopen is
    # requested; finished_step() consults s_restart after every step.
    # NOTE(review): P is presumably the probability weight used by
    # prune_scenarios below -- confirm against wtscenario.
    restart_scenarios = check_scenarios([('table', dict(s_restart=['table'],P=0.3)),
                         ('colgroup0', dict(s_restart=['colgroup0'],P=0.3)),
                         ('index0', dict(s_restart=['index0'],P=0.3)),
                         ('colgroup1', dict(s_restart=['colgroup1'],P=0.3)),
                         ('index1', dict(s_restart=['index1'],P=0.3)),
                         ('populate0', dict(s_restart=['populate0'],P=0.3)),
                         ('index2', dict(s_restart=['index2'],P=0.3)),
                         ('populate1', dict(s_restart=['populate1'],P=0.3)),
                         ('ipop', dict(s_restart=['index0','populate0'],P=0.3)),
                         ('all', dict(s_restart=['table','colgroup0','index0','colgroup1','index1','populate0','index2','populate1'],P=1.0)),
    ])

    # For each quick_scenarios call below, the first list holds the
    # candidate values for the named attribute and the second list
    # holds one probability per candidate.

    # How many tables to create.
    ntable_scenarios = wtscenario.quick_scenarios('s_ntable',
        [1,2,5,8], [1.0,0.4,0.5,0.5])
    # How many column groups to request in each of the two createsets.
    ncolgroup_scenarios = wtscenario.quick_scenarios('s_colgroup',
        [[1,0],[0,1],[2,4],[8,5]], [1.0,0.2,0.3,1.0])
    # How many indices to request in each of the three createsets.
    nindex_scenarios = wtscenario.quick_scenarios('s_index',
        [[1,1,1],[3,2,1],[5,1,3]], [1.0,0.5,1.0])
    # Extra configuration appended when creating indices.
    idx_args_scenarios = wtscenario.quick_scenarios('s_index_args',
        ['', ',type=file', ',type=lsm'], [0.5, 0.3, 0.2])
    # Extra configuration appended when creating tables.
    table_args_scenarios = wtscenario.quick_scenarios('s_extra_table_args',
        ['', ',type=file', ',type=lsm'], [0.5, 0.3, 0.2])

    # Combine all of the scenario lists above.
    all_scenarios = wtscenario.multiply_scenarios('_', restart_scenarios, ntable_scenarios, ncolgroup_scenarios, nindex_scenarios, idx_args_scenarios, table_args_scenarios)

    # Prune the scenarios according to the probabilities given above.
    # NOTE(review): the 30 is presumably the number of scenarios to
    # keep -- confirm against wtscenario.prune_scenarios.
    scenarios = wtscenario.prune_scenarios(all_scenarios, 30)
    scenarios = wtscenario.number_scenarios(scenarios)

    # Note: the set can be reduced here for debugging, e.g.
    # scenarios = scenarios[40:44]
    #   or
    # scenarios = [ scenarios[0], scenarios[30], scenarios[40] ]

    #wttest.WiredTigerTestCase.printVerbose(2, 'test_schema03: running ' + \
    #                      str(len(scenarios)) + ' of ' + \
    #                      str(len(all_scenarios)) + ' possible scenarios')

    # This test requires a large number of open files.
    # Increase our resource limits before we start
    def setUp(self):
        """
        Set the soft open-file limit to OPEN_FILE_LIMIT, then run the
        base class setUp.

        The test is skipped on Windows, and when the hard open-file
        limit is lower than OPEN_FILE_LIMIT.  The limits in force on
        entry are saved in self.origFileLimit so that tearDown can
        restore them.
        """
        if os.name == "nt":
            self.skipTest('Unix specific test skipped on Windows')

        self.origFileLimit = resource.getrlimit(resource.RLIMIT_NOFILE)
        hardlimit = self.origFileLimit[1]
        newlimit = (self.OPEN_FILE_LIMIT, hardlimit)
        # RLIM_INFINITY is a sentinel meaning "no limit" (it may be a
        # negative number), so it must not be compared numerically:
        # an unlimited hard limit always satisfies the requirement.
        if hardlimit != resource.RLIM_INFINITY and newlimit[0] > hardlimit:
            self.skipTest('Require %d open files, only %d available' % newlimit)
        resource.setrlimit(resource.RLIMIT_NOFILE, newlimit)
        super(test_schema03, self).setUp()

    def tearDown(self):
        """
        Run the base class tearDown, then restore the open-file limits
        that setUp saved in self.origFileLimit.
        """
        try:
            super(test_schema03, self).tearDown()
        finally:
            # Restore the limits even when the base class teardown
            # raises, so the changed limit does not outlive this test.
            resource.setrlimit(resource.RLIMIT_NOFILE, self.origFileLimit)

    def gen_formats(self, rand, n, iskey):
        """
        Return a string of n format characters.  Each character is
        'S' when rand.rand_range(0, 2) yields 0, otherwise 'i'.
        The iskey argument is accepted but not used.
        """
        return ''.join('S' if rand.rand_range(0, 2) == 0 else 'i'
                       for _ in range(0, n))

    def show_python(self, s):
        """
        Print s, indented by eight spaces, as a line of python test
        program fragment.  Nothing is printed unless SHOW_PYTHON is
        set and the current table and scenario pass the optional
        SHOW_PYTHON_ONLY_TABLE / SHOW_PYTHON_ONLY_SCEN filters.
        """
        if not self.SHOW_PYTHON:
            return
        only_tables = self.SHOW_PYTHON_ONLY_TABLE
        if only_tables is not None and self.current_table not in only_tables:
            return
        only_scens = self.SHOW_PYTHON_ONLY_SCEN
        if only_scens is not None and self.scenario_number not in only_scens:
            return
        print('        ' + s)

    def join_names(self, sep, prefix, list):
        """
        Return the entries of 'list', each converted to a string and
        given the leading 'prefix', joined together with 'sep'.
        """
        names = []
        for val in list:
            names.append(prefix + str(val))
        return sep.join(names)

    def create(self, what, tablename, whatname, columnlist, extra_args=''):
        """
        Create the object '<what>:<tablename>:<whatname>' over the
        given column numbers (named 'c<N>'), with extra_args appended
        to the configuration, and assert that the call returns 0.
        """
        uri = ':'.join([what, tablename, whatname])
        config = "columns=(" + self.join_names(',', 'c', columnlist) + ")" + extra_args
        self.show_python("self.session.create('" + uri + "', '" + config + "')")
        self.assertEqual(self.session.create(uri, config), 0)

    def finished_step(self, name):
        """
        Mark the end of the named test step, reopening the connection
        when the current scenario requests a restart at that step.

        name -- the step just completed, e.g. 'table' or 'populate0'.
        """
        # s_restart is a list of step names (see restart_scenarios),
        # so test for membership: comparing the list with a string
        # using == is never true, and no reopen would ever happen.
        if name in self.s_restart:
            # Like the scenario header printed by test_schema, only
            # emit this trace when python fragments are being shown.
            if self.SHOW_PYTHON:
                print("  # Reopening connection at step: " + name)
            self.reopen_conn()

    def test_schema(self):
        """
        Run one scenario: build the table, column group and index
        configurations, create them in batches, populate the tables
        twice and finally check every entry.  finished_step() is
        called after each step with the step's name.
        """
        rand = suite_random.suite_random()
        if self.SHOW_PYTHON:
            print '  ################################################'
            print '  # Running scenario ' + str(self.scenario_number)

        ntables = self.s_ntable

        # Report known limitations in the test,
        # we'll work around these later, in a loop where we don't want to print.
        self.KNOWN_LIMITATION('Column groups created after indices confuses things')

        # Column groups are created at two different times.
        # We call these two batches 'createsets'.
        # So that we don't have exactly the same number of column groups
        # for each table, for tests that indicate >1 colgroup, we
        # increase the number of column groups for each table
        tabconfigs = []
        for i in range(0, ntables):
            self.current_table = i
            tc = tabconfig()
            tc.tablename = 't' + str(i)
            tc.tableidx = i
            tabconfigs.append(tc)

            for createset in range(0, 2):
                ncg = self.s_colgroup[createset]
                if ncg > 1:
                    ncg += i
                for k in range(0, ncg):
                    thiscg = cgconfig()
                    thiscg.createset = createset

                    # KNOWN LIMITATION: Column groups created after
                    # indices confuses things.  So for now, put all
                    # column group creation in the first set.
                    # Remove this statement when the limitation is fixed.
                    thiscg.createset = 0
                    # END KNOWN LIMITATION

                    thiscg.cgname = 'g' + str(len(tc.cglist))
                    tc.cglist.append(thiscg)

            # The same idea for indices, except that we create them in
            # three sets
            for createset in range(0, 3):
                nindex = self.s_index[createset]
                if nindex > 1:
                    nindex += i
                for k in range(0, nindex):
                    thisidx = idxconfig()
                    thisidx.createset = createset
                    thisidx.idxname = 'i' + str(len(tc.idxlist))
                    thisidx.tab = tc
                    tc.idxlist.append(thisidx)

            # We'll base the number of key/value columns
            # loosely on the number of column groups and indices.

            colgroups = len(tc.cglist)
            indices = len(tc.idxlist)
            nall = colgroups * 2 + indices
            k = rand.rand_range(1, nall)
            v = rand.rand_range(0, nall)
            # we need at least one value per column group
            if v < colgroups:
                v = colgroups
            tc.nkeys = k
            tc.nvalues = v
            tc.keyformats = self.gen_formats(rand, tc.nkeys, True)
            tc.valueformats = self.gen_formats(rand, tc.nvalues, False)

            # Simple naming (we'll test odd naming elsewhere):
            #  tables named 't0' --> 't<N>'
            #  within each table:
            #     columns named 'c0' --> 'c<N>'
            #     colgroups named 'g0' --> 'g<N>'
            #     indices named 'i0' --> 'i<N>'

            # Build the table configuration: formats, then every column
            # name (keys first, then values), then every colgroup name.
            config = ""
            config += "key_format=" + tc.keyformats
            config += ",value_format=" + tc.valueformats
            config += ",columns=("
            for j in range(0, tc.nkeys + tc.nvalues):
                if j != 0:
                    config += ","
                config += "c" + str(j)
            config += "),colgroups=("
            for j in range(0, len(tc.cglist)):
                if j != 0:
                    config += ","
                config += "g" + str(j)
            config += ")"
            config += self.s_extra_table_args
            # indices are not declared here
            self.show_python("self.session.create('table:" + tc.tablename + "', '" + config + "')")
            self.session.create("table:" + tc.tablename, config)

            # Column groups are given value columns only; indices may
            # draw from both key and value columns.
            tc.columns_for_groups(range(tc.nkeys, tc.nkeys + tc.nvalues))
            tc.columns_for_indices(range(0, tc.nkeys + tc.nvalues))

        self.finished_step('table')

        for createset in (0, 1):
            # Create column groups in this set
            # e.g. self.session.create("colgroup:t0:g1", "columns=(c3,c4)")
            for tc in tabconfigs:
                self.current_table = tc.tableidx
                for cg in tc.cglist:
                    if cg.createset == createset:
                        self.create('colgroup', tc.tablename, cg.cgname, cg.columns)

            self.finished_step('colgroup' + str(createset))

            # Create indices in this set
            # e.g. self.session.create("index:t0:i1", "columns=(c3,c4)")
            for tc in tabconfigs:
                self.current_table = tc.tableidx
                for idx in tc.idxlist:
                    if idx.createset == createset:
                        self.create('index', tc.tablename, idx.idxname, idx.columns, self.s_index_args)

            self.finished_step('index' + str(createset))

        # populate first batch: a random number of rows, counted from 0
        # NOTE: 'max' shadows the builtin of the same name in this method.
        for tc in tabconfigs:
            self.current_table = tc.tableidx
            max = rand.rand_range(0, self.nentries)
            self.populate(tc, xrange(0, max))

        self.finished_step('populate0')

        # Create indices in third set
        # NOTE(review): unlike the first two sets, these indices are
        # created without self.s_index_args and without updating
        # self.current_table -- confirm that this is intentional.
        for tc in tabconfigs:
            for idx in tc.idxlist:
                if idx.createset == 2:
                    self.create('index', tc.tablename, idx.idxname, idx.columns)

        self.finished_step('index2')

        # populate second batch: continue from the rows already
        # inserted (populate counts them in tc.nentries) up to nentries
        for tc in tabconfigs:
            self.current_table = tc.tableidx
            self.populate(tc, xrange(tc.nentries, self.nentries))

        self.finished_step('populate1')

        for tc in tabconfigs:
            self.current_table = tc.tableidx
            self.check_entries(tc)

    def populate(self, tc, insertrange):
        """
        Insert one row into the table described by tc for every row
        number in insertrange, using tc.gen_keys/tc.gen_values, and
        add one to tc.nentries for each row inserted.
        """
        show = self.show_python
        uri = 'table:' + tc.tablename
        show("cursor = self.session.open_cursor('" + uri + "', None, None)")
        cursor = self.session.open_cursor(uri, None, None)
        for rowno in insertrange:
            keys = tc.gen_keys(rowno)
            values = tc.gen_values(rowno)
            show("cursor.set_key(*" + str(keys) + ")")
            cursor.set_key(*keys)
            show("cursor.set_value(*" + str(values) + ")")
            cursor.set_value(*values)
            show("cursor.insert()")
            cursor.insert()
            tc.nentries += 1
        show("cursor.close()")
        cursor.close()

    def check_one(self, name, cursor, key, val):
        """
        Search the cursor for key and assert that the search returns
        0 and that the cursor then yields exactly key and val.
        'name' is used only in the python fragment output.
        """
        show = self.show_python
        keytext = str(key)
        valtext = str(val)

        show('# search[' + name + '](' + keytext + ')')
        show("cursor.set_key(*" + keytext + ")")
        cursor.set_key(*key)

        show("ok = cursor.search()")
        ok = cursor.search()
        show("self.assertEqual(ok, 0)")
        self.assertEqual(ok, 0)

        show("self.assertEqual(" + keytext + ", cursor.get_keys())")
        self.assertEqual(key, cursor.get_keys())
        show("self.assertEqual(" + valtext + ", cursor.get_values())")
        self.assertEqual(val, cursor.get_values())

    def check_entries(self, tc):
        """
        Verify the entries of the primary table described by tc and
        of each of its indices: the number of entries must equal
        tc.nentries and every generated key must find its values.
        """
        show = self.show_python
        table_uri = 'table:' + tc.tablename
        show('# check_entries: ' + tc.tablename)
        show("cursor = self.session.open_cursor('" + table_uri + "', None, None)")
        cursor = self.session.open_cursor(table_uri, None, None)
        self.assertEqual(sum(1 for _ in cursor), tc.nentries)
        for i in range(0, tc.nentries):
            self.check_one(tc.tablename, cursor, tc.gen_keys(i), tc.gen_values(i))
        cursor.close()
        show("cursor.close()")

        # Although it's possible to open an index on some partial
        # list of columns, we'll keep it simple here, and always
        # ask for every value column of the table.
        valuecols = ['c' + str(n) for n in range(tc.nkeys, tc.nvalues + tc.nkeys)]
        cols = '(' + ','.join(valuecols) + ')'

        # for each index, check each entry
        for idx in tc.idxlist:
            index_uri = 'index:' + tc.tablename + ':' + idx.idxname
            show('# check_entries: ' + index_uri + cols)
            show("cursor = self.session.open_cursor('" + index_uri + cols + "', None, None)")
            cursor = self.session.open_cursor(index_uri + cols, None, None)
            self.assertEqual(sum(1 for _ in cursor), tc.nentries)
            for i in range(0, tc.nentries):
                self.check_one(index_uri, cursor, idx.gen_keys(i), tc.gen_values(i))
            cursor.close()
            show("cursor.close()")
# When this file is executed directly as a script, hand control to
# wttest.run().
if __name__ == '__main__':
    wttest.run()