Files
mongo/buildscripts/resmokelib/testing/hooks/initialsync.py
Max Hirschhorn b8f6239094 SERVER-29560 Split hooks.py module into separate files.
Also defines a generic Registry metaclass for making class declarations
store a reference to the class along with a name so that they can be
looked up later.
2017-06-14 20:44:52 -04:00

180 lines
7.6 KiB
Python

"""
Testing hook for verifying correctness of initial sync.
"""
from __future__ import absolute_import
import os.path
import random
import bson
import pymongo
import pymongo.errors
from . import cleanup
from . import jsfile
from ... import errors
from ... import utils
class BackgroundInitialSync(jsfile.JsCustomBehavior):
"""
After every test, this hook checks if a background node has finished initial sync and if so,
validates it, tears it down, and restarts it.
This test accepts a parameter 'n' that specifies a number of tests after which it will wait for
replication to finish before validating and restarting the initial sync node. It also accepts
a parameter 'use_resync' for whether to restart the initial sync node with resync or by
shutting it down and restarting it.
This requires the ReplicaSetFixture to be started with 'start_initial_sync_node=True'. If used
at the same time as CleanEveryN, the 'n' value passed to this hook should be equal to the 'n'
value for CleanEveryN.
"""
DEFAULT_N = cleanup.CleanEveryN.DEFAULT_N
def __init__(self, hook_logger, fixture, use_resync=False, n=DEFAULT_N, shell_options=None):
description = "Background Initial Sync"
js_filename = os.path.join("jstests", "hooks", "run_initial_sync_node_validation.js")
jsfile.JsCustomBehavior.__init__(self, hook_logger, fixture, js_filename,
description, shell_options)
self.use_resync = use_resync
self.n = n
self.tests_run = 0
self.random_restarts = 0
# Restarts initial sync by shutting down the node, clearing its data, and restarting it,
# or by calling resync if use_resync is specified.
def __restart_init_sync(self, test_report, sync_node, sync_node_conn):
if self.use_resync:
self.hook_test_case.logger.info("Calling resync on initial sync node...")
cmd = bson.SON([("resync", 1), ("wait", 0)])
sync_node_conn.admin.command(cmd)
else:
# Tear down and restart the initial sync node to start initial sync again.
if not sync_node.teardown():
raise errors.ServerFailure("%s did not exit cleanly" % (sync_node))
self.hook_test_case.logger.info("Starting the initial sync node back up again...")
sync_node.setup()
sync_node.await_ready()
def _after_test_impl(self, test, test_report, description):
self.tests_run += 1
sync_node = self.fixture.get_initial_sync_node()
sync_node_conn = utils.new_mongo_client(port=sync_node.port)
# If it's been 'n' tests so far, wait for the initial sync node to finish syncing.
if self.tests_run >= self.n:
self.hook_test_case.logger.info(
"%d tests have been run against the fixture, waiting for initial sync"
" node to go into SECONDARY state",
self.tests_run)
self.tests_run = 0
cmd = bson.SON([("replSetTest", 1),
("waitForMemberState", 2),
("timeoutMillis", 20 * 60 * 1000)])
sync_node_conn.admin.command(cmd)
# Check if the initial sync node is in SECONDARY state. If it's been 'n' tests, then it
# should have waited to be in SECONDARY state and the test should be marked as a failure.
# Otherwise, we just skip the hook and will check again after the next test.
try:
state = sync_node_conn.admin.command("replSetGetStatus").get("myState")
if state != 2:
if self.tests_run == 0:
msg = "Initial sync node did not catch up after waiting 20 minutes"
self.hook_test_case.logger.exception("{0} failed: {1}".format(description, msg))
raise errors.TestFailure(msg)
self.hook_test_case.logger.info(
"Initial sync node is in state %d, not state SECONDARY (2)."
" Skipping BackgroundInitialSync hook for %s",
state,
test.short_name())
# If we have not restarted initial sync since the last time we ran the data
# validation, restart initial sync with a 20% probability.
if self.random_restarts < 1 and random.random() < 0.2:
hook_type = "resync" if self.use_resync else "initial sync"
self.hook_test_case.logger.info("randomly restarting " + hook_type +
" in the middle of " + hook_type)
self.__restart_init_sync(test_report, sync_node, sync_node_conn)
self.random_restarts += 1
return
except pymongo.errors.OperationFailure:
# replSetGetStatus can fail if the node is in STARTUP state. The node will soon go into
# STARTUP2 state and replSetGetStatus will succeed after the next test.
self.hook_test_case.logger.info(
"replSetGetStatus call failed in BackgroundInitialSync hook, skipping hook for %s",
test.short_name())
return
self.random_restarts = 0
# Run data validation and dbhash checking.
self.hook_test_case.run_test()
self.__restart_init_sync(test_report, sync_node, sync_node_conn)
class IntermediateInitialSync(jsfile.JsCustomBehavior):
"""
This hook accepts a parameter 'n' that specifies a number of tests after which it will start up
a node to initial sync, wait for replication to finish, and then validate the data. It also
accepts a parameter 'use_resync' for whether to restart the initial sync node with resync or by
shutting it down and restarting it.
This requires the ReplicaSetFixture to be started with 'start_initial_sync_node=True'.
"""
DEFAULT_N = cleanup.CleanEveryN.DEFAULT_N
def __init__(self, hook_logger, fixture, use_resync=False, n=DEFAULT_N):
description = "Intermediate Initial Sync"
js_filename = os.path.join("jstests", "hooks", "run_initial_sync_node_validation.js")
jsfile.JsCustomBehavior.__init__(self, hook_logger, fixture, js_filename, description)
self.use_resync = use_resync
self.n = n
self.tests_run = 0
def _should_run_after_test_impl(self):
self.tests_run += 1
# If we have not run 'n' tests yet, skip this hook.
if self.tests_run < self.n:
return False
self.tests_run = 0
return True
def _after_test_impl(self, test, test_report, description):
sync_node = self.fixture.get_initial_sync_node()
sync_node_conn = utils.new_mongo_client(port=sync_node.port)
if self.use_resync:
self.hook_test_case.logger.info("Calling resync on initial sync node...")
cmd = bson.SON([("resync", 1)])
sync_node_conn.admin.command(cmd)
else:
if not sync_node.teardown():
raise errors.ServerFailure("%s did not exit cleanly" % (sync_node))
self.hook_test_case.logger.info("Starting the initial sync node back up again...")
sync_node.setup()
sync_node.await_ready()
# Do initial sync round.
self.hook_test_case.logger.info("Waiting for initial sync node to go into SECONDARY state")
cmd = bson.SON([("replSetTest", 1),
("waitForMemberState", 2),
("timeoutMillis", 20 * 60 * 1000)])
sync_node_conn.admin.command(cmd)
# Run data validation and dbhash checking.
self.hook_test_case.run_test()