177 lines
7.8 KiB
Python
177 lines
7.8 KiB
Python
"""Test hook that runs tenant migrations continuously."""
|
|
|
|
import random
|
|
import threading
|
|
import time
|
|
import uuid
|
|
|
|
import bson
|
|
import pymongo.errors
|
|
|
|
from buildscripts.resmokelib import errors
|
|
from buildscripts.resmokelib import utils
|
|
from buildscripts.resmokelib.testing.fixtures import interface as fixture_interface
|
|
from buildscripts.resmokelib.testing.fixtures import replicaset
|
|
from buildscripts.resmokelib.testing.hooks import interface
|
|
|
|
|
|
class ContinuousTenantMigration(interface.Hook):  # pylint: disable=too-many-instance-attributes
    """Hook that launches a tenant migration thread when each test begins.

    A fresh migration thread is created and started in before_test and stopped in after_test.
    """

    DESCRIPTION = "Continuous tenant migrations"

    def __init__(self, hook_logger, fixture, shell_options):
        """Set up the ContinuousTenantMigration hook.

        Args:
            hook_logger: the logger instance for this hook.
            fixture: the target replica set fixture.
            shell_options: contains the global_vars which contains TestData.dbPrefix to be used
                for tenant migrations.
        """
        interface.Hook.__init__(self, hook_logger, fixture, ContinuousTenantMigration.DESCRIPTION)

        self._fixture = fixture
        test_data = shell_options["global_vars"]["TestData"]
        self._db_prefix = test_data["dbPrefix"]

        # Populated in before_suite; the thread is (re)created for every test.
        self._rs_fixtures = []
        self._tenant_migration_thread = None

    def before_suite(self, test_report):
        """Register the target fixture once, before the suite starts."""
        # TODO (SERVER-50496): Make the hook start the migration thread once here instead of inside
        # before_test and make it run migrations continuously back and forth between the two replica
        # sets.
        if self._rs_fixtures:
            return
        self._add_fixture(self._fixture)

    def after_suite(self, test_report):
        """Do nothing after the suite."""

    def before_test(self, test, test_report):
        """Create and start a new migration thread for this test."""
        self.logger.info("Starting the migration thread.")
        migration_thread = _TenantMigrationThread(self.logger, self._rs_fixtures, self._db_prefix)
        self._tenant_migration_thread = migration_thread
        migration_thread.start()

    def after_test(self, test, test_report):
        """Stop the migration thread that was started for this test."""
        self.logger.info("Stopping the migration thread.")
        self._tenant_migration_thread.stop()
        self.logger.info("migration thread stopped.")

    def _add_fixture(self, fixture):
        """Remember the fixture if it is a replica set fixture; ignore it otherwise."""
        if not isinstance(fixture, replicaset.ReplicaSetFixture):
            return
        self._rs_fixtures.append(fixture)
|
|
|
|
|
|
class _TenantMigrationThread(threading.Thread):  # pylint: disable=too-many-instance-attributes
    """Thread that runs one tenant migration against the first replica set fixture it is given."""

    # Upper bound (seconds) of the random delay before the migration is started.
    MAX_SLEEP_SECONDS = 0.1
    # Upper bound (milliseconds) of the random 'blockTimeMS' passed to the abort failpoint.
    MAX_BLOCK_TIME_MS = 5 * 1000
    # Error code compared against OperationFailure.code to recognise an aborted migration.
    TENANT_MIGRATION_ABORTED_ERROR_CODE = 325

    def __init__(self, logger, rs_fixtures, db_prefix):
        """Initialize _TenantMigrationThread.

        Args:
            logger: the logger used for all messages emitted by this thread.
            rs_fixtures: list of replica set fixtures; only the first one is used as the donor.
            db_prefix: the database prefix sent as 'databasePrefix' in donorStartMigration.
        """
        threading.Thread.__init__(self, name="TenantMigrationThread")
        self.daemon = True
        self.logger = logger
        self._rs_fixtures = rs_fixtures
        self._db_prefix = db_prefix

        # Refreshed every time a migration attempt returns without raising.
        self._last_exec = time.time()

    def run(self):
        """Execute the thread.

        Runs a single migration on the first replica set fixture. Any exception is logged and
        swallowed so that it is reported immediately instead of being lost with the thread.
        """
        if not self._rs_fixtures:
            self.logger.warning("No replica set on which to run migrations.")
            return

        try:
            now = time.time()
            self.logger.info("Starting a tenant migration for database prefix '%s'",
                             self._db_prefix)
            self._run_migration(self._rs_fixtures[0])
            self._last_exec = time.time()
            self.logger.info("Completed a tenant migration in %0d ms",
                             (self._last_exec - now) * 1000)
        except Exception:  # pylint: disable=W0703
            # Proactively log the exception when it happens so it will be
            # flushed immediately.
            self.logger.exception("Migration Thread threw exception")

    def stop(self):
        """Stop the thread by waiting for it to finish."""
        self.join()

    def _enable_abort(self, donor_primary_client, donor_primary_port, donor_primary_rs_name):
        """Turn on the failpoint that aborts a migration after it has started blocking.

        The failpoint is configured with a randomly generated block time in milliseconds
        (at most MAX_BLOCK_TIME_MS). It is paired with _disable_abort so that every migration
        attempt uses its own randomly generated block time.

        Raises:
            errors.ServerFailure: if the configureFailPoint command fails on the donor primary.
        """
        try:
            donor_primary_client.admin.command(
                bson.SON([("configureFailPoint", "abortTenantMigrationAfterBlockingStarts"),
                          ("mode", "alwaysOn"),
                          ("data",
                           bson.SON([("blockTimeMS",
                                      random.uniform(
                                          0, _TenantMigrationThread.MAX_BLOCK_TIME_MS))]))]))
        except pymongo.errors.OperationFailure as err:
            self.logger.exception(
                "Unable to enable the failpoint to make migrations abort on donor primary on port "
                "%d of replica set '%s'.", donor_primary_port, donor_primary_rs_name)
            # Chain explicitly so the original driver error stays attached as the cause.
            raise errors.ServerFailure(
                "Unable to enable the failpoint to make migrations abort on donor primary on port "
                "{} of replica set '{}': {}".format(donor_primary_port, donor_primary_rs_name,
                                                    err.args[0])) from err

    def _disable_abort(self, donor_primary_client, donor_primary_port, donor_primary_rs_name):
        """Turn off the failpoint enabled by _enable_abort.

        Raises:
            errors.ServerFailure: if the configureFailPoint command fails on the donor primary.
        """
        try:
            donor_primary_client.admin.command(
                bson.SON([("configureFailPoint", "abortTenantMigrationAfterBlockingStarts"),
                          ("mode", "off")]))
        except pymongo.errors.OperationFailure as err:
            self.logger.exception(
                "Unable to disable the failpoint to make migrations abort on donor primary on port "
                "%d of replica set '%s'.", donor_primary_port, donor_primary_rs_name)
            # Chain explicitly so the original driver error stays attached as the cause.
            raise errors.ServerFailure(
                "Unable to disable the failpoint to make migrations abort on donor primary on port "
                "{} of replica set '{}': {}".format(donor_primary_port, donor_primary_rs_name,
                                                    err.args[0])) from err

    def _run_migration(self, rs_fixture):
        """Run one donorStartMigration command against the primary of 'rs_fixture'.

        The abort failpoint is enabled before the command and always disabled afterwards. An
        OperationFailure carrying TENANT_MIGRATION_ABORTED_ERROR_CODE is logged and treated as
        a normal outcome; every other driver error is re-raised to the caller.
        """
        donor_primary = rs_fixture.get_primary()
        donor_primary_client = donor_primary.mongo_client()

        # Random short delay so the migration does not always start at the same point of a test.
        time.sleep(random.uniform(0, _TenantMigrationThread.MAX_SLEEP_SECONDS))

        self.logger.info(
            "Starting a tenant migration with donor primary on port %d of replica set '%s'.",
            donor_primary.port, rs_fixture.replset_name)

        try:
            self._enable_abort(donor_primary_client, donor_primary.port, rs_fixture.replset_name)

            # The codec options must be passed by keyword: the second positional parameter of
            # Database.command() is 'value', which is ignored when the command is a mapping.
            donor_primary_client.admin.command(
                {
                    "donorStartMigration": 1,
                    "migrationId": bson.Binary(uuid.uuid4().bytes, 4),
                    "recipientConnectionString": "dummySet/dummyHost:1234",
                    "databasePrefix": self._db_prefix,
                    "readPreference": {"mode": "primary"},
                },
                codec_options=bson.codec_options.CodecOptions(
                    uuid_representation=bson.binary.UUID_SUBTYPE))
        except pymongo.errors.OperationFailure as err:
            if err.code == _TenantMigrationThread.TENANT_MIGRATION_ABORTED_ERROR_CODE:
                # The abort failpoint is on, so this outcome is tolerated rather than re-raised.
                self.logger.exception(
                    "tenant migration with donor primary on port %d of replica set '%s' aborted.",
                    donor_primary.port, rs_fixture.replset_name)
                return
            raise
        except pymongo.errors.PyMongoError:
            self.logger.exception(
                "Error running tenant migration with donor primary on port %d of replica set '%s'.",
                donor_primary.port, rs_fixture.replset_name)
            raise
        finally:
            # Always switch the failpoint off, whether the migration succeeded, aborted or failed.
            self._disable_abort(donor_primary_client, donor_primary.port, rs_fixture.replset_name)
|