Files
mongo/buildscripts/resmokelib/testing/hooks/shard_split.py

523 lines
22 KiB
Python

"""Test hook that runs shard splits continuously."""
import copy
import random
import threading
import time
import uuid
import bson
import pymongo.errors
from buildscripts.resmokelib import errors
from buildscripts.resmokelib.testing.fixtures import interface as fixture_interface
from buildscripts.resmokelib.testing.fixtures import shard_split
from buildscripts.resmokelib.testing.fixtures.replicaset import ReplicaSetFixture
from buildscripts.resmokelib.testing.hooks import interface
class ContinuousShardSplit(interface.Hook): # pylint: disable=too-many-instance-attributes
"""Starts a shard split thread at the beginning of each test."""
DESCRIPTION = ("Continuous shard split operations")
IS_BACKGROUND = True
AWAIT_REPL_TIMEOUT_MINS = ReplicaSetFixture.AWAIT_REPL_TIMEOUT_MINS
def __init__(self, hook_logger, fixture, shell_options):
"""Initialize the ContinuousShardSplit.
Args:
hook_logger: the logger instance for this hook.
fixture: the target ShardSplitFixture containing the donor replica set.
shell_options: contains the global_vars which contains TestData.tenantIds to be used for
shard splits.
"""
interface.Hook.__init__(self, hook_logger, fixture, ContinuousShardSplit.DESCRIPTION)
if not isinstance(fixture, shard_split.ShardSplitFixture):
raise ValueError("The ContinuousShardSplit hook requires a ShardSplitFixture")
self._shard_split_fixture = fixture
self._shell_options = copy.deepcopy(shell_options)
self._shard_split_thread = None
def before_suite(self, test_report):
"""Before suite."""
if not self._shard_split_fixture:
raise ValueError("No ShardSplitFixture to run shard splits on")
self.logger.info("Starting the shard split thread.")
self._shard_split_thread = _ShardSplitThread(self.logger, self._shard_split_fixture,
self._shell_options, test_report)
self._shard_split_thread.start()
def after_suite(self, test_report, teardown_flag=None):
"""After suite."""
self.logger.info("Stopping the shard split thread.")
self._shard_split_thread.stop()
self.logger.info("Stopped the shard split thread.")
def before_test(self, test, test_report):
"""Before test."""
self.logger.info("Resuming the shard split thread.")
self._shard_split_thread.resume(test)
def after_test(self, test, test_report):
"""After test."""
self.logger.info("Pausing the shard split thread.")
self._shard_split_thread.pause()
self.logger.info("Paused the shard split thread.")
class ShardSplitLifeCycle(object):
"""Class for managing the various states of the shard split thread.
The job thread alternates between calling mark_test_started() and mark_test_finished(). The
shard split thread is allowed to perform splits at any point between these two calls.
Note that the job thread synchronizes with the shard split thread outside the context of
this object to know it isn't in the process of running a split.
"""
_TEST_STARTED_STATE = "start"
_TEST_FINISHED_STATE = "finished"
def __init__(self):
"""Initialize the ShardSplitLifeCycle instance."""
self.__lock = threading.Lock()
self.__cond = threading.Condition(self.__lock)
self.test_num = 0
self.__test_state = self._TEST_FINISHED_STATE
self.__should_stop = False
def mark_test_started(self):
"""Signal to the shard split thread that a new test has started.
This function should be called during before_test(). Calling it causes the
wait_for_shard_split_permitted() function to no longer block and to instead return
true.
"""
with self.__lock:
self.test_num += 1
self.__test_state = self._TEST_STARTED_STATE
self.__cond.notify_all()
def mark_test_finished(self):
"""Signal to the shard split thread that the current test has finished.
This function should be called during after_test(). Calling it causes the
wait_for_shard_split_permitted() function to block until mark_test_started() is called
again.
"""
with self.__lock:
self.__test_state = self._TEST_FINISHED_STATE
self.__cond.notify_all()
def is_test_finished(self):
"""Return true if the current test has finished."""
with self.__lock:
return self.__test_state == self._TEST_FINISHED_STATE
def stop(self):
"""Signal to the shard split thread that it should exit.
This function should be called during after_suite(). Calling it causes the
wait_for_shard_split_permitted() function to no longer block and to instead return
false.
"""
with self.__lock:
self.__should_stop = True
self.__cond.notify_all()
def wait_for_shard_split_permitted(self):
"""Block until splits are permitted, or until stop() is called."""
with self.__lock:
while not self.__should_stop:
if self.__test_state == self._TEST_STARTED_STATE:
return True
self.__cond.wait()
return False
def wait_for_shard_split_interval(self, timeout):
"""Block for 'timeout' seconds, or until stop() is called."""
with self.__lock:
self.__cond.wait(timeout)
def poll_for_idle_request(self): # noqa: D205,D400
"""Return true if the shard split thread should continue running splits, or false
if it should temporarily stop running splits.
"""
with self.__lock:
return self.__test_state == self._TEST_FINISHED_STATE
class _ShardSplitOptions:
def __init__( # pylint: disable=too-many-arguments
self, logger, shard_split_fixture, tenant_ids, recipient_tag_name, recipient_set_name):
self.logger = logger
self.migration_id = uuid.uuid4()
self.shard_split_fixture = shard_split_fixture
self.tenant_ids = tenant_ids
self.recipient_tag_name = recipient_tag_name
self.recipient_set_name = recipient_set_name
def get_migration_id_as_binary(self):
"""Return the migration id as BSON Binary."""
return bson.Binary(self.migration_id.bytes, 4)
def get_donor_rs(self):
"""Return the current donor for the split fixture."""
return self.shard_split_fixture.get_donor_rs()
def get_donor_name(self):
"""Return the replica set name for the donor."""
return self.get_donor_rs().replset_name
def get_donor_primary(self):
"""Return a connection to the donor primary."""
return self.get_donor_rs().get_primary(timeout_secs=self.AWAIT_REPL_TIMEOUT_MINS)
def get_donor_nodes(self):
"""Return the nodes for the current shard split fixture donor."""
return self.get_donor_rs().nodes
def get_recipient_nodes(self):
"""Return the recipient nodes for the shard split fixture."""
return self.shard_split_fixture.get_recipient_nodes()
def __str__(self):
opts = {
"migration_id": self.migration_id, "tenant_ids": self.tenant_ids,
"donor": self.get_donor_name(), "recipientSetName": self.recipient_set_name,
"recipientTagName": self.recipient_tag_name
}
return str(opts)
class _ShardSplitThread(threading.Thread): # pylint: disable=too-many-instance-attributes
THREAD_NAME = "ShardSplitThread"
WAIT_SECS_RANGES = [[0.05, 0.1], [0.1, 0.5], [1, 5], [5, 15]]
POLL_INTERVAL_SECS = 0.1
NO_SUCH_MIGRATION_ERR_CODE = 327
INTERNAL_ERR_CODE = 1
def __init__(self, logger, shard_split_fixture, shell_options, test_report):
"""Initialize _ShardSplitThread."""
threading.Thread.__init__(self, name=self.THREAD_NAME)
self.daemon = True
self.logger = logger
self._shard_split_fixture = shard_split_fixture
self._tenant_ids = shell_options["global_vars"]["TestData"]["tenantIds"]
self._auth_options = shell_options["global_vars"]["TestData"]["authOptions"]
self._test = None
self._test_report = test_report
self._shell_options = shell_options
self.__lifecycle = ShardSplitLifeCycle()
# Event set when the thread has been stopped using the 'stop()' method.
self._is_stopped_evt = threading.Event()
# Event set when the thread is not performing shard splits.
self._is_idle_evt = threading.Event()
self._is_idle_evt.set()
def run(self):
"""Execute the thread."""
if not self._shard_split_fixture:
self.logger.warning("No ShardSplitFixture to run shard splits on.")
return
split_count = 0
try:
while True:
self._is_idle_evt.set()
permitted = self.__lifecycle.wait_for_shard_split_permitted()
if not permitted:
break
if split_count >= 3: # TODO(SERVER-66045): Remove this check and run unbounded splits
time.sleep(self.POLL_INTERVAL_SECS)
continue
self._is_idle_evt.clear()
split_opts = self._create_split_opts(split_count)
# Set up the donor for a split
self._shard_split_fixture.add_recipient_nodes(split_opts.recipient_set_name)
# Briefly wait to let the test run before starting the split operation, so that
# the first split is more likely to have data to migrate.
wait_secs = random.uniform(
*self.WAIT_SECS_RANGES[split_count % len(self.WAIT_SECS_RANGES)])
self.logger.info(f"Waiting for {wait_secs} seconds before starting split.")
self.__lifecycle.wait_for_shard_split_interval(wait_secs)
self.logger.info(f"Starting shard split: {str(split_opts)}.")
start_time = time.time()
is_committed = self._run_shard_split(split_opts)
end_time = time.time()
split_count += 1
self.logger.info(
f"Completed shard split {str(split_opts)} in {(end_time - start_time) * 1000} ms."
)
# set up the fixture for the next split operation
if is_committed:
self._shard_split_fixture.replace_donor_with_recipient(
split_opts.recipient_set_name)
else:
self._shard_split_fixture.remove_recipient_nodes()
found_idle_request = self.__lifecycle.poll_for_idle_request()
if found_idle_request:
continue
except Exception: # pylint: disable=W0703
# Proactively log the exception when it happens so it will be flushed immediately.
self.logger.exception("Shard split thread threw exception")
# The event should be signaled whenever the thread is not performing shard splits.
self._is_idle_evt.set()
def stop(self):
"""Stop the thread when the suite finishes."""
self.__lifecycle.stop()
self._is_stopped_evt.set()
# Unpause to allow the thread to finish.
self.resume(self._test)
self.join()
def pause(self):
"""Pause the thread after test."""
self.__lifecycle.mark_test_finished()
# Wait until we are no longer executing splits.
self._is_idle_evt.wait()
# Check if the thread is alive in case it has thrown an exception while running.
self._check_thread()
# Check that the fixture is still running.
if not self._shard_split_fixture.is_running():
raise errors.ServerFailure(
f"ShardSplitFixture with pids {self._shard_split_fixture.pids()} expected to be running in"
" ContinuousShardSplit, but wasn't.")
def resume(self, test):
"""Resume the thread before test."""
self._test = test
self.__lifecycle.mark_test_started()
def _wait(self, timeout):
"""Wait until stop or timeout."""
self._is_stopped_evt.wait(timeout)
def short_name(self):
"""Return the name of the thread."""
return self.THREAD_NAME
def _check_thread(self):
"""Throw an error if the thread is not running."""
if not self.is_alive():
msg = "Shard split thread is not running."
self.logger.error(msg)
raise errors.ServerFailure(msg)
def _is_fail_point_abort_reason(self, abort_reason):
return abort_reason["code"] == self.INTERNAL_ERR_CODE and abort_reason[
"errmsg"] == "simulate a shard split error"
def _create_split_opts(self, split_count):
recipient_set_name = f"rs{split_count+1}"
recipient_tag_name = "recipientNode"
return _ShardSplitOptions(self.logger, self._shard_split_fixture, self._tenant_ids,
recipient_tag_name, recipient_set_name)
def _create_client(self, node):
return fixture_interface.authenticate(node.mongo_client(), self._auth_options)
def _run_shard_split(self, split_opts): # noqa: D205,D400
try:
donor_client = self._create_client(split_opts.get_donor_rs())
res = self._commit_shard_split(donor_client, split_opts)
is_committed = res["state"] == "committed"
# Garbage collect the split prior to throwing error to avoid conflicting operations
# in the next test.
if is_committed:
# Wait for the donor/proxy to reroute at least one command before doing garbage
# collection. Stop waiting when the test finishes.
self._wait_for_reroute_or_test_completion(donor_client, split_opts)
self._forget_shard_split(donor_client, split_opts)
self._wait_for_garbage_collection(split_opts)
if not res["ok"]:
raise errors.ServerFailure(
f"Shard split '{split_opts.migration_id}' on replica set "
f"'{split_opts.get_donor_name()}' failed: {str(res)}")
if is_committed:
return True
abort_reason = res["abortReason"]
if self._is_fail_point_abort_reason(abort_reason):
self.logger.info(
f"Shard split '{split_opts.migration_id}' on replica set "
f"'{split_opts.get_donor_name()}' aborted due to failpoint: {str(res)}.")
return False
raise errors.ServerFailure(
f"Shard split '{str(split_opts.migration_id)}' with donor replica set "
f"'{split_opts.get_donor_name()}' aborted due to an error: {str(res)}")
except pymongo.errors.PyMongoError:
self.logger.exception(
f"Error running shard split '{split_opts.migration_id}' with donor primary on "
f"replica set '{split_opts.get_donor_name()}'.")
raise
def _commit_shard_split(self, donor_client, split_opts): # noqa: D205,D400
self.logger.info(f"Starting shard split '{split_opts.migration_id}' on replica set "
f"'{split_opts.get_donor_name()}'.")
while True:
try:
res = donor_client.admin.command({
"commitShardSplit": 1, "migrationId": split_opts.get_migration_id_as_binary(),
"tenantIds": split_opts.tenant_ids,
"recipientTagName": split_opts.recipient_tag_name, "recipientSetName":
split_opts.recipient_set_name
}, bson.codec_options.CodecOptions(uuid_representation=bson.binary.UUID_SUBTYPE))
if res["state"] == "committed":
self.logger.info(f"Shard split '{split_opts.migration_id}' on replica set "
f"'{split_opts.get_donor_name()}' has committed.")
return res
if res["state"] == "aborted":
self.logger.info(f"Shard split '{split_opts.migration_id}' on replica set "
f"'{split_opts.get_donor_name()}' has aborted: {str(res)}.")
return res
if not res["ok"]:
self.logger.info(f"Shard split '{split_opts.migration_id}' on replica set "
f"'{split_opts.get_donor_name()}' has failed: {str(res)}.")
return res
except pymongo.errors.ConnectionFailure:
self.logger.info(
f"Retrying shard split '{split_opts.migration_id}' against replica set "
f"'{split_opts.get_donor_name()}'.")
def _forget_shard_split(self, donor_client, split_opts):
self.logger.info(f"Forgetting shard split '{split_opts.migration_id}' on replica set "
f"'{split_opts.get_donor_name()}'.")
while True:
try:
donor_client.admin.command(
{"forgetShardSplit": 1, "migrationId": split_opts.get_migration_id_as_binary()},
bson.codec_options.CodecOptions(uuid_representation=bson.binary.UUID_SUBTYPE))
return
except pymongo.errors.ConnectionFailure:
self.logger.info(
f"Retrying forget shard split '{split_opts.migration_id}' against replica "
f"set '{split_opts.get_donor_name()}'.")
continue
except pymongo.errors.OperationFailure as err:
if err.code != self.NO_SUCH_MIGRATION_ERR_CODE:
raise
# The fixture was restarted.
self.logger.info(
f"Could not find shard split '{split_opts.migration_id}' on donor primary on "
f"replica set '{split_opts.get_donor_name()}': {str(err)}.")
return
except pymongo.errors.PyMongoError:
self.logger.exception(
f"Error forgetting shard split '{split_opts.migration_id}' on donor primary on "
f"replica set '{split_opts.get_donor_name()}'.")
raise
def _wait_for_garbage_collection(self, split_opts): # noqa: D205,D400
try:
donor_nodes = split_opts.get_donor_nodes()
for donor_node in donor_nodes:
self.logger.info(
f"Waiting for shard split '{split_opts.migration_id}' to be garbage collected on donor node on port {donor_node.port} of replica set '{split_opts.get_donor_name()}'."
)
donor_node_client = self._create_client(donor_node)
while True:
try:
res = donor_node_client.config.command({
"count": "tenantSplitDonors",
"query": {"tenantIds": split_opts.tenant_ids}
})
if res["n"] == 0:
break
except pymongo.errors.ConnectionFailure:
self.logger.info(
f"Retrying waiting for shard split '{split_opts.migration_id}' to be garbage collected on donor node on port {donor_node.port} of replica set '{split_opts.get_donor_name()}'."
)
continue
time.sleep(self.POLL_INTERVAL_SECS)
recipient_nodes = split_opts.get_recipient_nodes()
for recipient_node in recipient_nodes:
self.logger.info(
f"Waiting for shard split '{split_opts.migration_id}' to be garbage collected on recipient node on port {recipient_node.port} of replica set '{split_opts.recipient_set_name}'."
)
recipient_node_client = self._create_client(recipient_node)
while True:
try:
res = recipient_node_client.config.command({
"count": "tenantSplitDonors",
"query": {"tenantIds": split_opts.tenant_ids}
})
if res["n"] == 0:
break
except pymongo.errors.ConnectionFailure:
self.logger.info(
f"Retrying waiting for shard split '{split_opts.migration_id}' to be garbage collected on recipient node on port {recipient_node.port} of replica set '{split_opts.recipient_set_name}'."
)
continue
time.sleep(self.POLL_INTERVAL_SECS)
except pymongo.errors.PyMongoError:
self.logger.exception(
f"Error waiting for shard split '{split_opts.migration_id}' from donor replica set '{split_opts.get_donor_name()} to recipient replica set '{split_opts.recipient_set_name}' to be garbage collected."
)
raise
def _wait_for_reroute_or_test_completion(self, donor_client, split_opts):
start_time = time.time()
self.logger.info(
f"Waiting for donor primary of replica set '{split_opts.get_donor_name()}' for shard split '{split_opts.migration_id}' to reroute at least one conflicting command. Stop waiting when the test finishes."
)
while not self.__lifecycle.is_test_finished():
try:
# We are reusing the infrastructure originally developed for tenant migrations,
# and aren't concerned about conflicts because we don't expect the tenant migration
# and shard split hooks to run concurrently.
doc = donor_client["testTenantMigration"]["rerouted"].find_one(
{"_id": split_opts.get_migration_id_as_binary()})
if doc is not None:
return
except pymongo.errors.ConnectionFailure:
self.logger.info(
f"Retrying waiting for donor primary of replica set '{split_opts.get_donor_name()}' for shard split '{split_opts.migration_id}' to reroute at least one conflicting command."
)
continue
except pymongo.errors.PyMongoError:
end_time = time.time()
self.logger.exception(
f"Error running find command on donor primary replica set '{split_opts.get_donor_name()}' after waiting for reroute for {(end_time - start_time) * 1000} ms"
)
raise
time.sleep(self.POLL_INTERVAL_SECS)