# Source: mongo/buildscripts/resmokelib/testing/fixtures/shard_split.py
# (Python, 401 lines, 19 KiB)

"""Fixture for testing shard split operations."""
import time
import os.path
import threading
import shutil
import pymongo
from bson.objectid import ObjectId
import buildscripts.resmokelib.testing.fixtures.interface as interface
from buildscripts.resmokelib.testing.fixtures.fixturelib import with_naive_retry
def _is_replica_set_fixture(fixture):
    """Return True when `fixture` looks like a ReplicaSetFixture.

    Only the presence of a `replset_name` attribute is checked, not the fixture's class.
    """
    has_replset_name = hasattr(fixture, 'replset_name')
    return has_replset_name
def _teardown_and_clean_fixture(fixture):
    """Tear down `fixture` and then delete its data directory.

    Raises whatever `fixture.teardown()` or `shutil.rmtree()` raise; removal errors are not
    suppressed.
    """
    if _is_replica_set_fixture(fixture):
        # ReplicaSetFixtures in the shard split fixture share their logger with the parent
        # ShardSplitFixture instance, so that logger must stay open.
        fixture.teardown(finished=False)
    else:
        # Single recipient nodes (before they become a replica set) own their logger, so it is
        # closed explicitly as part of the teardown.
        fixture.teardown(finished=True)

    # Deleting the node's data directory prevents unbounded disk space utilization.
    data_dir = fixture.get_dbpath_prefix()
    shutil.rmtree(data_dir, ignore_errors=False)
class ShardSplitFixture(interface.MultiClusterFixture):
    """Fixture which provides JSTests with a replica set and recipient nodes to run splits against.

    `self.fixtures` holds the current donor replica set at index 0; every entry after it is a
    recipient node for the split operation that is currently in progress.
    """

    AWAIT_REPL_TIMEOUT_MINS = 5
    AWAIT_REPL_TIMEOUT_FOREVER_MINS = 24 * 60

    # Error response code for 'InterruptedDueToStorageChange' (mongo/base/error_codes.yml).
    # _await_recipient_nodes() reads this through the class, so it must be defined here.
    _INTERRUPTED_DUE_TO_STORAGE_CHANGE = 355

    def __init__(
            self,
            logger,
            job_num,
            fixturelib,
            common_mongod_options=None,
            per_mongod_options=None,
            dbpath_prefix=None,
            preserve_dbpath=False,
            num_nodes_per_replica_set=2,
            auth_options=None,
            replset_config_options=None,
            mixed_bin_versions=None,
    ):
        """Initialize ShardSplitFixture with different options for the replica set processes.

        Builds (but does not start) the initial donor replica set "rs0" and reserves a second set
        of ports that recipient nodes alternate onto.
        """
        interface.MultiClusterFixture.__init__(self, logger, job_num, fixturelib,
                                               dbpath_prefix=dbpath_prefix)
        # Guards self.fixtures and self._port_index. It is a non-reentrant Lock, so methods that
        # hold it must not call other methods that acquire it.
        self.__lock = threading.Lock()

        self.common_mongod_options = self.fixturelib.default_if_none(common_mongod_options, {})
        self.per_mongod_options = self.fixturelib.default_if_none(per_mongod_options, {})
        self.dbpath_prefix = dbpath_prefix
        self.preserve_dbpath = preserve_dbpath
        self.auth_options = auth_options
        self.replset_config_options = self.fixturelib.default_if_none(replset_config_options, {})
        self.mixed_bin_versions = self.fixturelib.default_if_none(mixed_bin_versions,
                                                                  self.config.MIXED_BIN_VERSIONS)
        self.num_nodes_per_replica_set = num_nodes_per_replica_set if num_nodes_per_replica_set \
            else self.config.NUM_REPLSET_NODES

        # The default shard split timeout (10 seconds) is not long enough for some test cases:
        # slow system performance can make the shard split operation take longer than that.
        if "set_parameters" not in self.common_mongod_options:
            self.common_mongod_options["set_parameters"] = {}
        if "shardSplitTimeoutMS" not in self.common_mongod_options["set_parameters"]:
            self.common_mongod_options["set_parameters"]["shardSplitTimeoutMS"] = 60000

        self.fixtures = []
        self._can_teardown_retired_donor_rs = threading.Event()
        # By default, we can always tear down the retired donor rs
        self._can_teardown_retired_donor_rs.set()

        # Make the initial donor replica set
        donor_rs_name = "rs0"
        mongod_options = self.common_mongod_options.copy()
        mongod_options["dbpath"] = os.path.join(self._dbpath_prefix, donor_rs_name)
        mongod_options["serverless"] = True

        self.fixtures.append(
            self.fixturelib.make_fixture(
                "ReplicaSetFixture", self.logger, self.job_num, mongod_options=mongod_options,
                preserve_dbpath=self.preserve_dbpath, num_nodes=self.num_nodes_per_replica_set,
                auth_options=self.auth_options, replset_config_options=self.replset_config_options,
                mixed_bin_versions=self.mixed_bin_versions, replicaset_logging_prefix=donor_rs_name,
                all_nodes_electable=True, replset_name=donor_rs_name))

        # Ensure that all nodes are only ever run on the same deterministic set of ports, this
        # makes it easier to reroute in the jstest overrides
        self._port_index = 0
        self._ports = [[node.port for node in self.get_donor_rs().nodes],
                       [
                           self.fixturelib.get_next_port(self.job_num)
                           for _ in range(self.num_nodes_per_replica_set)
                       ]]

    def pids(self):
        """:return: pids owned by this fixture if any."""
        out = []
        with self.__lock:
            for fixture in self.fixtures:
                out.extend(fixture.pids())
        if not out:
            self.logger.debug('No fixtures when gathering pids.')
        return out

    def setup(self):
        """Set up the replica sets."""
        # Don't take the lock because we don't expect setup to be called while the
        # ContinuousShardSplit hook is running, which is the only thing that can modify
        # self.fixtures. We don't want to take the lock because it would be held while starting
        # mongod instances, which is prone to hanging and could cause other functions which take
        # the lock to hang.
        for fixture in self.fixtures:
            fixture.setup()

    def await_ready(self):
        """Block until the fixture can be used for testing."""
        # Don't take the lock because we don't expect await_ready to be called while the
        # ContinuousShardSplit hook is running, which is the only thing that can modify
        # self.fixtures. We don't want to take the lock because it would be held while waiting for
        # the donor to initiate which may take a long time.
        for fixture in self.fixtures:
            fixture.await_ready()

    def _do_teardown(self, mode=None):
        """Shut down the replica sets.

        Raises `fixturelib.ServerFailure` if any contained fixture failed to stop.
        """
        self.logger.info("Stopping all replica sets...")

        running_at_start = self.is_running()
        if not running_at_start:
            self.logger.warning("Donor replica set expected to be running, but wasn't.")

        teardown_handler = interface.FixtureTeardownHandler(self.logger)

        # Don't take the lock because we don't expect teardown to be called while the
        # ContinuousShardSplit hook is running, which is the only thing that can modify
        # self.fixtures. Tearing down may take a long time, so taking the lock during that process
        # might result in hangs in other functions which need to take the lock.
        for fixture in reversed(self.fixtures):
            if _is_replica_set_fixture(fixture):
                type_name = f"replica set '{fixture.replset_name}'"
            else:
                type_name = f"standalone on port {fixture.port}"
            teardown_handler.teardown(fixture, type_name, mode=mode)

        if teardown_handler.was_successful():
            self.logger.info("Successfully stopped donor replica set and all recipient nodes.")
        else:
            self.logger.error("Stopping the fixture failed.")
            raise self.fixturelib.ServerFailure(teardown_handler.get_error_message())

    def is_running(self):
        """Return true if the donor replica set is still operating."""
        # This method is most importantly used in between test runs in job.py to determine if a
        # fixture has crashed between test invocations. We return the `is_running` status of the
        # donor here, instead of all fixtures, some of which may not have been started yet.
        return self.get_donor_rs().is_running()

    def get_internal_connection_string(self):
        """Return the internal connection string to the replica set that currently starts out owning the data."""
        donor_rs = self.get_donor_rs()
        if not donor_rs:
            raise ValueError("Must call setup() before calling get_internal_connection_string()")
        return donor_rs.get_internal_connection_string()

    def get_driver_connection_url(self):
        """Return the driver connection URL to the replica set that currently starts out owning the data."""
        donor_rs = self.get_donor_rs()
        if not donor_rs:
            raise ValueError("Must call setup() before calling get_driver_connection_url")
        return donor_rs.get_driver_connection_url()

    def get_node_info(self):
        """Return the concatenated node info of every contained fixture."""
        output = []
        with self.__lock:
            for fixture in self.fixtures:
                output += fixture.get_node_info()
        return output

    def get_independent_clusters(self):
        """Return a copy of the list of fixtures involved in the shard split."""
        with self.__lock:
            return self.fixtures.copy()

    def get_donor_rs(self):
        """:return the donor replica set, or None if there are no fixtures.

        Raises ValueError if the first fixture is not a ReplicaSetFixture.
        """
        with self.__lock:
            donor_rs = next(iter(self.fixtures), None)
            if donor_rs and not _is_replica_set_fixture(donor_rs):
                raise ValueError("Invalid configuration, donor_rs is not a ReplicaSetFixture")
            return donor_rs

    def get_recipient_nodes(self):
        """:return the recipient nodes for the current split operation."""
        with self.__lock:
            # Slicing yields a new list, so callers can iterate it without holding the lock.
            return self.fixtures[1:]

    def _create_client(self, fixture, **kwargs):
        """Return a client for `fixture`, authenticated when auth options are configured.

        Extra keyword arguments are forwarded to `fixture.mongo_client()`.
        """
        auth_kwargs = {}
        # `auth_options` defaults to None in the constructor; in that case connect without
        # credentials instead of failing with a TypeError while subscripting None.
        if self.auth_options is not None:
            auth_kwargs = {
                "username": self.auth_options["username"],
                "password": self.auth_options["password"],
                "authSource": self.auth_options["authenticationDatabase"],
                "authMechanism": self.auth_options["authenticationMechanism"],
            }
        return fixture.mongo_client(uuidRepresentation='standard', **auth_kwargs, **kwargs)

    def add_recipient_nodes(self, recipient_set_name, recipient_tag_name=None):
        """Build recipient nodes, and reconfig them into the donor as non-voting members.

        `recipient_tag_name` defaults to "recipientNode". Blocks until every recipient node
        reports itself as a secondary.
        """
        recipient_tag_name = recipient_tag_name or "recipientNode"
        donor_rs_name = self.get_donor_rs().replset_name

        self.logger.info(
            f"Adding {self.num_nodes_per_replica_set} recipient nodes to donor replica set '{donor_rs_name}'."
        )

        with self.__lock:
            self._port_index ^= 1  # Toggle the set of mongod ports between index 0 and 1
            for i in range(self.num_nodes_per_replica_set):
                mongod_logger = self.fixturelib.new_fixture_node_logger(
                    "MongoDFixture", self.job_num, f"{recipient_set_name}:node{i}")

                mongod_options = self.common_mongod_options.copy()
                # Even though these nodes are not starting in a replica set, we structure their
                # files on disk as if they were already part of the new recipient set. This makes
                # logging and cleanup easier.
                mongod_options["dbpath"] = os.path.join(self._dbpath_prefix, recipient_set_name,
                                                        "node{}".format(i))
                mongod_options["set_parameters"] = mongod_options.get(
                    "set_parameters", self.fixturelib.make_historic({})).copy()
                mongod_options["serverless"] = True
                mongod_port = self._ports[self._port_index][i]

                self.fixtures.append(
                    self.fixturelib.make_fixture(
                        "MongoDFixture", mongod_logger, self.job_num, mongod_options=mongod_options,
                        dbpath_prefix=self.dbpath_prefix, preserve_dbpath=self.preserve_dbpath,
                        port=mongod_port))

        # Start the nodes outside the lock: get_recipient_nodes() acquires it itself.
        recipient_nodes = self.get_recipient_nodes()
        for recipient_node in recipient_nodes:
            recipient_node.setup()
            recipient_node.await_ready()

        # Reconfig the donor to add the recipient nodes as non-voting members
        donor_client = self._create_client(self.get_donor_rs())

        repl_config = with_naive_retry(lambda: donor_client.admin.command({"replSetGetConfig": 1})[
            "config"])
        repl_members = repl_config["members"]

        for recipient_node in recipient_nodes:
            # It is possible for the reconfig below to fail with a retryable error code like
            # 'InterruptedDueToReplStateChange'. In these cases, we need to run the reconfig
            # again, but some or all of the recipient nodes might have already been added to
            # the member list. Only add recipient nodes which have not yet been added on a
            # retry.
            recipient_host = recipient_node.get_internal_connection_string()
            recipient_entry = {
                "host": recipient_host, "votes": 0, "priority": 0, "hidden": True,
                "tags": {recipient_tag_name: str(ObjectId())}
            }
            member_exists = False
            for index, member in enumerate(repl_members):
                if member["host"] == recipient_host:
                    repl_members[index] = recipient_entry
                    member_exists = True

            if not member_exists:
                repl_members.append(recipient_entry)

        # Re-index all members from 0
        for idx, member in enumerate(repl_members):
            member["_id"] = idx

        # Prepare the new config
        repl_config["version"] = repl_config["version"] + 1
        repl_config["members"] = repl_members

        self.logger.info(
            f"Reconfiguring donor replica set to add non-voting recipient nodes: {repl_config}")
        with_naive_retry(lambda: donor_client.admin.command({
            "replSetReconfig": repl_config, "maxTimeMS": self.AWAIT_REPL_TIMEOUT_MINS * 60 * 1000
        }))

        # Wait for recipient nodes to become secondaries
        self._await_recipient_nodes()

    def _await_recipient_nodes(self, timeout_secs=None):
        """Wait for recipient nodes to become available.

        `timeout_secs` defaults to AWAIT_REPL_TIMEOUT_MINS minutes and is a single budget shared
        by all recipient nodes. Raises `fixturelib.ServerFailure` when it is exhausted.
        """
        if timeout_secs is None:
            timeout_secs = self.AWAIT_REPL_TIMEOUT_MINS * 60

        start = time.time()
        recipient_nodes = self.get_recipient_nodes()
        for recipient_node in recipient_nodes:
            recipient_client = self._create_client(recipient_node,
                                                   read_preference=pymongo.ReadPreference.SECONDARY)
            while True:
                now = time.time()
                if (now - start) >= timeout_secs:
                    msg = f"Timed out while waiting for secondary on port {recipient_node.port} to become available."
                    self.logger.error(msg)
                    raise self.fixturelib.ServerFailure(msg)

                self.logger.info(
                    f"Waiting for secondary on port {recipient_node.port} to become available.")
                try:
                    is_secondary = recipient_client.admin.command("isMaster")["secondary"]
                    if is_secondary:
                        break
                except pymongo.errors.OperationFailure as err:
                    # Only an interruption due to a storage change is retried; every other
                    # command failure is propagated to the caller.
                    if err.code != ShardSplitFixture._INTERRUPTED_DUE_TO_STORAGE_CHANGE:
                        raise

                time.sleep(0.1)  # Wait a little bit before trying again.
            self.logger.info(f"Secondary on port {recipient_node.port} is now available.")

    def remove_recipient_nodes(self, recipient_tag_name=None):
        """Remove recipient nodes from the donor.

        Drops the recipient nodes from `self.fixtures`, reconfigs the donor without the members
        tagged `recipient_tag_name` (default "recipientNode"), then tears the nodes down and
        deletes their data directories.
        """
        recipient_tag_name = recipient_tag_name or "recipientNode"
        donor_rs_name = self.get_donor_rs().replset_name
        recipient_nodes = self.get_recipient_nodes()
        with self.__lock:
            # Reset the port-set, so we select the same ports next time.
            self._port_index ^= 1

            # Remove the recipient nodes from the internal fixture list. The donor lookup is
            # repeated inline because get_donor_rs() would try to re-acquire the held lock.
            donor_rs = next(iter(self.fixtures), None)
            if donor_rs and not _is_replica_set_fixture(donor_rs):
                raise ValueError("Invalid configuration, donor_rs is not a ReplicaSetFixture")
            self.fixtures = [donor_rs]

        donor_client = self._create_client(self.get_donor_rs())
        repl_config = with_naive_retry(lambda: donor_client.admin.command({"replSetGetConfig": 1})[
            "config"])
        # Keep only the members that do not carry the recipient tag.
        repl_members = [
            member for member in repl_config["members"]
            if 'tags' not in member or recipient_tag_name not in member["tags"]
        ]

        # Re-index all members from 0
        for idx, member in enumerate(repl_members):
            member["_id"] = idx

        # Prepare the new config
        repl_config["version"] = repl_config["version"] + 1
        repl_config["members"] = repl_members

        # It's possible that the recipient config has been removed in a previous remove attempt.
        if "recipientConfig" in repl_config:
            del repl_config["recipientConfig"]

        self.logger.info(
            f"Reconfiguring donor '{donor_rs_name}' to remove recipient nodes: {repl_config}")
        with_naive_retry(lambda: donor_client.admin.command({
            "replSetReconfig": repl_config, "maxTimeMS": self.AWAIT_REPL_TIMEOUT_MINS * 60 * 1000
        }))

        self.logger.info("Tearing down recipient nodes and removing data directories.")
        for recipient_node in reversed(recipient_nodes):
            _teardown_and_clean_fixture(recipient_node)

    def replace_donor_with_recipient(self, recipient_set_name):
        """Replace the current donor with the newly initiated recipient.

        Wraps the existing recipient nodes in a new ReplicaSetFixture, makes it the only entry in
        `self.fixtures`, and then tears down the previous donor and deletes its data directory.
        """
        self.logger.info(
            f"Making new donor replica set '{recipient_set_name}' from existing recipient nodes.")
        mongod_options = self.common_mongod_options.copy()
        mongod_options["dbpath"] = os.path.join(self._dbpath_prefix, recipient_set_name)
        mongod_options["serverless"] = True
        new_donor_rs = self.fixturelib.make_fixture(
            "ReplicaSetFixture", self.logger, self.job_num, mongod_options=mongod_options,
            preserve_dbpath=self.preserve_dbpath, num_nodes=self.num_nodes_per_replica_set,
            auth_options=self.auth_options, replset_config_options=self.replset_config_options,
            mixed_bin_versions=self.mixed_bin_versions,
            replicaset_logging_prefix=recipient_set_name, all_nodes_electable=True,
            replset_name=recipient_set_name, existing_nodes=self.get_recipient_nodes())

        new_donor_rs.get_primary()  # Await an election of a new donor primary

        self.logger.info("Replacing internal fixtures with new donor replica set.")
        retired_donor_rs = self.get_donor_rs()
        with self.__lock:
            self.fixtures = [new_donor_rs]

        # Blocks while a stepdown is in progress (see enter_step_down / exit_step_down).
        self._can_teardown_retired_donor_rs.wait()
        self.logger.info(f"Retiring old donor replica set '{retired_donor_rs.replset_name}'.")
        _teardown_and_clean_fixture(retired_donor_rs)

    def enter_step_down(self):
        """Called by the ContinuousStepDown hook to indicate that we are stepping down."""
        self.logger.info("Entering stepdown, preventing donor from being retired.")
        self._can_teardown_retired_donor_rs.clear()

    def exit_step_down(self):
        """Called by the ContinuousStepDown hook to indicate that we are done stepping down."""
        self.logger.info("Exiting stepdown, donor can now be retired.")
        self._can_teardown_retired_donor_rs.set()