diff --git a/buildscripts/resmokelib/config.py b/buildscripts/resmokelib/config.py index 8341a81e1b7..f4361ba2b52 100644 --- a/buildscripts/resmokelib/config.py +++ b/buildscripts/resmokelib/config.py @@ -104,6 +104,7 @@ DEFAULTS = { "linear_chain": None, "num_replset_nodes": None, "num_shards": None, + "export_mongod_config": "off", # Internal testing options. "internal_params": [], @@ -437,6 +438,9 @@ NUM_REPLSETS = None # Specifies the number of shards in a ShardedClusterFixture. NUM_SHARDS = None +# Specifies whether to export the history of mongod config options. +EXPORT_MONGOD_CONFIG = None + # If true, run ReplicaSetFixture with linear chaining. LINEAR_CHAIN = None diff --git a/buildscripts/resmokelib/configure_resmoke.py b/buildscripts/resmokelib/configure_resmoke.py index ac2757e9a22..08ffa913c59 100644 --- a/buildscripts/resmokelib/configure_resmoke.py +++ b/buildscripts/resmokelib/configure_resmoke.py @@ -221,6 +221,7 @@ def _update_config_vars(values): # pylint: disable=too-many-statements,too-many _config.SHELL_READ_MODE = config.pop("shell_read_mode") _config.SHELL_WRITE_MODE = config.pop("shell_write_mode") _config.SPAWN_USING = config.pop("spawn_using") + _config.EXPORT_MONGOD_CONFIG = config.pop("export_mongod_config") _config.STAGGER_JOBS = config.pop("stagger_jobs") == "on" _config.STORAGE_ENGINE = config.pop("storage_engine") _config.STORAGE_ENGINE_CACHE_SIZE = config.pop("storage_engine_cache_size_gb") diff --git a/buildscripts/resmokelib/core/programs.py b/buildscripts/resmokelib/core/programs.py index 05485f5bf65..d3763bdb791 100644 --- a/buildscripts/resmokelib/core/programs.py +++ b/buildscripts/resmokelib/core/programs.py @@ -12,6 +12,7 @@ from buildscripts.resmokelib import config from buildscripts.resmokelib import utils from buildscripts.resmokelib.core import jasper_process from buildscripts.resmokelib.core import process +from buildscripts.resmokelib.utils.history import make_historic, HistoryDict from buildscripts.resmokelib.logging import loggers from buildscripts.resmokelib.multiversionconstants import LAST_LTS_MONGOD_BINARY from buildscripts.resmokelib.multiversionconstants import LAST_LTS_MONGOS_BINARY @@ -24,34 +25,35 @@ from buildscripts.resmokelib.multiversionconstants import LAST_LTS_MONGOS_BINARY # The default verbosity setting for any tests that are not started with an Evergreen task id. This # will apply to any tests run locally. -DEFAULT_MONGOD_LOG_COMPONENT_VERBOSITY = { +DEFAULT_MONGOD_LOG_COMPONENT_VERBOSITY = make_historic({ "replication": {"rollback": 2}, "sharding": {"migration": 2}, "transaction": 4, "tenantMigration": 4 -} +}) -DEFAULT_LAST_LTS_MONGOD_LOG_COMPONENT_VERBOSITY = {"replication": {"rollback": 2}, "transaction": 4} +DEFAULT_LAST_LTS_MONGOD_LOG_COMPONENT_VERBOSITY = make_historic( + {"replication": {"rollback": 2}, "transaction": 4}) # The default verbosity setting for any mongod processes running in Evergreen i.e. started with an # Evergreen task id. -DEFAULT_EVERGREEN_MONGOD_LOG_COMPONENT_VERBOSITY = { +DEFAULT_EVERGREEN_MONGOD_LOG_COMPONENT_VERBOSITY = make_historic({ "replication": {"election": 4, "heartbeats": 2, "initialSync": 2, "rollback": 2}, "sharding": {"migration": 2}, "storage": {"recovery": 2}, "transaction": 4, "tenantMigration": 4 -} +}) # The default verbosity setting for any last-lts mongod processes running in Evergreen i.e. started # with an Evergreen task id. -DEFAULT_EVERGREEN_LAST_LTS_MONGOD_LOG_COMPONENT_VERBOSITY = { +DEFAULT_EVERGREEN_LAST_LTS_MONGOD_LOG_COMPONENT_VERBOSITY = make_historic({ "replication": {"election": 4, "heartbeats": 2, "initialSync": 2, "rollback": 2}, "storage": {"recovery": 2}, "transaction": 4 -} +}) # The default verbosity setting for any tests that are not started with an Evergreen task id. This # will apply to any tests run locally. -DEFAULT_MONGOS_LOG_COMPONENT_VERBOSITY = {"transaction": 3} +DEFAULT_MONGOS_LOG_COMPONENT_VERBOSITY = make_historic({"transaction": 3}) # The default verbosity setting for any tests running in Evergreen i.e. started with an Evergreen # task id. -DEFAULT_EVERGREEN_MONGOS_LOG_COMPONENT_VERBOSITY = {"transaction": 3} +DEFAULT_EVERGREEN_MONGOS_LOG_COMPONENT_VERBOSITY = make_historic({"transaction": 3}) def make_process(*args, **kwargs): @@ -119,18 +121,27 @@ def _add_testing_set_parameters(suite_set_parameters): def mongod_program( # pylint: disable=too-many-branches,too-many-statements - logger, job_num, executable=None, process_kwargs=None, **kwargs): - """Return a Process instance that starts mongod arguments constructed from 'kwargs'.""" + logger, job_num, executable=None, process_kwargs=None, mongod_options=None): + """ + Return a Process instance that starts mongod arguments constructed from 'mongod_options'. + + @param logger - The logger to pass into the process. + @param executable - The mongod executable to run. + @param process_kwargs - A dict of key-value pairs to pass to the process. + @param mongod_options - A HistoryDict describing the various options to pass to the mongod. + """ executable = utils.default_if_none(executable, config.DEFAULT_MONGOD_EXECUTABLE) + mongod_options = utils.default_if_none(mongod_options, make_historic({})).copy() args = [executable] # Apply the --setParameter command line argument. Command line options to resmoke.py override # the YAML configuration. - suite_set_parameters = kwargs.pop("set_parameters", {}) + # We leave the parameters attached for now so the top-level dict tracks its history. + suite_set_parameters = mongod_options.setdefault("set_parameters", make_historic({})) if config.MONGOD_SET_PARAMETERS is not None: - suite_set_parameters.update(utils.load_yaml(config.MONGOD_SET_PARAMETERS)) + suite_set_parameters.update(make_historic(utils.load_yaml(config.MONGOD_SET_PARAMETERS))) # Set default log verbosity levels if none were specified. if "logComponentVerbosity" not in suite_set_parameters: @@ -142,14 +153,14 @@ def mongod_program( # pylint: disable=too-many-branches,too-many-statements # issue split commands to create more chunks. As a result, the balancer will also end up moving # chunks for the sessions collection to balance the chunks across shards. Unless the suite is # explicitly prepared to handle these background migrations, set the parameter to 1. - if "configsvr" in kwargs and "minNumChunksForSessionsCollection" not in suite_set_parameters: + if "configsvr" in mongod_options and "minNumChunksForSessionsCollection" not in suite_set_parameters: suite_set_parameters["minNumChunksForSessionsCollection"] = 1 # orphanCleanupDelaySecs controls an artificial delay before cleaning up an orphaned chunk # that has migrated off of a shard, meant to allow most dependent queries on secondaries to # complete first. It defaults to 900, or 15 minutes, which is prohibitively long for tests. # Setting it in the .yml file overrides this. - if "shardsvr" in kwargs and "orphanCleanupDelaySecs" not in suite_set_parameters: + if "shardsvr" in mongod_options and "orphanCleanupDelaySecs" not in suite_set_parameters: suite_set_parameters["orphanCleanupDelaySecs"] = 1 # The LogicalSessionCache does automatic background refreshes in the server. This is @@ -198,14 +209,15 @@ def mongod_program( # pylint: disable=too-many-branches,too-many-statements # the potential to mask issues such as SERVER-31609 because it allows the operationTime of # cluster to advance even if the client is blocked for other reasons. We should disable the # periodic no-op writer. Set in the .yml file to override this. - if "replSet" in kwargs and "writePeriodicNoops" not in suite_set_parameters: + if "replSet" in mongod_options and "writePeriodicNoops" not in suite_set_parameters: suite_set_parameters["writePeriodicNoops"] = False # The default time for stepdown and quiesce mode in response to SIGTERM is 15 seconds. Reduce # this to 100ms for faster shutdown. On branches 4.4 and earlier, there is no quiesce mode, but # the default time for stepdown is 10 seconds. # TODO(SERVER-47797): Remove reference to waitForStepDownOnNonCommandShutdown. - if ("replSet" in kwargs and "waitForStepDownOnNonCommandShutdown" not in suite_set_parameters + if ("replSet" in mongod_options + and "waitForStepDownOnNonCommandShutdown" not in suite_set_parameters and "shutdownTimeoutMillisForSignaledShutdown" not in suite_set_parameters): if executable == LAST_LTS_MONGOD_BINARY: suite_set_parameters["waitForStepDownOnNonCommandShutdown"] = False @@ -217,9 +229,8 @@ def mongod_program( # pylint: disable=too-many-branches,too-many-statements if ("failpoint.flowControlTicketOverride" not in suite_set_parameters and config.FLOW_CONTROL_TICKETS is not None): - suite_set_parameters["failpoint.flowControlTicketOverride"] = { - "mode": "alwaysOn", "data": {"numTickets": config.FLOW_CONTROL_TICKETS} - } + suite_set_parameters["failpoint.flowControlTicketOverride"] = make_historic( + {"mode": "alwaysOn", "data": {"numTickets": config.FLOW_CONTROL_TICKETS}}) _add_testing_set_parameters(suite_set_parameters) @@ -246,13 +257,13 @@ def mongod_program( # pylint: disable=too-many-branches,too-many-statements opts_without_vals = ("nojournal", "logappend") # Have the --nojournal command line argument to resmoke.py unset the journal option. - if shortcut_opts["nojournal"] and "journal" in kwargs: - del kwargs["journal"] + if shortcut_opts["nojournal"] and "journal" in mongod_options: + del mongod_options["journal"] # Ensure that config servers run with journaling enabled. - if "configsvr" in kwargs: + if "configsvr" in mongod_options: shortcut_opts["nojournal"] = False - kwargs["journal"] = "" + mongod_options["journal"] = "" # Command line options override the YAML configuration. for opt_name in shortcut_opts: @@ -261,25 +272,32 @@ def mongod_program( # pylint: disable=too-many-branches,too-many-statements # Options that are specified as --flag on the command line are represented by a boolean # value where True indicates that the flag should be included in 'kwargs'. if opt_value: - kwargs[opt_name] = "" + mongod_options[opt_name] = "" else: # Options that are specified as --key=value on the command line are represented by a # value where None indicates that the key-value pair shouldn't be included in 'kwargs'. if opt_value is not None: - kwargs[opt_name] = opt_value + mongod_options[opt_name] = opt_value # Override the storage engine specified on the command line with "wiredTiger" if running a # config server replica set. - if "replSet" in kwargs and "configsvr" in kwargs: - kwargs["storageEngine"] = "wiredTiger" + if "replSet" in mongod_options and "configsvr" in mongod_options: + mongod_options["storageEngine"] = "wiredTiger" + + # set_parameters has its own logic above + mongod_options.pop("set_parameters") # Apply the rest of the command line arguments. - _apply_kwargs(args, kwargs) + _apply_kwargs(args, mongod_options) - _set_keyfile_permissions(kwargs) + _set_keyfile_permissions(mongod_options) - process_kwargs = utils.default_if_none(process_kwargs, {}) + process_kwargs = make_historic(utils.default_if_none(process_kwargs, {})) process_kwargs["job_num"] = job_num + if config.EXPORT_MONGOD_CONFIG == "regular": + mongod_options.dump_history(f"{logger.name}_config.yml") + elif config.EXPORT_MONGOD_CONFIG == "detailed": + mongod_options.dump_history(f"{logger.name}_config.yml", include_location=True) return make_process(logger, args, **process_kwargs) @@ -291,7 +309,7 @@ def mongos_program(logger, job_num, test_id=None, executable=None, process_kwarg # Apply the --setParameter command line argument. Command line options to resmoke.py override # the YAML configuration. - suite_set_parameters = kwargs.pop("set_parameters", {}) + suite_set_parameters = make_historic(kwargs.pop("set_parameters", {})) if config.MONGOS_SET_PARAMETERS is not None: suite_set_parameters.update(utils.load_yaml(config.MONGOS_SET_PARAMETERS)) @@ -309,7 +327,7 @@ def mongos_program(logger, job_num, test_id=None, executable=None, process_kwarg _set_keyfile_permissions(kwargs) - process_kwargs = utils.default_if_none(process_kwargs, {}) + process_kwargs = make_historic(utils.default_if_none(process_kwargs, {})) process_kwargs["job_num"] = job_num process_kwargs["test_id"] = test_id return make_process(logger, args, **process_kwargs) @@ -506,7 +524,7 @@ def _format_shell_vars(sb, paths, value): return lst[0] + ''.join(f'["{i}"]' for i in lst[1:]) # Only need to do special handling for JSON objects. - if not isinstance(value, dict): + if not isinstance(value, (dict, HistoryDict)): sb.append("%s = %s" % (bracketize(paths), json.dumps(value))) return diff --git a/buildscripts/resmokelib/run/__init__.py b/buildscripts/resmokelib/run/__init__.py index 444ee40b3f0..5b9df271f3b 100644 --- a/buildscripts/resmokelib/run/__init__.py +++ b/buildscripts/resmokelib/run/__init__.py @@ -896,6 +896,13 @@ class RunPlugin(PluginInterface): metavar="ON|OFF", help=("Enables or disables the stagger of launching resmoke jobs." " Defaults to %%default.")) + internal_options.add_argument( + "--exportMongodConfig", dest="export_mongod_config", choices=("off", "regular", + "detailed"), + help=("Exports a yaml containing the history of each mongod config option to" + " {nodeName}_config.yml." + " Defaults to 'off'. A 'detailed' export will include locations of accesses.")) + evergreen_options = parser.add_argument_group( title=_EVERGREEN_ARGUMENT_TITLE, description=( "Options used to propagate information about the Evergreen task running this" diff --git a/buildscripts/resmokelib/testing/fixtures/interface.py b/buildscripts/resmokelib/testing/fixtures/interface.py index ad6be804cd6..c1bcac60ebe 100644 --- a/buildscripts/resmokelib/testing/fixtures/interface.py +++ b/buildscripts/resmokelib/testing/fixtures/interface.py @@ -40,7 +40,7 @@ def make_fixture(class_name, *args, **kwargs): return _FIXTURES[class_name](*args, **kwargs) -class Fixture(object, metaclass=registry.make_registry_metaclass(_FIXTURES)): +class Fixture(object, metaclass=registry.make_registry_metaclass(_FIXTURES)): # pylint: disable=invalid-metaclass """Base class for all fixtures.""" # We explicitly set the 'REGISTERED_NAME' attribute so that PyLint realizes that the attribute diff --git a/buildscripts/resmokelib/testing/fixtures/replicaset.py b/buildscripts/resmokelib/testing/fixtures/replicaset.py index 4ae5d810772..516b4ba7ccd 100644 --- a/buildscripts/resmokelib/testing/fixtures/replicaset.py +++ b/buildscripts/resmokelib/testing/fixtures/replicaset.py @@ -11,6 +11,7 @@ from buildscripts.resmokelib import config from buildscripts.resmokelib import errors from buildscripts.resmokelib import logging from buildscripts.resmokelib import utils +from buildscripts.resmokelib.utils.history import make_historic from buildscripts.resmokelib.multiversionconstants import LAST_LTS_MONGOD_BINARY from buildscripts.resmokelib.testing.fixtures import interface from buildscripts.resmokelib.testing.fixtures import replicaset_utils @@ -40,12 +41,13 @@ class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-inst interface.ReplFixture.__init__(self, logger, job_num, dbpath_prefix=dbpath_prefix) self.mongod_executable = mongod_executable - self.mongod_options = utils.default_if_none(mongod_options, {}) + self.mongod_options = make_historic(utils.default_if_none(mongod_options, {})) self.preserve_dbpath = preserve_dbpath self.start_initial_sync_node = start_initial_sync_node self.write_concern_majority_journal_default = write_concern_majority_journal_default self.auth_options = auth_options - self.replset_config_options = utils.default_if_none(replset_config_options, {}) + self.replset_config_options = make_historic( + utils.default_if_none(replset_config_options, {})) self.voting_secondaries = voting_secondaries self.all_nodes_electable = all_nodes_electable self.use_replica_set_connection_string = use_replica_set_connection_string @@ -92,12 +94,12 @@ class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-inst self.use_replica_set_connection_string = self.all_nodes_electable if self.default_write_concern is True: - self.default_write_concern = { + self.default_write_concern = make_historic({ "w": "majority", # Use a "signature" value that won't typically match a value assigned in normal use. # This way the wtimeout set by this override is distinguishable in the server logs. "wtimeout": 5 * 60 * 1000 + 321, # 300321ms - } + }) # Set the default oplogSize to 511MB. self.mongod_options.setdefault("oplogSize", 511) @@ -132,10 +134,10 @@ class ReplicaSetFixture(interface.ReplFixture): # pylint: disable=too-many-inst self.nodes[i].mongod_options["set_parameters"][steady_state_constraint_param] = True if self.linear_chain and i > 0: self.nodes[i].mongod_options["set_parameters"][ - "failpoint.forceSyncSourceCandidate"] = { + "failpoint.forceSyncSourceCandidate"] = make_historic({ "mode": "alwaysOn", "data": {"hostAndPort": self.nodes[i - 1].get_internal_connection_string()} - } + }) self.nodes[i].setup() if self.start_initial_sync_node: diff --git a/buildscripts/resmokelib/testing/fixtures/shardedcluster.py b/buildscripts/resmokelib/testing/fixtures/shardedcluster.py index 0101786dd01..f60eee61471 100644 --- a/buildscripts/resmokelib/testing/fixtures/shardedcluster.py +++ b/buildscripts/resmokelib/testing/fixtures/shardedcluster.py @@ -11,6 +11,7 @@ from buildscripts.resmokelib import core from buildscripts.resmokelib import errors from buildscripts.resmokelib import logging from buildscripts.resmokelib import utils +from buildscripts.resmokelib.utils.history import make_historic from buildscripts.resmokelib.multiversionconstants import LAST_LTS_MONGOS_BINARY from buildscripts.resmokelib.testing.fixtures import interface from buildscripts.resmokelib.testing.fixtures import replicaset @@ -38,12 +39,13 @@ class ShardedClusterFixture(interface.Fixture): # pylint: disable=too-many-inst raise ValueError("Cannot specify mongod_options.dbpath") self.mongos_executable = mongos_executable - self.mongos_options = utils.default_if_none(mongos_options, {}) - self.mongod_options = utils.default_if_none(mongod_options, {}) + self.mongos_options = make_historic(utils.default_if_none(mongos_options, {})) + self.mongod_options = make_historic(utils.default_if_none(mongod_options, {})) self.mongod_executable = mongod_executable - self.mongod_options["set_parameters"] = mongod_options.get("set_parameters", {}).copy() + self.mongod_options["set_parameters"] = make_historic( + mongod_options.get("set_parameters", {})).copy() self.mongod_options["set_parameters"]["migrationLockAcquisitionMaxWaitMS"] = \ - mongod_options["set_parameters"].get("migrationLockAcquisitionMaxWaitMS", 30000) + self.mongod_options["set_parameters"].get("migrationLockAcquisitionMaxWaitMS", 30000) self.preserve_dbpath = preserve_dbpath # Use 'num_shards' and 'num_rs_nodes_per_shard' values from the command line if they exist. num_shards_option = config.NUM_SHARDS @@ -55,8 +57,8 @@ class ShardedClusterFixture(interface.Fixture): # pylint: disable=too-many-inst self.enable_balancer = enable_balancer self.enable_autosplit = enable_autosplit self.auth_options = auth_options - self.configsvr_options = utils.default_if_none(configsvr_options, {}) - self.shard_options = utils.default_if_none(shard_options, {}) + self.configsvr_options = make_historic(utils.default_if_none(configsvr_options, {})) + self.shard_options = make_historic(utils.default_if_none(shard_options, {})) self.mixed_bin_versions = utils.default_if_none(mixed_bin_versions, config.MIXED_BIN_VERSIONS) @@ -269,7 +271,7 @@ class ShardedClusterFixture(interface.Fixture): # pylint: disable=too-many-inst replset_config_options["configsvr"] = True mongod_options = self.mongod_options.copy() - mongod_options.update(configsvr_options.pop("mongod_options", {})) + mongod_options.update(make_historic(configsvr_options.pop("mongod_options", {}))) mongod_options["configsvr"] = "" mongod_options["dbpath"] = os.path.join(self._dbpath_prefix, "config") mongod_options["replSet"] = ShardedClusterFixture._CONFIGSVR_REPLSET_NAME @@ -294,7 +296,7 @@ class ShardedClusterFixture(interface.Fixture): # pylint: disable=too-many-inst auth_options = shard_options.pop("auth_options", self.auth_options) preserve_dbpath = shard_options.pop("preserve_dbpath", self.preserve_dbpath) - replset_config_options = shard_options.pop("replset_config_options", {}) + replset_config_options = make_historic(shard_options.pop("replset_config_options", {})) replset_config_options["configsvr"] = False mixed_bin_versions = self.mixed_bin_versions @@ -304,7 +306,7 @@ class ShardedClusterFixture(interface.Fixture): # pylint: disable=too-many-inst num_rs_nodes_per_shard] mongod_options = self.mongod_options.copy() - mongod_options.update(shard_options.pop("mongod_options", {})) + mongod_options.update(make_historic(shard_options.pop("mongod_options", {}))) mongod_options["shardsvr"] = "" mongod_options["dbpath"] = os.path.join(self._dbpath_prefix, "shard{}".format(index)) mongod_options["replSet"] = ShardedClusterFixture._SHARD_REPLSET_NAME_PREFIX + str(index) @@ -369,7 +371,7 @@ class _MongoSFixture(interface.Fixture): # Default to command line options if the YAML configuration is not passed in. self.mongos_executable = utils.default_if_none(mongos_executable, config.MONGOS_EXECUTABLE) - self.mongos_options = utils.default_if_none(mongos_options, {}).copy() + self.mongos_options = make_historic(utils.default_if_none(mongos_options, {})).copy() self.mongos = None self.port = None diff --git a/buildscripts/resmokelib/testing/fixtures/standalone.py b/buildscripts/resmokelib/testing/fixtures/standalone.py index e8252c1cb52..de56719e357 100644 --- a/buildscripts/resmokelib/testing/fixtures/standalone.py +++ b/buildscripts/resmokelib/testing/fixtures/standalone.py @@ -11,6 +11,7 @@ from buildscripts.resmokelib import config from buildscripts.resmokelib import core from buildscripts.resmokelib import errors from buildscripts.resmokelib import utils +from buildscripts.resmokelib.utils.history import make_historic from buildscripts.resmokelib.testing.fixtures import interface @@ -24,7 +25,7 @@ class MongoDFixture(interface.Fixture): preserve_dbpath=False): """Initialize MongoDFixture with different options for the mongod process.""" - self.mongod_options = utils.default_if_none(mongod_options, {}) + self.mongod_options = make_historic(utils.default_if_none(mongod_options, {})) interface.Fixture.__init__(self, logger, job_num, dbpath_prefix=dbpath_prefix) if "dbpath" in self.mongod_options and dbpath_prefix is not None: @@ -33,7 +34,7 @@ class MongoDFixture(interface.Fixture): # Default to command line options if the YAML configuration is not passed in. self.mongod_executable = utils.default_if_none(mongod_executable, config.MONGOD_EXECUTABLE) - self.mongod_options = utils.default_if_none(mongod_options, {}).copy() + self.mongod_options = make_historic(utils.default_if_none(mongod_options, {})).copy() # The dbpath in mongod_options takes precedence over other settings to make it easier for # users to specify a dbpath containing data to test against. @@ -66,8 +67,9 @@ class MongoDFixture(interface.Fixture): self.mongod_options["port"] = core.network.PortAllocator.next_fixture_port(self.job_num) self.port = self.mongod_options["port"] - mongod = core.programs.mongod_program( - self.logger, self.job_num, executable=self.mongod_executable, **self.mongod_options) + mongod = core.programs.mongod_program(self.logger, self.job_num, + executable=self.mongod_executable, + mongod_options=self.mongod_options) try: self.logger.info("Starting mongod on port %d...\n%s", self.port, mongod.as_command()) mongod.start() diff --git a/buildscripts/resmokelib/testing/hooks/interface.py b/buildscripts/resmokelib/testing/hooks/interface.py index a635f44f707..f9739dd2575 100644 --- a/buildscripts/resmokelib/testing/hooks/interface.py +++ b/buildscripts/resmokelib/testing/hooks/interface.py @@ -19,7 +19,7 @@ def make_hook(class_name, *args, **kwargs): return _HOOKS[class_name](*args, **kwargs) -class Hook(object, metaclass=registry.make_registry_metaclass(_HOOKS)): +class Hook(object, metaclass=registry.make_registry_metaclass(_HOOKS)): # pylint: disable=invalid-metaclass """Common interface all Hooks will inherit from.""" REGISTERED_NAME = registry.LEAVE_UNREGISTERED diff --git a/buildscripts/resmokelib/testing/testcases/interface.py b/buildscripts/resmokelib/testing/testcases/interface.py index 237cf0f4049..474ddb2ae01 100644 --- a/buildscripts/resmokelib/testing/testcases/interface.py +++ b/buildscripts/resmokelib/testing/testcases/interface.py @@ -21,7 +21,7 @@ def make_test_case(test_kind, *args, **kwargs): return _TEST_CASES[test_kind](*args, **kwargs) -class TestCase(unittest.TestCase, metaclass=registry.make_registry_metaclass(_TEST_CASES)): # pylint: disable=too-many-instance-attributes +class TestCase(unittest.TestCase, metaclass=registry.make_registry_metaclass(_TEST_CASES)): # pylint: disable=too-many-instance-attributes, invalid-metaclass """A test case to execute.""" REGISTERED_NAME = registry.LEAVE_UNREGISTERED diff --git a/buildscripts/resmokelib/utils/history.py b/buildscripts/resmokelib/utils/history.py new file mode 100644 index 00000000000..6b82f8d4dd0 --- /dev/null +++ b/buildscripts/resmokelib/utils/history.py @@ -0,0 +1,414 @@ +"""Contains classes and methods for tracking historic state.""" + +from abc import ABC, abstractmethod +from collections import defaultdict +from collections.abc import MutableMapping +from dataclasses import dataclass, field, asdict +from enum import Enum +import traceback +import typing +import copy +import yaml + +from buildscripts.resmokelib.utils import default_if_none, load_yaml, load_yaml_file +from buildscripts.resmokelib.utils import registry + +# How large of a stack to take for each location. +STACK_LIMIT = 3 + +_HISTORICS = {} # type: ignore + +SCHEMA_VERSION = "0.1" + + +def make_historic(obj): + """Convert a python object into a corresponding Historic.""" + if isinstance(obj, ALLOWED_TYPES): + return obj + obj_class = _HISTORICS[type(obj).__name__] + return obj_class.from_python_obj(obj) + + +def historic_from_stored_dict(stored_dict): + """Convert a dict for storage into a historic object.""" + obj_class = _HISTORICS[stored_dict["object_class"]] + return obj_class.from_storable_dict(stored_dict) + + +def storable_dict_from_historic(to_storable): + """Convert a historic into a dict that can be stored in history.""" + if isinstance(to_storable, Historic): + return to_storable.to_storable_dict() + else: + # If we're a primitive immutable + return to_storable + + +class Historic(ABC, metaclass=registry.make_registry_metaclass(_HISTORICS, type(ABC))): # pylint: disable=invalid-metaclass + """ABC for classes that have trackable historic state.""" + + def __init__(self): + """Initialize subscriber list.""" + self._subscribers = [] + + def subscribe(self, subscriber, key): + """ + Subscribe to the Historic object. + + The subscriber's accept_read / write is called on an update. + """ + if not isinstance(subscriber, Historic): + raise ValueError("Subscribers should inherit from the Historic ABC.") + + self._subscribers.append(Subscriber(obj=subscriber, key=key)) + + def unsubscribe(self, subscriber): + """Allow a subscriber to unsubscribe from notifications.""" + self._subscribers = [sub for sub in self._subscribers if sub.obj is not subscriber] + + def notify_subscriber_read(self): + """Notify the subscribers that a read has happened.""" + for subscriber in self._subscribers: + subscriber.obj.accept_read(subscriber.key) + + def notify_subscriber_write(self): + """Notify the subscribers that a write has happened.""" + for subscriber in self._subscribers: + subscriber.obj.accept_write(subscriber.key) + + @abstractmethod + def to_storable_dict(self): + """ + Convert this object to a dict that can be stored in yaml. + + Note that if a Historic stores history data itself, this is allowed to + be lost in the storage/retrieval of subordinate Historics. + """ + return + + @staticmethod + @abstractmethod + def from_storable_dict(raw_dict): + """Create a new object from the raw dict returned by to_storable_dict.""" + return + + @staticmethod + @abstractmethod + def from_python_obj(obj): + """ + Create a new Historic from the given python object. + + If inheriting from this in a class that wraps a python object, + include a REGISTERED_NAME string attribute for the type it converts from. + Otherwise, override this function to just return obj. + """ + return + + def accept_read(self, key): # pylint: disable=unused-argument + """ + Update state based on a subscriber's read. + + Override this method if a class also tracks historic state. + """ + self.notify_subscriber_read() + + def accept_write(self, key): # pylint: disable=unused-argument + """ + Update state based on a subscriber's write. + + Override this method if a class also tracks historic state. + """ + self.notify_subscriber_write() + + +@dataclass +class Subscriber: + """Class representing the subscriber to a Historic.""" + + obj: 'typing.Any' + key: 'typing.Any' + + +# We only allow immutable types or types that have special logic +# for being inside a HistoryDict, or else we may miss changes and +# have difficulty converting to yaml. +ALLOWED_TYPES = (bool, int, float, str, type(None), Historic) + + +class HistoryDict(MutableMapping, Historic): # pylint: disable=too-many-ancestors + """ + Dict-like class that tracks history. + + Smart stored classes can decide for themselves what + counts as an access. Don't assume thread safety. + Note that this class will deep-copy stored values. + """ + + REGISTERED_NAME = "dict" + + def __init__(self, filename=None, yaml_string=None, raw_dict=None): + """Init from a yaml file, from a yaml string, or default-construct.""" + + super(HistoryDict, self).__init__() + + if filename is not None and yaml_string is not None: + raise ValueError("Cannot construct HistoryDict from both yaml object and file.") + + self._history_store = defaultdict(list) + self._value_store = dict() + self._global_time = 0 + + raw_dict = default_if_none(raw_dict, {}) + if filename is not None: + raw_dict = load_yaml_file(filename) + elif yaml_string is not None: + raw_dict = load_yaml(yaml_string) + else: + return # Just default-construct. + + schema_version = raw_dict["SchemaVersion"] + if schema_version != SCHEMA_VERSION: + raise ValueError( + f"Invalid schema version. Expected {SCHEMA_VERSION} but found {schema_version}.") + history_dict = raw_dict["History"] + for key in history_dict: + for raw_access in history_dict[key]: + access = Access.from_dict(raw_access) + self._history_store[key].append(access) + self._global_time = max(access.time, self._global_time) + last_val = self._retrieve_last_value(key) + if last_val is not TOMBSTONE: + self._value_store[key] = last_val + + # The next recorded global time should be 1 higher than the last. + self._global_time += 1 + + def dump_history(self, filename=None, include_location=False): + """Dump the history to as yaml.""" + + # We can't safe_dump python objects, so we convert the whole store + # to yaml-able data. (We assume the stored contents are yaml-able.) + dumpable = dict() + for key in self._history_store: + dumpable[key] = list() + for access in self._history_store[key]: + access_dict = access.as_dict() + if include_location: + access_dict["location"] = PipeLiteral(access_dict["location"]) + else: + del access_dict["location"] + dumpable[key].append(access_dict) + + to_dump = {"History": dumpable} + dump = yaml.dump(to_dump) + + # Improves human readability for this data a ton. + processed = [] + for line in dump.splitlines(): + if not line.startswith((" -", " ")): + # If this line starts a top-level key. + processed.append("\n") + processed.append(line) + output = "\n".join(processed) + + # Make sure SchemaVersion is at the top. + output = f"SchemaVersion: \"{SCHEMA_VERSION}\"\n" + output + if filename is not None: + with open(filename, "w") as fp: + fp.write(output) + + return output + + def write_equals(self, other_dict): + """Compare two dicts for write equality.""" + if not len(other_dict._value_store) == len(self._value_store): # pylint: disable=protected-access + return False + + for key in self._value_store: + our_writes = [ + access.value_written for access in self._history_store[key] + if access.type == AccessType.WRITE + ] + their_writes = [ + access.value_written for access in other_dict._history_store[key] # pylint: disable=protected-access + if access.type == AccessType.WRITE + ] + if not our_writes == their_writes: + return False + return True + + def to_storable_dict(self): + """Convert to a dict for storage, overrides Historic.""" + storable_dict = {} + for key, value in self._value_store.items(): + storable_dict[key] = storable_dict_from_historic(value) + return {"object_class": HistoryDict.REGISTERED_NAME, "object_value": storable_dict} + + @staticmethod + def from_storable_dict(raw_dict): + """Convert from a dict for storage, overrides Historic.""" + return HistoryDict(raw_dict=raw_dict["object_value"]) + + @staticmethod + def from_python_obj(obj): + """Convert from a python object, overrides Historic.""" + if not isinstance(obj, dict): + raise ValueError("HistoryDict can only be converted from dict python objects.") + history_dict = HistoryDict() + for key, value in obj.items(): + history_dict[key] = make_historic(value) + return history_dict + + def accept_read(self, key): + """Record subscribee's read. Overrides Historic.""" + self._record_read(key) + super(HistoryDict, self).accept_read(key) + + def accept_write(self, key): + """Record subscribee's write. Overrides Historic.""" + self._record_write(key, self._value_store[key]) + super(HistoryDict, self).accept_write(key) + + def copy(self): + """ + Shallow-copy the value store, deep-copy history. + + Don't record reads/writes here. + """ + history_dict = HistoryDict() + history_dict._global_time = self._global_time # pylint: disable=protected-access + history_dict._history_store = copy.deepcopy(self._history_store) # pylint: disable=protected-access + for key, value in self.items(): + history_dict[key] = make_historic(value) + return history_dict + + def __getitem__(self, key): + self._record_read(key) + self.notify_subscriber_read() + + # We don't return a deep copy because we rely on objects to alert us + # when modified. + return self._value_store[key] + + def __setitem__(self, key, value): + if not isinstance(value, ALLOWED_TYPES): + raise ValueError(f"HistoryDict cannot store type {type(value)}." + " Please use a different type or create a Historic wrapper.") + + self._value_store[key] = value + self._record_write(key, value) + if isinstance(value, HistoryDict): + value.subscribe(self, key) + + self.notify_subscriber_write() + + def __delitem__(self, key): + self._record_delete(key) + if isinstance(self._value_store[key], Historic): + self._value_store[key].unsubscribe(self) + del self._value_store[key] + + self.notify_subscriber_write() + + def __iter__(self): + return iter(self._value_store) + + def __len__(self): + return len(self._value_store) + + def __str__(self): + # Dict's str doesn't recursively convert subordinate HistoryDicts + pairs = [] + for key, value in self._value_store.items(): + if isinstance(value, str): + pairs.append(f"'{key}': '{str(value)}'") + else: + pairs.append(f"'{key}': {str(value)}") + return "{" + ", ".join(pairs) + "}" + + def _record_read(self, key): + cur_access = Access(type=AccessType.READ, location=_get_location(), value_written=None, + time=self._global_time) + self._history_store[key].append(cur_access) + self._global_time += 1 + + def _record_write(self, key, value): + history_value = storable_dict_from_historic(value) + cur_access = Access(type=AccessType.WRITE, location=_get_location(), + value_written=copy.deepcopy(history_value), time=self._global_time) + self._history_store[key].append(cur_access) + self._global_time += 1 + + def _record_delete(self, key): + cur_access = Access(type=AccessType.DELETE, location=_get_location(), value_written=None, + time=self._global_time) + self._history_store[key].append(cur_access) + self._global_time += 1 + + def _retrieve_last_value(self, key): + for access in reversed(self._history_store[key]): + if access.type == AccessType.WRITE: + if isinstance(access.value_written, dict): + return historic_from_stored_dict(access.value_written) + else: + return copy.deepcopy(access.value_written) + elif access.type == AccessType.DELETE: + break + return TOMBSTONE + + +# Represents a value that was deleted (or was never created). +TOMBSTONE = object() + + +class AccessType(Enum): + """Class representing the operation performed in an accesss.""" + + READ = 0 + WRITE = 1 + DELETE = 2 + + +@dataclass +class Access: + """Class representing an access to store in the dict's history.""" + + type: 'AccessType' + time: int + location: ['traceback.FrameSummary'] = field(default_factory=list) + value_written: 'typing.Any' = None + + def as_dict(self): + """Convert this class into a dict (accounting for AccessType).""" + self_dict = asdict(self) + self_dict["type"] = self_dict["type"].name + self_dict["value_written"] = copy.deepcopy(self.value_written) + return self_dict + + @staticmethod + def from_dict(raw_dict): + """Retrieve this class from a dict (accounting for AccessType).""" + return Access( + type=AccessType[raw_dict["type"]], time=raw_dict["time"], + location=raw_dict["location"] if "location" in raw_dict else list(), + value_written=copy.deepcopy(raw_dict["value_written"]) + if "value_written" in raw_dict else None) + + +def _get_location(): + """Return the location as a string, accounting for this function and the parent in the stack.""" + return "".join(traceback.format_stack(limit=STACK_LIMIT + 2)[:-2]) + + +class PipeLiteral(str): + """Construct with a string to create a pipe literal for yaml representation.""" + + pass + + +def pipe_literal_representer(dumper, data): + """Create a representer for pipe literals, used internally for pyyaml.""" + return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|') + + +yaml.add_representer(PipeLiteral, pipe_literal_representer) diff --git a/buildscripts/resmokelib/utils/registry.py b/buildscripts/resmokelib/utils/registry.py index 4248b8c38b3..47b51b051c5 100644 --- a/buildscripts/resmokelib/utils/registry.py +++ b/buildscripts/resmokelib/utils/registry.py @@ -6,22 +6,26 @@ This pattern enables the associated class to be looked up later by using its name. """ +from buildscripts.resmokelib.utils import default_if_none + # Specifying 'LEAVE_UNREGISTERED' as the "REGISTERED_NAME" attribute will cause the class to be # omitted from the registry. This is particularly useful for base classes that define an interface # or common functionality, and aren't intended to be constructed explicitly. LEAVE_UNREGISTERED = object() -def make_registry_metaclass(registry_store): +def make_registry_metaclass(registry_store, base_metaclass=None): """Return a new Registry metaclass.""" if not isinstance(registry_store, dict): raise TypeError("'registry_store' argument must be a dict") - class Registry(type): + base_metaclass = default_if_none(base_metaclass, type) + + class Registry(base_metaclass): """A metaclass that stores a reference to all registered classes.""" - def __new__(mcs, class_name, base_classes, class_dict): # pylint: disable=bad-mcs-classmethod-argument + def __new__(mcs, class_name, base_classes, class_dict): # pylint: disable=bad-mcs-classmethod-argument,bad-classmethod-argument """Create and returns a new instance of Registry. The registry is a class named 'class_name' derived from 'base_classes' @@ -40,7 +44,7 @@ def make_registry_metaclass(registry_store): """ registered_name = class_dict.setdefault("REGISTERED_NAME", class_name) - cls = type.__new__(mcs, class_name, base_classes, class_dict) + cls = base_metaclass.__new__(mcs, class_name, base_classes, class_dict) if registered_name is not LEAVE_UNREGISTERED: if registered_name in registry_store: diff --git a/buildscripts/tests/resmokelib/utils/test_history.py b/buildscripts/tests/resmokelib/utils/test_history.py new file mode 100644 index 00000000000..2e4fc5f9910 --- /dev/null +++ b/buildscripts/tests/resmokelib/utils/test_history.py @@ -0,0 +1,246 @@ +"""Unit tests for buildscripts/resmokelib/utils/scheduler.py.""" + +import sched +import unittest + +from buildscripts.resmokelib.utils.history import HistoryDict, make_historic + +# pylint: disable=missing-docstring + + +class TestHistory(unittest.TestCase): + """Unit tests for the HistoryDict class.""" + + def test_acts_like_dict(self): + test_dict = HistoryDict() + self.assertRaises(KeyError, lambda: test_dict["nonexistent_key"]) + + test_dict["key1"] = "key1value1" + self.assertEqual(test_dict["key1"], "key1value1") + test_dict["key1"] = "key1value2" + self.assertEqual(test_dict["key1"], "key1value2") + del test_dict["key1"] + self.assertRaises(KeyError, lambda: test_dict["key1"]) + + def test_is_iterable(self): + test_dict = HistoryDict() + test_dict["key1"] = "key1val" + test_dict["key2"] = "key2val" + test_dict["key3"] = "key3val" + test_dict["key4"] = "key4val" + + expected_vals = ["key1val", "key2val", "key3val", "key4val"] + actual_vals = [] + for key in test_dict: + actual_vals.append(test_dict[key]) + + self.assertCountEqual(actual_vals, expected_vals) + + def test_inner_dict(self): + test_dict = HistoryDict() + inner_dict = HistoryDict() + inner_dict["foo"] = "bar" + test_dict["innerDict"] = inner_dict + test_dict["innerDict"]["foo"] = "za" + another_ref = test_dict["innerDict"] + another_ref["another_added"] = "another_val" + self.assertEqual(test_dict["innerDict"]["foo"], "za") + self.assertEqual(test_dict["innerDict"]["another_added"], "another_val") + + expected_test_dict = """SchemaVersion: "0.1" + + +History: + + + innerDict: + - time: 0 + type: WRITE + value_written: + object_class: dict + object_value: + foo: bar + - time: 1 + type: READ + value_written: null + - time: 2 + type: WRITE + value_written: + object_class: dict + object_value: + foo: za + - time: 3 + type: READ + value_written: null + - time: 4 + type: WRITE + value_written: + object_class: dict + object_value: + another_added: another_val + foo: za + - time: 5 + type: READ + value_written: null + - time: 6 + type: READ + value_written: null + - time: 7 + type: READ + value_written: null + - time: 8 + type: READ + value_written: null""" + + test_dict_dumped = test_dict.dump_history() + self.assertEqual(test_dict_dumped, expected_test_dict) + final_dict = HistoryDict(yaml_string=test_dict_dumped) + self.assertEqual(expected_test_dict, final_dict.dump_history()) + + def test_make_historic(self): + actual_dict = {"foo": "bar", "a": "b", "innerdict": {"innerkey": "innerval"}} + test_dict = make_historic(actual_dict) + test_dict["a"] = "c" + + # Updating actual_dict doesn't affect test_dict (it copied). + actual_dict["foo"] = "za" + # Similarly, updating the inner dict doesn't either. + actual_dict["innerdict"]["innerkey"] = "innerval2" + + # However, updating the inner dict on the test_dict does. + test_dict["innerdict"]["innerkey"] = "secondinnerval" + expected_test_dict = """SchemaVersion: "0.1" + + +History: + + + a: + - time: 1 + type: WRITE + value_written: b + - time: 3 + type: WRITE + value_written: c + + + foo: + - time: 0 + type: WRITE + value_written: bar + + + innerdict: + - time: 2 + type: WRITE + value_written: + object_class: dict + object_value: + innerkey: innerval + - time: 4 + type: READ + value_written: null + - time: 5 + type: WRITE + value_written: + object_class: dict + object_value: + innerkey: secondinnerval""" + + self.assertEqual(test_dict.dump_history(), expected_test_dict) + + def test_dump_and_load(self): + test_dict = HistoryDict() + + test_dict["key1"] = "key1value1" + test_dict["key1"] = "key1value2" + test_dict["key2"] = "key2value1" + del test_dict["key1"] + + # Testing with location would be flaky across machines since it + # uses absolute pathing. It's just for human convenience anyway. + expected_test_dict = """SchemaVersion: "0.1" + + +History: + + + key1: + - time: 0 + type: WRITE + value_written: key1value1 + - time: 1 + type: WRITE + value_written: key1value2 + - time: 3 + type: DELETE + value_written: null + + + key2: + - time: 2 + type: WRITE + value_written: key2value1""" + self.assertEqual(test_dict.dump_history(), expected_test_dict) + + test_dict["key2"] = "key2value2" + second_dict = HistoryDict(yaml_string=expected_test_dict) + + self.assertRaises(KeyError, lambda: second_dict["key1"]) + self.assertEqual(second_dict["key2"], "key2value1") + + # Include the reads / writes we just did. + expected_second_dict = """SchemaVersion: "0.1" + + +History: + + + key1: + - time: 0 + type: WRITE + value_written: key1value1 + - time: 1 + type: WRITE + value_written: key1value2 + - time: 3 + type: DELETE + value_written: null + - time: 4 + type: READ + value_written: null + + + key2: + - time: 2 + type: WRITE + value_written: key2value1 + - time: 5 + type: READ + value_written: null""" + + self.assertEqual(second_dict.dump_history(), expected_second_dict) + + def test_write_equality(self): + test_dict = HistoryDict() + test_dict["foo"] = "bar" + test_dict["myint"] = 1 + test_dict["foo"] = "za" + test_dict["innerdict"] = make_historic({"a": "b"}) + + second_dict = HistoryDict() + second_dict["foo"] = "bar" + second_dict["myint"] = 1 + second_dict["foo"] = "za" + second_dict["innerdict"] = make_historic({"a": "b"}) + + self.assertTrue(test_dict.write_equals(second_dict)) + + second_dict["another"] = "write" + self.assertFalse(test_dict.write_equals(second_dict)) + test_dict["another"] = "write" + self.assertTrue(test_dict.write_equals(second_dict)) + + # Reads aren't counted + gotten_value = second_dict["foo"] # pylint: disable=unused-variable + self.assertTrue(test_dict.write_equals(second_dict))