""" Customize the behavior of a fixture by allowing special code to be executed before or after each test, and before or after each suite. """ from __future__ import absolute_import import os import sys import bson import pymongo from . import fixtures from . import testcases from .. import errors from .. import logging from .. import utils def make_custom_behavior(class_name, *args, **kwargs): """ Factory function for creating CustomBehavior instances. """ if class_name not in _CUSTOM_BEHAVIORS: raise ValueError("Unknown custom behavior class '%s'" % (class_name)) return _CUSTOM_BEHAVIORS[class_name](*args, **kwargs) class CustomBehavior(object): """ The common interface all CustomBehaviors will inherit from. """ @staticmethod def start_dynamic_test(test_case, test_report): """ If a CustomBehavior wants to add a test case that will show up in the test report, it should use this method to add it to the report, since we will need to count it as a dynamic test to get the stats in the summary information right. """ test_report.startTest(test_case, dynamic=True) def __init__(self, logger, fixture): """ Initializes the CustomBehavior with the specified fixture. """ if not isinstance(logger, logging.Logger): raise TypeError("logger must be a Logger instance") self.logger = logger self.fixture = fixture def before_suite(self, test_report): """ The test runner calls this exactly once before they start running the suite. """ pass def after_suite(self, test_report): """ The test runner calls this exactly once after all tests have finished executing. Be sure to reset the behavior back to its original state so that it can be run again. """ pass def before_test(self, test_report): """ Each test will call this before it executes. Raises a TestFailure if the test should be marked as a failure, or a ServerFailure if the fixture exits uncleanly or unexpectedly. """ pass def after_test(self, test_report): """ Each test will call this after it executes. Raises a TestFailure if the test should be marked as a failure, or a ServerFailure if the fixture exits uncleanly or unexpectedly. """ pass class CleanEveryN(CustomBehavior): """ Restarts the fixture after it has ran 'n' tests. On mongod-related fixtures, this will clear the dbpath. """ DEFAULT_N = 20 def __init__(self, logger, fixture, n=DEFAULT_N): CustomBehavior.__init__(self, logger, fixture) # Try to isolate what test triggers the leak by restarting the fixture each time. if "detect_leaks=1" in os.getenv("ASAN_OPTIONS", ""): self.logger.info("ASAN_OPTIONS environment variable set to detect leaks, so restarting" " the fixture after each test instead of after every %d.", n) n = 1 self.n = n self.tests_run = 0 def after_test(self, test_report): self.tests_run += 1 if self.tests_run >= self.n: self.logger.info("%d tests have been run against the fixture, stopping it...", self.tests_run) self.tests_run = 0 teardown_success = self.fixture.teardown() self.logger.info("Starting the fixture back up again...") self.fixture.setup() self.fixture.await_ready() # Raise this after calling setup in case --continueOnFailure was specified. if not teardown_success: raise errors.TestFailure("%s did not exit cleanly" % (self.fixture)) class CheckReplDBHash(CustomBehavior): """ Waits for replication after each test, then checks that the dbhahses of all databases other than "local" match on the primary and all of the secondaries. If any dbhashes do not match, logs information about what was different (e.g. Different numbers of collections, missing documents in a collection, mismatching documents, etc). Compatible only with ReplFixture subclasses. """ def __init__(self, logger, fixture): if not isinstance(fixture, fixtures.ReplFixture): raise TypeError("%s does not support replication" % (fixture.__class__.__name__)) CustomBehavior.__init__(self, logger, fixture) self.test_case = testcases.TestCase(self.logger, "Hook", "#dbhash#") self.started = False def after_test(self, test_report): """ After each test, check that the dbhash of the test database is the same on all nodes in the replica set or master/slave fixture. """ try: if not self.started: CustomBehavior.start_dynamic_test(self.test_case, test_report) self.started = True # Wait until all operations have replicated. self.fixture.await_repl() success = True sb = [] # String builder. primary = self.fixture.get_primary() primary_conn = utils.new_mongo_client(port=primary.port) for secondary in self.fixture.get_secondaries(): read_preference = pymongo.ReadPreference.SECONDARY secondary_conn = utils.new_mongo_client(port=secondary.port, read_preference=read_preference) # Skip arbiters. if secondary_conn.admin.command("isMaster").get("arbiterOnly", False): continue all_matched = CheckReplDBHash._check_all_db_hashes(primary_conn, secondary_conn, sb) if not all_matched: sb.insert(0, "One or more databases were different between the primary on port %d" " and the secondary on port %d:" % (primary.port, secondary.port)) success = all_matched and success if not success: # Adding failures to a TestReport requires traceback information, so we raise # a 'self.test_case.failureException' that we will catch ourselves. self.test_case.logger.info("\n ".join(sb)) raise self.test_case.failureException("The dbhashes did not match") except self.test_case.failureException as err: self.test_case.logger.exception("The dbhashes did not match.") self.test_case.return_code = 1 test_report.addFailure(self.test_case, sys.exc_info()) test_report.stopTest(self.test_case) raise errors.ServerFailure(err.message) except pymongo.errors.WTimeoutError: self.test_case.logger.exception("Awaiting replication timed out.") self.test_case.return_code = 2 test_report.addError(self.test_case, sys.exc_info()) test_report.stopTest(self.test_case) raise errors.StopExecution("Awaiting replication timed out") def after_suite(self, test_report): """ If we get to this point, the #dbhash# test must have been successful, so add it to the test report. """ if self.started: self.test_case.logger.exception("The dbhashes matched for all tests.") self.test_case.return_code = 0 test_report.addSuccess(self.test_case) # TestReport.stopTest() has already been called if there was a failure. test_report.stopTest(self.test_case) self.started = False @staticmethod def _check_all_db_hashes(primary_conn, secondary_conn, sb): """ Returns true if for each non-local database, the dbhash command returns the same MD5 hash on the primary as it does on the secondary. Returns false otherwise. Logs a message describing the differences if any database's dbhash did not match. """ # Overview of how we'll check that everything replicated correctly between these two nodes: # # - Check whether they have the same databases. # - If not, log which databases are missing where, and dump the contents of any that are # missing. # # - Check whether each database besides "local" gives the same md5 field as the result of # running the dbhash command. # - If not, check whether they have the same collections. # - If not, log which collections are missing where, and dump the contents of any # that are missing. # - If so, check that the hash of each non-capped collection matches. # - If any do not match, log the diff of the collection between the two nodes. success = True if not CheckReplDBHash._check_dbs_present(primary_conn, secondary_conn, sb): return False for db_name in primary_conn.database_names(): if db_name == "local": continue # We don't expect this to match across different nodes. matched = CheckReplDBHash._check_db_hash(primary_conn, secondary_conn, db_name, sb) success = matched and success return success @staticmethod def _check_dbs_present(primary_conn, secondary_conn, sb): """ Returns true if the list of databases on the primary is identical to the list of databases on the secondary, and false otherwise. """ success = True primary_dbs = primary_conn.database_names() # Can't run database_names() on secondary, so instead use the listDatabases command. # TODO: Use database_names() once PYTHON-921 is resolved. list_db_output = secondary_conn.admin.command("listDatabases") secondary_dbs = [db["name"] for db in list_db_output["databases"]] missing_on_primary, missing_on_secondary = CheckReplDBHash._check_difference( set(primary_dbs), set(secondary_dbs), "database", sb) for missing_db in missing_on_secondary: db = primary_conn[missing_db] coll_names = db.collection_names() non_system_colls = [name for name in coll_names if not name.startswith("system.")] # It is only an error if there are any non-system collections in the database, # otherwise it's not well defined whether they should exist or not. if non_system_colls: sb.append("Database %s present on primary but not on secondary." % (missing_db)) CheckReplDBHash._dump_all_collections(db, non_system_colls, sb) success = False for missing_db in missing_on_primary: db = secondary_conn[missing_db] # Can't run collection_names() on secondary, so instead use the listCollections command. # TODO: Always use collection_names() once PYTHON-921 is resolved. Then much of the # logic that is duplicated here can be consolidated. list_coll_output = db.command("listCollections")["cursor"]["firstBatch"] coll_names = [coll["name"] for coll in list_coll_output] non_system_colls = [name for name in coll_names if not name.startswith("system.")] # It is only an error if there are any non-system collections in the database, # otherwise it's not well defined if it should exist or not. if non_system_colls: CheckReplDBHash._dump_all_collections(db, non_system_colls, sb) success = False return success @staticmethod def _check_db_hash(primary_conn, secondary_conn, db_name, sb): """ Returns true if the dbhash for 'db_name' matches on the primary and the secondary, and false otherwise. Appends a message to 'sb' describing the differences if the dbhashes do not match. """ primary_hash = primary_conn[db_name].command("dbhash") secondary_hash = secondary_conn[db_name].command("dbhash") if primary_hash["md5"] == secondary_hash["md5"]: return True success = CheckReplDBHash._check_dbs_eq( primary_conn, secondary_conn, primary_hash, secondary_hash, db_name, sb) if not success: sb.append("Database %s has a different hash on the primary and the secondary" " ([ %s ] != [ %s ]):" % (db_name, primary_hash["md5"], secondary_hash["md5"])) return success @staticmethod def _check_dbs_eq(primary_conn, secondary_conn, primary_hash, secondary_hash, db_name, sb): """ Returns true if all non-capped collections had the same hash in the dbhash response, and false otherwise. Appends information to 'sb' about the differences between the 'db_name' database on the primary and the 'db_name' database on the secondary, if any. """ success = True primary_db = primary_conn[db_name] secondary_db = secondary_conn[db_name] primary_coll_hashes = primary_hash["collections"] secondary_coll_hashes = secondary_hash["collections"] primary_coll_names = set(primary_coll_hashes.keys()) secondary_coll_names = set(secondary_coll_hashes.keys()) missing_on_primary, missing_on_secondary = CheckReplDBHash._check_difference( primary_coll_names, secondary_coll_names, "collection", sb) if missing_on_primary or missing_on_secondary: # 'sb' already describes which collections are missing where. for coll_name in missing_on_primary: CheckReplDBHash._dump_all_documents(primary_db, coll_name, sb) for coll_name in missing_on_secondary: CheckReplDBHash._dump_all_documents(secondary_db, coll_name, sb) return for coll_name in primary_coll_names & secondary_coll_names: primary_coll_hash = primary_coll_hashes[coll_name] secondary_coll_hash = secondary_coll_hashes[coll_name] if primary_coll_hash == secondary_coll_hash: continue # Ignore capped collections because they are not expected to match on all nodes. if primary_db.command({"collStats": coll_name})["capped"]: # Still fail if the collection is not capped on the secondary. if not secondary_db.command({"collStats": coll_name})["capped"]: success = False sb.append("%s.%s collection is capped on primary but not on secondary." % (primary_db.name, coll_name)) sb.append("%s.%s collection is capped, ignoring." % (primary_db.name, coll_name)) continue # Still fail if the collection is capped on the secondary, but not on the primary. elif secondary_db.command({"collStats": coll_name})["capped"]: success = False sb.append("%s.%s collection is capped on secondary but not on primary." % (primary_db.name, coll_name)) continue success = False sb.append("Collection %s.%s has a different hash on the primary and the secondary" " ([ %s ] != [ %s ]):" % (db_name, coll_name, primary_coll_hash, secondary_coll_hash)) CheckReplDBHash._check_colls_eq(primary_db, secondary_db, coll_name, sb) if success: sb.append("All collections that were expected to match did.") return success @staticmethod def _check_colls_eq(primary_db, secondary_db, coll_name, sb): """ Appends information to 'sb' about the differences or between the 'coll_name' collection on the primary and the 'coll_name' collection on the secondary, if any. """ codec_options = bson.CodecOptions(document_class=bson.SON) primary_coll = primary_db.get_collection(coll_name, codec_options=codec_options) secondary_coll = secondary_db.get_collection(coll_name, codec_options=codec_options) primary_docs = CheckReplDBHash._extract_documents(primary_coll) secondary_docs = CheckReplDBHash._extract_documents(secondary_coll) CheckReplDBHash._get_collection_diff(primary_docs, secondary_docs, sb) @staticmethod def _extract_documents(collection): """ Returns a list of all documents in the collection, sorted by their _id. """ return [doc for doc in collection.find().sort("_id", pymongo.ASCENDING)] @staticmethod def _get_collection_diff(primary_docs, secondary_docs, sb): """ Returns true if the documents in 'primary_docs' exactly match the documents in 'secondary_docs', and false otherwise. Appends information to 'sb' about what matched or did not match. """ matched = True # These need to be lists instead of sets because documents aren't hashable. missing_on_primary = [] missing_on_secondary = [] p_idx = 0 # Keep track of our position in 'primary_docs'. s_idx = 0 # Keep track of our position in 'secondary_docs'. while p_idx < len(primary_docs) and s_idx < len(secondary_docs): primary_doc = primary_docs[p_idx] secondary_doc = secondary_docs[s_idx] if primary_doc == secondary_doc: p_idx += 1 s_idx += 1 continue # We have mismatching documents. matched = False if primary_doc["_id"] == secondary_doc["_id"]: sb.append("Mismatching document:") sb.append(" primary: %s" % (primary_doc)) sb.append(" secondary: %s" % (secondary_doc)) p_idx += 1 s_idx += 1 # One node was missing a document. Since the documents are sorted by _id, the doc with # the smaller _id was the one that was skipped. elif primary_doc["_id"] < secondary_doc["_id"]: missing_on_secondary.append(primary_doc) # Only move past the doc that we know was skipped. p_idx += 1 else: # primary_doc["_id"] > secondary_doc["_id"] missing_on_primary.append(secondary_doc) # Only move past the doc that we know was skipped. s_idx += 1 # Check if there are any unmatched documents left. while p_idx < len(primary_docs): matched = False missing_on_secondary.append(primary_docs[p_idx]) p_idx += 1 while s_idx < len(secondary_docs): matched = False missing_on_primary.append(secondary_docs[s_idx]) s_idx += 1 if not matched: CheckReplDBHash._append_differences( missing_on_primary, missing_on_secondary, "document", sb) else: sb.append("All documents matched.") @staticmethod def _check_difference(primary_set, secondary_set, item_type_name, sb): """ Returns true if the contents of 'primary_set' and 'secondary_set' are identical, and false otherwise. The sets contain information about the primary and secondary, respectively, e.g. the database names that exist on each node. Appends information about anything that differed to 'sb'. """ missing_on_primary = set() missing_on_secondary = set() for item in primary_set - secondary_set: missing_on_secondary.add(item) for item in secondary_set - primary_set: missing_on_primary.add(item) CheckReplDBHash._append_differences( missing_on_primary, missing_on_secondary, item_type_name, sb) return (missing_on_primary, missing_on_secondary) @staticmethod def _append_differences(missing_on_primary, missing_on_secondary, item_type_name, sb): """ Given two iterables representing items that were missing on the primary or the secondary respectively, append the information about which items were missing to 'sb', if any. """ if missing_on_primary: sb.append("The following %ss were present on the secondary, but not on the" " primary:" % (item_type_name)) for item in missing_on_primary: sb.append(str(item)) if missing_on_secondary: sb.append("The following %ss were present on the primary, but not on the" " secondary:" % (item_type_name)) for item in missing_on_secondary: sb.append(str(item)) @staticmethod def _dump_all_collections(database, coll_names, sb): """ Appends the contents of each of the collections in 'coll_names' to 'sb'. """ if coll_names: sb.append("Database %s contains the following collections: %s" % (database.name, coll_names)) for coll_name in coll_names: CheckReplDBHash._dump_all_documents(database, coll_name, sb) else: sb.append("No collections in database %s." % (database.name)) @staticmethod def _dump_all_documents(database, coll_name, sb): """ Appends the contents of 'coll_name' to 'sb'. """ docs = CheckReplDBHash._extract_documents(database[coll_name]) if docs: sb.append("Documents in %s.%s:" % (database.name, coll_name)) for doc in docs: sb.append(" %s" % (doc)) else: sb.append("No documents in %s.%s." % (database.name, coll_name)) _CUSTOM_BEHAVIORS = { "CleanEveryN": CleanEveryN, "CheckReplDBHash": CheckReplDBHash, }