mongo/buildscripts/resmokelib/testing/hooks.py

"""
Customize the behavior of a fixture by allowing special code to be
executed before or after each test, and before or after each suite.
"""

from __future__ import absolute_import

import os
import sys

import bson
import pymongo

from . import fixtures
from . import testcases
from .. import errors
from .. import logging
from .. import utils


def make_custom_behavior(class_name, *args, **kwargs):
    """
    Factory function for creating CustomBehavior instances.
    """

    if class_name not in _CUSTOM_BEHAVIORS:
        raise ValueError("Unknown custom behavior class '%s'" % (class_name))
    return _CUSTOM_BEHAVIORS[class_name](*args, **kwargs)


class CustomBehavior(object):
    """
    The common interface all CustomBehaviors will inherit from.
    """

    @staticmethod
    def start_dynamic_test(test_case, test_report):
        """
        If a CustomBehavior wants to add a test case that will show up
        in the test report, it should use this method to add it to the
        report, since we will need to count it as a dynamic test to get
        the stats in the summary information right.
        """
        test_report.startTest(test_case, dynamic=True)

    def __init__(self, logger, fixture):
        """
        Initializes the CustomBehavior with the specified fixture.
        """

        if not isinstance(logger, logging.Logger):
            raise TypeError("logger must be a Logger instance")

        self.logger = logger
        self.fixture = fixture

    def before_suite(self, test_report):
        """
        The test runner calls this exactly once before they start
        running the suite.
        """
        pass

    def after_suite(self, test_report):
        """
        The test runner calls this exactly once after all tests have
        finished executing. Be sure to reset the behavior back to its
        original state so that it can be run again.
        """
        pass

    def before_test(self, test_report):
        """
        Each test will call this before it executes.

        Raises a TestFailure if the test should be marked as a failure,
        or a ServerFailure if the fixture exits uncleanly or
        unexpectedly.
        """
        pass

    def after_test(self, test_report):
        """
        Each test will call this after it executes.

        Raises a TestFailure if the test should be marked as a failure,
        or a ServerFailure if the fixture exits uncleanly or
        unexpectedly.
        """
        pass


class CleanEveryN(CustomBehavior):
    """
    Restarts the fixture after it has ran 'n' tests.
    On mongod-related fixtures, this will clear the dbpath.
    """

    DEFAULT_N = 20

    def __init__(self, logger, fixture, n=DEFAULT_N):
        CustomBehavior.__init__(self, logger, fixture)

        # Try to isolate what test triggers the leak by restarting the fixture each time.
        if "detect_leaks=1" in os.getenv("ASAN_OPTIONS", ""):
            self.logger.info("ASAN_OPTIONS environment variable set to detect leaks, so restarting"
                             " the fixture after each test instead of after every %d.", n)
            n = 1

        self.n = n
        self.tests_run = 0

    def after_test(self, test_report):
        self.tests_run += 1
        if self.tests_run >= self.n:
            self.logger.info("%d tests have been run against the fixture, stopping it...",
                             self.tests_run)
            self.tests_run = 0

            teardown_success = self.fixture.teardown()
            self.logger.info("Starting the fixture back up again...")
            self.fixture.setup()
            self.fixture.await_ready()

            # Raise this after calling setup in case --continueOnFailure was specified.
            if not teardown_success:
                raise errors.TestFailure("%s did not exit cleanly" % (self.fixture))


class CheckReplDBHash(CustomBehavior):
    """
    Waits for replication after each test, then checks that the dbhahses
    of all databases other than "local" match on the primary and all of
    the secondaries. If any dbhashes do not match, logs information
    about what was different (e.g. Different numbers of collections,
    missing documents in a collection, mismatching documents, etc).

    Compatible only with ReplFixture subclasses.
    """

    def __init__(self, logger, fixture):
        if not isinstance(fixture, fixtures.ReplFixture):
            raise TypeError("%s does not support replication" % (fixture.__class__.__name__))

        CustomBehavior.__init__(self, logger, fixture)

        self.test_case = testcases.TestCase(self.logger, "Hook", "#dbhash#")

        self.started = False

    def after_test(self, test_report):
        """
        After each test, check that the dbhash of the test database is
        the same on all nodes in the replica set or master/slave
        fixture.
        """

        try:
            if not self.started:
                CustomBehavior.start_dynamic_test(self.test_case, test_report)
                self.started = True

            # Wait until all operations have replicated.
            self.fixture.await_repl()

            success = True
            sb = []  # String builder.

            primary = self.fixture.get_primary()
            primary_conn = utils.new_mongo_client(port=primary.port)

            for secondary in self.fixture.get_secondaries():
                read_preference = pymongo.ReadPreference.SECONDARY
                secondary_conn = utils.new_mongo_client(port=secondary.port,
                                                        read_preference=read_preference)
                # Skip arbiters.
                if secondary_conn.admin.command("isMaster").get("arbiterOnly", False):
                    continue

                all_matched = CheckReplDBHash._check_all_db_hashes(primary_conn,
                                                                   secondary_conn,
                                                                   sb)
                if not all_matched:
                    sb.insert(0,
                              "One or more databases were different between the primary on port %d"
                              " and the secondary on port %d:"
                              % (primary.port, secondary.port))

                success = all_matched and success

            if not success:
                # Adding failures to a TestReport requires traceback information, so we raise
                # a 'self.test_case.failureException' that we will catch ourselves.
                self.test_case.logger.info("\n    ".join(sb))
                raise self.test_case.failureException("The dbhashes did not match")
        except self.test_case.failureException as err:
            self.test_case.logger.exception("The dbhashes did not match.")
            self.test_case.return_code = 1
            test_report.addFailure(self.test_case, sys.exc_info())
            test_report.stopTest(self.test_case)
            raise errors.ServerFailure(err.message)
        except pymongo.errors.WTimeoutError:
            self.test_case.logger.exception("Awaiting replication timed out.")
            self.test_case.return_code = 2
            test_report.addError(self.test_case, sys.exc_info())
            test_report.stopTest(self.test_case)
            raise errors.StopExecution("Awaiting replication timed out")

    def after_suite(self, test_report):
        """
        If we get to this point, the #dbhash# test must have been
        successful, so add it to the test report.
        """

        if self.started:
            self.test_case.logger.exception("The dbhashes matched for all tests.")
            self.test_case.return_code = 0
            test_report.addSuccess(self.test_case)
            # TestReport.stopTest() has already been called if there was a failure.
            test_report.stopTest(self.test_case)

        self.started = False

    @staticmethod
    def _check_all_db_hashes(primary_conn, secondary_conn, sb):
        """
        Returns true if for each non-local database, the dbhash command
        returns the same MD5 hash on the primary as it does on the
        secondary. Returns false otherwise.

        Logs a message describing the differences if any database's
        dbhash did not match.
        """

        # Overview of how we'll check that everything replicated correctly between these two nodes:
        #
        # - Check whether they have the same databases.
        #     - If not, log which databases are missing where, and dump the contents of any that are
        #       missing.
        #
        # - Check whether each database besides "local" gives the same md5 field as the result of
        #   running the dbhash command.
        #     - If not, check whether they have the same collections.
        #         - If not, log which collections are missing where, and dump the contents of any
        #           that are missing.
        #     - If so, check that the hash of each non-capped collection matches.
        #         - If any do not match, log the diff of the collection between the two nodes.

        success = True

        if not CheckReplDBHash._check_dbs_present(primary_conn, secondary_conn, sb):
            return False

        for db_name in primary_conn.database_names():
            if db_name == "local":
                continue  # We don't expect this to match across different nodes.

            matched = CheckReplDBHash._check_db_hash(primary_conn, secondary_conn, db_name, sb)
            success = matched and success

        return success

    @staticmethod
    def _check_dbs_present(primary_conn, secondary_conn, sb):
        """
        Returns true if the list of databases on the primary is
        identical to the list of databases on the secondary, and false
        otherwise.
        """

        success = True
        primary_dbs = primary_conn.database_names()

        # Can't run database_names() on secondary, so instead use the listDatabases command.
        # TODO: Use database_names() once PYTHON-921 is resolved.
        list_db_output = secondary_conn.admin.command("listDatabases")
        secondary_dbs = [db["name"] for db in list_db_output["databases"]]

        missing_on_primary, missing_on_secondary = CheckReplDBHash._check_difference(
            set(primary_dbs), set(secondary_dbs), "database", sb)

        for missing_db in missing_on_secondary:
            db = primary_conn[missing_db]
            coll_names = db.collection_names()
            non_system_colls = [name for name in coll_names if not name.startswith("system.")]

            # It is only an error if there are any non-system collections in the database,
            # otherwise it's not well defined whether they should exist or not.
            if non_system_colls:
                sb.append("Database %s present on primary but not on secondary." % (missing_db))
                CheckReplDBHash._dump_all_collections(db, non_system_colls, sb)
                success = False

        for missing_db in missing_on_primary:
            db = secondary_conn[missing_db]

            # Can't run collection_names() on secondary, so instead use the listCollections command.
            # TODO: Always use collection_names() once PYTHON-921 is resolved. Then much of the
            # logic that is duplicated here can be consolidated.
            list_coll_output = db.command("listCollections")["cursor"]["firstBatch"]
            coll_names = [coll["name"] for coll in list_coll_output]
            non_system_colls = [name for name in coll_names if not name.startswith("system.")]

            # It is only an error if there are any non-system collections in the database,
            # otherwise it's not well defined if it should exist or not.
            if non_system_colls:
                CheckReplDBHash._dump_all_collections(db, non_system_colls, sb)
                success = False

        return success

    @staticmethod
    def _check_db_hash(primary_conn, secondary_conn, db_name, sb):
        """
        Returns true if the dbhash for 'db_name' matches on the primary
        and the secondary, and false otherwise.

        Appends a message to 'sb' describing the differences if the
        dbhashes do not match.
        """

        primary_hash = primary_conn[db_name].command("dbhash")
        secondary_hash = secondary_conn[db_name].command("dbhash")

        if primary_hash["md5"] == secondary_hash["md5"]:
            return True

        success = CheckReplDBHash._check_dbs_eq(
            primary_conn, secondary_conn, primary_hash, secondary_hash, db_name, sb)

        if not success:
            sb.append("Database %s has a different hash on the primary and the secondary"
                      " ([ %s ] != [ %s ]):"
                      % (db_name, primary_hash["md5"], secondary_hash["md5"]))

        return success

    @staticmethod
    def _check_dbs_eq(primary_conn, secondary_conn, primary_hash, secondary_hash, db_name, sb):
        """
        Returns true if all non-capped collections had the same hash in
        the dbhash response, and false otherwise.

        Appends information to 'sb' about the differences between the
        'db_name' database on the primary and the 'db_name' database on
        the secondary, if any.
        """

        success = True

        primary_db = primary_conn[db_name]
        secondary_db = secondary_conn[db_name]

        primary_coll_hashes = primary_hash["collections"]
        secondary_coll_hashes = secondary_hash["collections"]

        primary_coll_names = set(primary_coll_hashes.keys())
        secondary_coll_names = set(secondary_coll_hashes.keys())

        missing_on_primary, missing_on_secondary = CheckReplDBHash._check_difference(
            primary_coll_names, secondary_coll_names, "collection", sb)

        if missing_on_primary or missing_on_secondary:

            # 'sb' already describes which collections are missing where.
            for coll_name in missing_on_primary:
                CheckReplDBHash._dump_all_documents(secondary_db, coll_name, sb)
            for coll_name in missing_on_secondary:
                CheckReplDBHash._dump_all_documents(primary_db, coll_name, sb)
            return

        for coll_name in primary_coll_names & secondary_coll_names:
            primary_coll_hash = primary_coll_hashes[coll_name]
            secondary_coll_hash = secondary_coll_hashes[coll_name]

            if primary_coll_hash == secondary_coll_hash:
                continue

            # Ignore capped collections because they are not expected to match on all nodes.
            if primary_db.command({"collStats": coll_name})["capped"]:
                # Still fail if the collection is not capped on the secondary.
                if not secondary_db.command({"collStats": coll_name})["capped"]:
                    success = False
                    sb.append("%s.%s collection is capped on primary but not on secondary."
                              % (primary_db.name, coll_name))
                sb.append("%s.%s collection is capped, ignoring." % (primary_db.name, coll_name))
                continue
            # Still fail if the collection is capped on the secondary, but not on the primary.
            elif secondary_db.command({"collStats": coll_name})["capped"]:
                success = False
                sb.append("%s.%s collection is capped on secondary but not on primary."
                          % (primary_db.name, coll_name))
                continue

            success = False
            sb.append("Collection %s.%s has a different hash on the primary and the secondary"
                      " ([ %s ] != [ %s ]):"
                      % (db_name, coll_name, primary_coll_hash, secondary_coll_hash))
            CheckReplDBHash._check_colls_eq(primary_db, secondary_db, coll_name, sb)

        if success:
            sb.append("All collections that were expected to match did.")
        return success

    @staticmethod
    def _check_colls_eq(primary_db, secondary_db, coll_name, sb):
        """
        Appends information to 'sb' about the differences or between
        the 'coll_name' collection on the primary and the 'coll_name'
        collection on the secondary, if any.
        """

        codec_options = bson.CodecOptions(document_class=bson.SON)

        primary_coll = primary_db.get_collection(coll_name, codec_options=codec_options)
        secondary_coll = secondary_db.get_collection(coll_name, codec_options=codec_options)

        primary_docs = CheckReplDBHash._extract_documents(primary_coll)
        secondary_docs = CheckReplDBHash._extract_documents(secondary_coll)

        CheckReplDBHash._get_collection_diff(primary_docs, secondary_docs, sb)

    @staticmethod
    def _extract_documents(collection):
        """
        Returns a list of all documents in the collection, sorted by
        their _id.
        """

        return [doc for doc in collection.find().sort("_id", pymongo.ASCENDING)]

    @staticmethod
    def _get_collection_diff(primary_docs, secondary_docs, sb):
        """
        Returns true if the documents in 'primary_docs' exactly match
        the documents in 'secondary_docs', and false otherwise.

        Appends information to 'sb' about what matched or did not match.
        """

        matched = True

        # These need to be lists instead of sets because documents aren't hashable.
        missing_on_primary = []
        missing_on_secondary = []

        p_idx = 0  # Keep track of our position in 'primary_docs'.
        s_idx = 0  # Keep track of our position in 'secondary_docs'.

        while p_idx < len(primary_docs) and s_idx < len(secondary_docs):
            primary_doc = primary_docs[p_idx]
            secondary_doc = secondary_docs[s_idx]

            if primary_doc == secondary_doc:
                p_idx += 1
                s_idx += 1
                continue

            # We have mismatching documents.
            matched = False

            if primary_doc["_id"] == secondary_doc["_id"]:
                sb.append("Mismatching document:")
                sb.append("    primary:   %s" % (primary_doc))
                sb.append("    secondary: %s" % (secondary_doc))
                p_idx += 1
                s_idx += 1

            # One node was missing a document. Since the documents are sorted by _id, the doc with
            # the smaller _id was the one that was skipped.
            elif primary_doc["_id"] < secondary_doc["_id"]:
                missing_on_secondary.append(primary_doc)

                # Only move past the doc that we know was skipped.
                p_idx += 1

            else:  # primary_doc["_id"] > secondary_doc["_id"]
                missing_on_primary.append(secondary_doc)

                # Only move past the doc that we know was skipped.
                s_idx += 1

        # Check if there are any unmatched documents left.
        while p_idx < len(primary_docs):
            matched = False
            missing_on_secondary.append(primary_docs[p_idx])
            p_idx += 1
        while s_idx < len(secondary_docs):
            matched = False
            missing_on_primary.append(secondary_docs[s_idx])
            s_idx += 1

        if not matched:
            CheckReplDBHash._append_differences(
                missing_on_primary, missing_on_secondary, "document", sb)
        else:
            sb.append("All documents matched.")

    @staticmethod
    def _check_difference(primary_set, secondary_set, item_type_name, sb):
        """
        Returns true if the contents of 'primary_set' and
        'secondary_set' are identical, and false otherwise. The sets
        contain information about the primary and secondary,
        respectively, e.g. the database names that exist on each node.

        Appends information about anything that differed to 'sb'.
        """

        missing_on_primary = set()
        missing_on_secondary = set()

        for item in primary_set - secondary_set:
            missing_on_secondary.add(item)

        for item in secondary_set - primary_set:
            missing_on_primary.add(item)

        CheckReplDBHash._append_differences(
            missing_on_primary, missing_on_secondary, item_type_name, sb)

        return (missing_on_primary, missing_on_secondary)

    @staticmethod
    def _append_differences(missing_on_primary, missing_on_secondary, item_type_name, sb):
        """
        Given two iterables representing items that were missing on the
        primary or the secondary respectively, append the information
        about which items were missing to 'sb', if any.
        """

        if missing_on_primary:
            sb.append("The following %ss were present on the secondary, but not on the"
                      " primary:" % (item_type_name))
            for item in missing_on_primary:
                sb.append(str(item))

        if missing_on_secondary:
            sb.append("The following %ss were present on the primary, but not on the"
                      " secondary:" % (item_type_name))
            for item in missing_on_secondary:
                sb.append(str(item))

    @staticmethod
    def _dump_all_collections(database, coll_names, sb):
        """
        Appends the contents of each of the collections in 'coll_names'
        to 'sb'.
        """

        if coll_names:
            sb.append("Database %s contains the following collections: %s"
                      % (database.name, coll_names))
            for coll_name in coll_names:
                CheckReplDBHash._dump_all_documents(database, coll_name, sb)
        else:
            sb.append("No collections in database %s." % (database.name))

    @staticmethod
    def _dump_all_documents(database, coll_name, sb):
        """
        Appends the contents of 'coll_name' to 'sb'.
        """

        docs = CheckReplDBHash._extract_documents(database[coll_name])
        if docs:
            sb.append("Documents in %s.%s:" % (database.name, coll_name))
            for doc in docs:
                sb.append("    %s" % (doc))
        else:
            sb.append("No documents in %s.%s." % (database.name, coll_name))


_CUSTOM_BEHAVIORS = {
    "CleanEveryN": CleanEveryN,
    "CheckReplDBHash": CheckReplDBHash,
}