mongo/buildscripts/update_test_lifecycle.py

#!/usr/bin/env python

"""Test Failures

Update etc/test_lifecycle.yml to tag unreliable tests based on historic failure rates.
"""
from __future__ import division
from __future__ import print_function

import collections
import copy
import fnmatch
import optparse
import os
import re
import subprocess
import sys
import textwrap

import yaml

# Get relative imports to work when the package is not installed on the PYTHONPATH.
if __name__ == "__main__" and __package__ is None:
    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from buildscripts import burn_in_tests
from buildscripts import resmokelib
from buildscripts import test_failures as tf


def read_yaml_file(yaml_file):
    """Parse the YAML document stored in 'yaml_file' and return the resulting object."""
    # NOTE(review): yaml.load() is called without an explicit Loader. Depending on the PyYAML
    # version this may construct arbitrary Python objects, so only feed it trusted files.
    with open(yaml_file, "r") as yaml_stream:
        contents = yaml.load(yaml_stream)
    return contents


def write_yaml_file(yaml_file, object):
    """Write 'object' to 'yaml_file' as block-style YAML, preceded by a provenance comment.

    The comment names the generating script (sys.argv[0]), the output of 'git rev-parse HEAD'
    and the full command line. The parameter name 'object' shadows the builtin; it is kept
    unchanged so that callers passing it by keyword continue to work.
    """

    # Setup to preserve order in yaml.dump, see https://stackoverflow.com/a/8661021
    represent_dict_order = lambda self, data: self.represent_mapping(
        'tag:yaml.org,2002:map', data.items())
    yaml.add_representer(collections.OrderedDict, represent_dict_order)

    # Build the header before opening the file so that a failing git invocation does not leave
    # a truncated file behind. The captured git output ends with a newline, which is stripped so
    # that it does not turn into stray whitespace in the middle of the comment.
    commit = callo(["git", "rev-parse", "HEAD"]).strip()
    comment = ("This file was generated by {} and shouldn't be edited by hand. It was"
               " generated against commit {} with the following invocation: {}.").format(
        sys.argv[0], commit, " ".join(sys.argv))
    header = textwrap.fill(comment, width=100, initial_indent="# ", subsequent_indent="# ")

    with open(yaml_file, "w") as fstream:
        print(header, file=fstream)
        yaml.dump(object, fstream, default_flow_style=False)


def get_test_suites_membership():
    """Return a dictionary with keys of all js_tests and list of associated suites.

    The mapping comes from resmokelib.parser.create_test_membership_map(); entries whose key is
    not a JS file (per resmokelib.utils.is_js_file) are removed from it before it is returned.
    """
    # SERVER-29486 - Non-JS tests can be returned, so we'll filter them here.
    test_membership_map = resmokelib.parser.create_test_membership_map()
    # Collect the keys to drop first: deleting entries while iterating over a live key view
    # raises a RuntimeError on Python 3 (on Python 2 keys() already returned a copy).
    non_js_tests = [test for test in test_membership_map.keys()
                    if not resmokelib.utils.is_js_file(test)]
    for test in non_js_tests:
        del test_membership_map[test]
    return test_membership_map


def get_suite_tasks_membership(evg_yaml):
    """Return a dictionary with keys of all suites and list of associated tasks.

    Each task in the "tasks" list of 'evg_yaml' is inspected for resmoke arguments. The suite is
    taken from the first '--suites=<name>' occurrence, where <name> is the leading run of word
    characters; tasks without resmoke arguments or without a '--suites' option are skipped.
    """
    evg = read_yaml_file(evg_yaml)
    suite_membership = collections.defaultdict(list)
    # Raw string literal: "\w" inside a regular string is an invalid escape sequence, which
    # newer Python versions warn about. The pattern is compiled once, outside the loop.
    suite_regex = re.compile(r"--suites=(?P<suite>\w+)")
    for task in evg["tasks"]:
        resmoke_args = burn_in_tests.get_resmoke_args(task)
        if not resmoke_args:
            continue
        match = suite_regex.search(resmoke_args)
        if match is not None:
            suite_membership[match.group("suite")].append(task["name"])
    return suite_membership


def get_test_tasks_membership(evg_yaml):
    """Map every test to the tasks that run it.

    The test -> suites mapping is combined with the suite -> tasks mapping derived from
    'evg_yaml'. The result is a defaultdict(list); a test only gets an entry if it belongs to at
    least one suite, and a task is listed once per suite through which it reaches the test.
    """
    suites_by_test = get_test_suites_membership()
    tasks_by_suite = get_suite_tasks_membership(evg_yaml)
    tasks_by_test = collections.defaultdict(list)
    for test, suites in suites_by_test.items():
        for suite in suites:
            tasks_by_test[test].extend(tasks_by_suite[suite])
    return tasks_by_test


def get_tests_from_tasks(tasks, test_tasks_membership):
    """Return the tests that are associated with at least one of the given tasks.

    'test_tasks_membership' maps a test name to the collection of tasks running that test.
    """
    wanted_tasks = set(tasks)
    return [test
            for test, test_tasks in test_tasks_membership.items()
            if not wanted_tasks.isdisjoint(test_tasks)]


def create_test_groups(tests):
    """Group tests by the second component of their '/'-separated path.

    For example, "jstests/core/a.js" is filed under "core". Tests whose path contains no "/"
    are left out. The result is a defaultdict(list) keyed by that path component.
    """
    groups = collections.defaultdict(list)
    for test_path in tests:
        path_parts = test_path.split("/")
        # Only tests that live inside a directory can be grouped.
        if len(path_parts) > 1:
            groups[path_parts[1]].append(test_path)
    return groups


def create_batch_groups(test_groups, batch_size):
    """Return a flat list of batches, each holding at most 'batch_size' tests of one group.

    'test_groups' maps a group name to its list of tests. Batches never mix tests from different
    groups, and empty groups contribute no batch.

    Raises ValueError if 'batch_size' is less than 1, since no progress could be made through a
    group with such a batch size.
    """
    if batch_size < 1:
        raise ValueError("'batch_size' must be greater than 0.")
    batch_groups = []
    for test_group_name in test_groups:
        test_group = test_groups[test_group_name]
        # Step through the group by index rather than repeatedly re-slicing the remainder.
        for start in range(0, len(test_group), batch_size):
            batch_groups.append(test_group[start:start + batch_size])
    return batch_groups


def get_all_tasks(evg_yaml):
    """Return the names of all tasks in evg_yaml that are not excluded.

       Exclusions are read from the optional top-level 'test_lifecycle_excluded_tasks' list.
       The returned list has no duplicates and no particular order.
    """
    evg = read_yaml_file(evg_yaml)
    task_names = [task["name"] for task in evg["tasks"]]
    # Each exclusion entry is a "Unix shell-style wildcard" pattern,
    # i.e., 'compile*', which matches 'compile', 'compile_all'
    excluded_names = set()
    for exclusion_pattern in evg.get("test_lifecycle_excluded_tasks", []):
        excluded_names.update(fnmatch.filter(task_names, exclusion_pattern))

    return list(set(task_names) - excluded_names)


def callo(args):
    """Call a program, and capture its output as text.

    'args' is the argument list handed to subprocess.check_output, so a
    subprocess.CalledProcessError is raised if the program exits with a non-zero status.
    """
    # universal_newlines=True makes the output a text string on Python 3 as well (it would be
    # bytes otherwise), which is what the callers that split and format the result expect.
    return subprocess.check_output(args, universal_newlines=True)


def git_commit_range_since(since):
    """Returns first and last commit in 'since' period specified.

    Specify 'since' as any acceptable period for git log --since.
    The period can be specified as '4.weeks' or '3.days'.

    The returned tuple is (last line, first line) of the 'git log' output; this relies on git
    listing the most recent commit first. If the output is empty, both values are "".
    """
    # Build the argument list directly instead of splitting a formatted command string, so that
    # a 'since' value containing whitespace (e.g. '2 weeks ago') stays a single argument.
    git_command = ["git", "log", "--since={}".format(since), "--pretty=format:%H"]
    commits = callo(git_command).split("\n")
    return commits[-1], commits[0]


def git_commit_prior(revision):
    """Return the commit hash that precedes 'revision' in the git log.

    The log is limited to two entries starting at 'revision'; the last line of that output is
    returned.
    """
    git_command = "git log -2 {revision} --pretty=format:%H".format(revision=revision)
    log_lines = callo(git_command.split()).split("\n")
    return log_lines[-1]


def unreliable_test(test_fr, unacceptable_fr, test_runs, min_run):
    """Return True if the test qualifies as unreliable.

    A test is considered unreliable when it has at least min_run executions and its failure
    rate test_fr is strictly greater than unacceptable_fr.
    """
    if not test_runs >= min_run:
        # Not enough executions to draw a conclusion.
        return False
    return test_fr > unacceptable_fr


def reliable_test(test_fr, acceptable_fr, test_runs, min_run):
    """Return True if the test qualifies as reliable.

    A test is considered reliable, and can be removed from the set of tests believed not to run
    reliably, when it has at least min_run executions and its failure rate test_fr is strictly
    less than acceptable_fr.
    """
    if not test_runs >= min_run:
        # Not enough executions to draw a conclusion.
        return False
    return test_fr < acceptable_fr


def check_fail_rates(fr_name, acceptable_fr, unacceptable_fr):
    """Validate a pair of failure rates.

    Raises ValueError, naming the pair with 'fr_name', if acceptable_fr > unacceptable_fr.
    """
    if not acceptable_fr > unacceptable_fr:
        return
    message = ("'{}' acceptable failure rate {} must be <= the unacceptable failure rate"
               " {}").format(fr_name, acceptable_fr, unacceptable_fr)
    raise ValueError(message)


def check_days(name, days):
    """Validate a number of days.

    Raises ValueError, naming the value with 'name', if days < 1.
    """
    if not days < 1:
        return
    message = "'{}' days must be greater than 0.".format(name)
    raise ValueError(message)


def js_test_exists(lifecycle, test):
    """Return True if 'test' has a non-None entry under lifecycle["selector"]["js_test"].

    A missing, None or empty "js_test" section yields False.
    """
    js_tests = lifecycle["selector"].get("js_test")
    if not js_tests:
        return False
    return js_tests.get(test) is not None


def get_lifecycle_test_tags(lifecycle, test):
    """Return the tags stored for the js_test 'test' in lifecycle.

    A new empty list is returned when the test has no entry (see js_test_exists).
    """
    if not js_test_exists(lifecycle, test):
        return []
    return lifecycle["selector"]["js_test"].get(test)


def update_lifecycle_test_tags(lifecycle, test, tags):
    """Returns lifecycle object after creating or updating the tags for 'test'.

    'lifecycle' is modified in place and also returned. If the "js_test" section of the selector
    is missing, None or empty, it is replaced by a new dictionary holding only 'test'.
    """
    selector = lifecycle["selector"]
    # Use .get() so that a selector without any "js_test" key is handled like an empty section
    # rather than raising KeyError; js_test_exists() tolerates the same situation.
    js_tests = selector.get("js_test")
    if js_tests:
        js_tests[test] = tags
    else:
        selector["js_test"] = {test: tags}
    return lifecycle


def unreliable_tag(test, task, variant, distro):
    """Return the most specific "unreliable" tag the arguments allow.

    The tag is "unreliable" followed by the task, variant and distro, each preceded by "|",
    and stops at the first of these that is empty. Returns None if 'test' is empty.
    """
    if not test:
        return None
    if not task:
        return "unreliable"
    if not variant:
        return "unreliable|{}".format(task)
    if not distro:
        return "unreliable|{}|{}".format(task, variant)
    return "unreliable|{}|{}|{}".format(task, variant, distro)


def _update_lifecycle(lifecycle, add_tags, test, existing_tags, update_tag):
    """Apply a single tag change for 'test' to the lifecycle object, in place.

    If add_tags is True, update_tag is added to existing_tags, otherwise it is removed from them.
    The resulting tags are stored for the test, sorted by their "|"-separated components. If no
    tags remain, the test's entry is deleted from lifecycle (when it has one).
    Nothing is returned.
    """
    tags = set(existing_tags)
    if add_tags:
        tags.add(update_tag)
    else:
        tags.discard(update_tag)

    if not tags:
        # No tags left, so remove test from lifecycle.
        if js_test_exists(lifecycle, test):
            del lifecycle["selector"]["js_test"][test]
        return

    ordered_tags = sorted(tags, key=lambda tag: tag.split("|"))
    update_lifecycle_test_tags(lifecycle, test, ordered_tags)


def update_lifecycle(lifecycle, report, method_test, add_tags, fail_rate, min_run):
    """Update the lifecycle object for every summary in 'report' selected by method_test.

    method_test is called with the summary's fail rate, 'fail_rate', the summary's number of
    runs (passes plus failures) and 'min_run'; unreliable_test and reliable_test have this
    signature. For each summary it accepts, the unreliable tag built from the summary's test,
    task, variant and distro is added to (add_tags is True) or removed from (add_tags is False)
    the test's tags.
    """
    for summary in report:
        summary_fail_rate = summary.fail_rate
        num_runs = summary.num_pass + summary.num_fail
        if not method_test(summary_fail_rate, fail_rate, num_runs, min_run):
            continue
        current_tags = get_lifecycle_test_tags(lifecycle, summary.test)
        tag = unreliable_tag(summary.test, summary.task, summary.variant, summary.distro)
        _update_lifecycle(lifecycle, add_tags, summary.test, current_tags, tag)


def main():
    """Parse the command line and update the lifecycle file with unreliable test tags.

    The positional arguments are the tests to analyze. For every batch of tests a history report
    is generated, and the "unreliable" tags of the lifecycle object are added or removed at
    test, task and variant granularity according to the configured fail rates. The lifecycle file
    is rewritten only if its content changed.
    """

    # Destinations of the options that must be supplied on the command line.
    required_options = ["project",
                        "reliable_test_min_run",
                        "unreliable_test_min_run",
                        "test_fail_rates",
                        ]
    parser = optparse.OptionParser(description=__doc__,
                                   usage="Usage: %prog [options] test1 test2 ...")
    parser.add_option("--project", dest="project",
                      default=None,
                      help="Evergreen project to analyze [REQUIRED].")
    parser.add_option("--reliableTestMinimumRun", dest="reliable_test_min_run",
                      default=None,
                      type="int",
                      help="Minimum number of tests runs for test to be considered as reliable"
                           " [REQUIRED].")
    parser.add_option("--unreliableTestMinimumRun", dest="unreliable_test_min_run",
                      default=None,
                      type="int",
                      help="Minimum number of tests runs for test to be considered as unreliable"
                           " [REQUIRED].")
    parser.add_option("--testFailRates", dest="test_fail_rates",
                      metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
                      default=None,
                      type="float",
                      nargs=2,
                      help="Test fail rates: acceptable fail rate and unacceptable fail rate"
                           " Specify floating numbers between 0.0 and 1.0 [REQUIRED].")
    # NOTE(review): the help texts of the three options below refer to '--test-fail-rates',
    # '--task-fail-rates' and '--variant-fail-rates', whereas the options are actually spelled
    # '--testFailRates', '--taskFailRates' and '--variantFailRates'.
    parser.add_option("--taskFailRates", dest="task_fail_rates",
                      metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
                      default=None,
                      type="float",
                      nargs=2,
                      help="Task fail rates: acceptable fail rate and unacceptable fail rate."
                           " Specify floating numbers between 0.0 and 1.0."
                           " Uses --test-fail-rates if unspecified.")
    parser.add_option("--variantFailRates", dest="variant_fail_rates",
                      metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
                      default=None,
                      type="float",
                      nargs=2,
                      help="Variant fail rates: acceptable fail rate and unacceptable fail rate."
                           " Specify floating numbers between 0.0 and 1.0."
                           " Uses --task-fail-rates if unspecified.")
    parser.add_option("--distroFailRates", dest="distro_fail_rates",
                      metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
                      default=None,
                      type="float",
                      nargs=2,
                      help="Distro fail rates: acceptable fail rate and unacceptable fail rate."
                           " Specify floating numbers between 0.0 and 1.0."
                           " Uses --variant-fail-rates if unspecified.")
    parser.add_option("--tasks", dest="tasks",
                      default=None,
                      help="Names of tasks to analyze for tagging unreliable tests."
                           " If specified and no tests are specified, then only tests"
                           " associated with the tasks will be analyzed."
                           " If unspecified and no tests are specified, the list of tasks will be"
                           " the non-excluded list of tasks from the file specified by"
                           " '--evergreenYML'.")
    parser.add_option("--variants", dest="variants",
                      default="",
                      help="Names of variants to analyze for tagging unreliable tests.")
    parser.add_option("--distros", dest="distros",
                      default="",
                      help="Names of distros to analyze for tagging unreliable tests [UNUSED].")
    parser.add_option("--evergreenYML", dest="evergreen_yml",
                      default="etc/evergreen.yml",
                      help="Evergreen YML file used to get the list of tasks,"
                           " defaults to '%default'.")
    parser.add_option("--lifecycleFile", dest="lifecycle_file",
                      default="etc/test_lifecycle.yml",
                      help="Evergreen lifecycle file to update, defaults to '%default'.")
    parser.add_option("--reliableDays", dest="reliable_days",
                      default=14,
                      type="int",
                      help="Number of days to check for reliable tests, defaults to '%default'.")
    parser.add_option("--unreliableDays", dest="unreliable_days",
                      default=28,
                      type="int",
                      help="Number of days to check for unreliable tests, defaults to '%default'.")
    parser.add_option("--batchGroupSize", dest="batch_size",
                      default=100,
                      type="int",
                      help="Size of test batch group, defaults to '%default'.")

    (options, tests) = parser.parse_args()

    # NOTE(review): this is a truthiness check, so a required option given as 0 (or as an empty
    # string) is reported as missing as well.
    for option in required_options:
        if not getattr(options, option):
            parser.print_help()
            parser.error("Missing required option")

    # When True, the tasks queried for each batch are derived from the tests of that batch.
    use_test_tasks_membership = False

    # --tasks, --variants and --distros are comma-separated lists.
    tasks = options.tasks.split(",") if options.tasks else []
    if not tasks:
        # If no tasks are specified, then the list of tasks is all.
        tasks = get_all_tasks(options.evergreen_yml)
        use_test_tasks_membership = True

    variants = options.variants.split(",") if options.variants else []

    distros = options.distros.split(",") if options.distros else []

    check_fail_rates("Test", options.test_fail_rates[0], options.test_fail_rates[1])
    # The less specific failures rates are optional and default to a lower level value.
    if not options.task_fail_rates:
        options.task_fail_rates = options.test_fail_rates
    else:
        check_fail_rates("Task", options.task_fail_rates[0], options.task_fail_rates[1])
    if not options.variant_fail_rates:
        options.variant_fail_rates = options.task_fail_rates
    else:
        check_fail_rates("Variant", options.variant_fail_rates[0], options.variant_fail_rates[1])
    if not options.distro_fail_rates:
        options.distro_fail_rates = options.variant_fail_rates
    else:
        check_fail_rates("Distro", options.distro_fail_rates[0], options.distro_fail_rates[1])

    check_days("Reliable days", options.reliable_days)
    check_days("Unreliable days", options.unreliable_days)

    # Work on a deep copy so that the original can be compared against the result at the end.
    orig_lifecycle = read_yaml_file(options.lifecycle_file)
    lifecycle = copy.deepcopy(orig_lifecycle)

    test_tasks_membership = get_test_tasks_membership(options.evergreen_yml)
    # If no tests are specified then the list of tests is generated from the list of tasks.
    if not tests:
        tests = get_tests_from_tasks(tasks, test_tasks_membership)
        # NOTE(review): whenever options.tasks is empty the flag was already set to True above,
        # so this assignment looks redundant.
        if not options.tasks:
            use_test_tasks_membership = True

    # The analyzed revision range starts at the commit prior to the first commit of the
    # unreliable period and ends at the last commit of that period.
    commit_first, commit_last = git_commit_range_since("{}.days".format(options.unreliable_days))
    commit_prior = git_commit_prior(commit_first)

    # For efficiency purposes, group the tests and process in batches of batch_size.
    test_groups = create_batch_groups(create_test_groups(tests), options.batch_size)

    # Note that the loop variable rebinds 'tests' to the tests of the current batch.
    for tests in test_groups:
        # Find all associated tasks for the test_group if tasks or tests were not specified.
        if use_test_tasks_membership:
            tasks_set = set()
            for test in tests:
                tasks_set = tasks_set.union(test_tasks_membership[test])
            tasks = list(tasks_set)
        if not tasks:
            print("Warning - No tasks found for tests {}, skipping this group.".format(tests))
            continue
        report = tf.HistoryReport(period_type="revision",
                                  start=commit_prior,
                                  end=commit_last,
                                  group_period=options.reliable_days,
                                  project=options.project,
                                  tests=tests,
                                  tasks=tasks,
                                  variants=variants,
                                  distros=distros)
        view_report = report.generate_report()

        # We build up report_combo to check for more specific test failures rates.
        report_combo = []
        # TODO EVG-1665: Uncomment this line once this has been supported.
        # for combo in ["test", "task", "variant", "distro"]:
        for combo in ["test", "task", "variant"]:
            # report_combo accumulates, so each pass groups on one more field than the last.
            report_combo.append(combo)
            if combo == "distro":
                acceptable_fail_rate = options.distro_fail_rates[0]
                unacceptable_fail_rate = options.distro_fail_rates[1]
            elif combo == "variant":
                acceptable_fail_rate = options.variant_fail_rates[0]
                unacceptable_fail_rate = options.variant_fail_rates[1]
            elif combo == "task":
                acceptable_fail_rate = options.task_fail_rates[0]
                unacceptable_fail_rate = options.task_fail_rates[1]
            else:
                acceptable_fail_rate = options.test_fail_rates[0]
                unacceptable_fail_rate = options.test_fail_rates[1]

            # Unreliable tests are analyzed from the entire period.
            update_lifecycle(lifecycle,
                             view_report.view_summary(group_on=report_combo),
                             unreliable_test,
                             True,
                             unacceptable_fail_rate,
                             options.unreliable_test_min_run)

            # Reliable tests are analyzed from the last period, i.e., last 14 days.
            (reliable_start_date, reliable_end_date) = view_report.last_period()
            update_lifecycle(lifecycle,
                             view_report.view_summary(group_on=report_combo,
                                                      start_date=reliable_start_date,
                                                      end_date=reliable_end_date),
                             reliable_test,
                             False,
                             acceptable_fail_rate,
                             options.reliable_test_min_run)

    # Update the lifecycle_file only if there have been changes.
    if orig_lifecycle != lifecycle:
        write_yaml_file(options.lifecycle_file, lifecycle)

# Run the update only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()