Files
mongo/buildscripts/update_test_lifecycle.py

470 lines
20 KiB
Python
Executable File

#!/usr/bin/env python
"""Test Failures
Update etc/test_lifecycle.yml to tag unreliable tests based on historic failure rates.
"""
from __future__ import division
from __future__ import print_function
import collections
import copy
import fnmatch
import optparse
import os
import re
import subprocess
import sys
import textwrap
import yaml
# Get relative imports to work when the package is not installed on the PYTHONPATH.
if __name__ == "__main__" and __package__ is None:
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from buildscripts import burn_in_tests
from buildscripts import resmokelib
from buildscripts import test_failures as tf
def read_yaml_file(yaml_file):
    """Open 'yaml_file' and return the parsed YAML structure.

    The file is parsed with yaml.safe_load() rather than yaml.load(): the files read by this
    script only need plain mappings, sequences and scalars, and yaml.load() without an explicit
    Loader can construct arbitrary Python objects and is rejected by recent PyYAML releases.
    """
    with open(yaml_file, "r") as fstream:
        return yaml.safe_load(fstream)
def write_yaml_file(yaml_file, object):
    """Write 'object' to 'yaml_file' as block-style YAML below a generated-file comment.

    The comment records the script name (sys.argv[0]), the output of 'git rev-parse HEAD' and
    the full command line, wrapped at 100 columns with every line prefixed by '# '.
    """

    # Setup to preserve order in yaml.dump, see https://stackoverflow.com/a/8661021
    def represent_dict_order(dumper, data):
        return dumper.represent_mapping("tag:yaml.org,2002:map", data.items())

    yaml.add_representer(collections.OrderedDict, represent_dict_order)

    # Resolve the commit before opening the file for writing, so that a failing git call does
    # not leave a truncated file behind. git terminates the hash with a newline, which would
    # otherwise end up as stray whitespace in the middle of the comment.
    commit = callo(["git", "rev-parse", "HEAD"]).strip()
    comment = ("This file was generated by {} and shouldn't be edited by hand. It was"
               " generated against commit {} with the following invocation: {}.").format(
                   sys.argv[0], commit, " ".join(sys.argv))

    with open(yaml_file, "w") as fstream:
        print(textwrap.fill(comment, width=100, initial_indent="# ", subsequent_indent="# "),
              file=fstream)
        yaml.dump(object, fstream, default_flow_style=False)
def get_test_suites_membership():
    """Return a dictionary with keys of all js_tests and list of associated suites.

    The map comes from resmokelib.parser.create_test_membership_map(); entries whose key is
    not a JS file are removed from it before it is returned.
    """
    test_membership_map = resmokelib.parser.create_test_membership_map()
    # SERVER-29486 - Non-JS tests can be returned, so we'll filter them here.
    # Iterate over a snapshot of the keys: deleting from a dict while iterating over its live
    # key view raises RuntimeError on Python 3.
    for test in list(test_membership_map):
        if not resmokelib.utils.is_js_file(test):
            del test_membership_map[test]
    return test_membership_map
def get_suite_tasks_membership(evg_yaml):
    """Return a dictionary with keys of all suites and list of associated tasks.

    Each task in the "tasks" list of 'evg_yaml' is inspected for its resmoke arguments (via
    burn_in_tests.get_resmoke_args). When those contain '--suites=<name>', the task's name is
    appended to the list for <name>. Only the first run of word characters after '--suites='
    is used as the suite name. The result is a defaultdict(list).
    """
    evg = read_yaml_file(evg_yaml)
    # Raw string so that '\w' reaches the regex engine instead of being treated as an
    # (invalid) string escape; compiled once since it is applied to every task.
    suite_pattern = re.compile(r"--suites=(?P<suite>\w+)")
    suite_membership = collections.defaultdict(list)
    for task in evg["tasks"]:
        resmoke_args = burn_in_tests.get_resmoke_args(task)
        if not resmoke_args:
            continue
        match = suite_pattern.search(resmoke_args)
        if match is not None:
            suite_membership[match.group("suite")].append(task["name"])
    return suite_membership
def get_test_tasks_membership(evg_yaml):
    """Map every known test to the tasks that run it.

    The test -> suites map and the suite -> tasks map (built from 'evg_yaml') are joined: for
    each suite a test belongs to, that suite's tasks are appended to the test's task list.
    The result is a defaultdict(list); a task appears once per suite that links it to a test.
    """
    suites_by_test = get_test_suites_membership()
    tasks_by_suite = get_suite_tasks_membership(evg_yaml)
    tasks_by_test = collections.defaultdict(list)
    for test_name, suite_names in suites_by_test.items():
        for suite_name in suite_names:
            tasks_by_test[test_name].extend(tasks_by_suite[suite_name])
    return tasks_by_test
def get_tests_from_tasks(tasks, test_tasks_membership):
    """Select the tests that are run by at least one of the given tasks.

    'test_tasks_membership' maps a test to the list of tasks that run it. The returned list
    holds every test whose task list shares an element with 'tasks'.
    """
    wanted_tasks = set(tasks)
    return [test_name
            for test_name, member_tasks in test_tasks_membership.items()
            if not wanted_tasks.isdisjoint(member_tasks)]
def create_test_groups(tests):
    """Group test paths by the second component of their '/'-separated path.

    For example, 'jstests/core/a.js' is placed in the group named 'core'. Paths that contain
    no '/' are left out. The result is a defaultdict(list) keyed by group name.
    """
    groups = collections.defaultdict(list)
    for test_path in tests:
        path_parts = test_path.split("/")
        # Only paths with at least one directory component can be grouped.
        if len(path_parts) > 1:
            groups[path_parts[1]].append(test_path)
    return groups
def create_batch_groups(test_groups, batch_size):
    """Return batch groups list of test_groups.

    Every list in the 'test_groups' mapping is cut into consecutive slices of at most
    'batch_size' elements; the slices of all groups are returned in one flat list. A batch
    never mixes tests from different groups.

    Raises ValueError if 'batch_size' is smaller than 1, since such a size can never consume
    a non-empty group.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1, got {}".format(batch_size))
    batch_groups = []
    for test_group in test_groups.values():
        for start in range(0, len(test_group), batch_size):
            batch_groups.append(test_group[start:start + batch_size])
    return batch_groups
def get_all_tasks(evg_yaml):
    """Return the names of the tasks in 'evg_yaml' that are not excluded.

    Exclusions come from the optional top-level 'test_lifecycle_excluded_tasks' list, whose
    entries are Unix shell-style wildcards, e.g., 'compile*' matches both 'compile' and
    'compile_all'. The result is built from a set, so it has no duplicates and no
    guaranteed order.
    """
    evg = read_yaml_file(evg_yaml)
    task_names = [task["name"] for task in evg["tasks"]]
    excluded_names = set()
    for glob_pattern in evg.get("test_lifecycle_excluded_tasks", []):
        excluded_names.update(fnmatch.filter(task_names, glob_pattern))
    return list(set(task_names) - excluded_names)
def callo(args):
    """Run the program described by the 'args' list and return what it wrote to stdout.

    This is a thin wrapper around subprocess.check_output(), so a non-zero exit status
    surfaces as subprocess.CalledProcessError.
    """
    output = subprocess.check_output(args)
    return output
def git_commit_range_since(since):
    """Returns first and last commit in 'since' period specified.

    Specify 'since' as any acceptable period for git log --since.
    The period can be specified as '4.weeks' or '3.days'.

    The result is a tuple (last line, first line) of the 'git log' output; git log lists the
    most recent commit first, so this is (earliest commit, latest commit). If git prints
    nothing, both elements are the empty string.
    """
    # Build the argument list directly rather than splitting a formatted string, so that a
    # period containing whitespace (e.g. '2 weeks ago') stays a single argument.
    git_command = ["git", "log", "--since={}".format(since), "--pretty=format:%H"]
    commits = callo(git_command).split("\n")
    return commits[-1], commits[0]
def git_commit_prior(revision):
    """Return the commit hash that precedes 'revision' in the git log.

    Runs 'git log -2 <revision>' printing hashes only, and returns the last line of output.
    """
    git_command = "git log -2 {revision} --pretty=format:%H".format(revision=revision)
    log_lines = callo(git_command.split()).split("\n")
    return log_lines[-1]
def unreliable_test(test_fr, unacceptable_fr, test_runs, min_run):
    """Decide whether a test should be considered unreliable.

    A test is unreliable when it has at least 'min_run' executions ('test_runs') and its
    failure rate 'test_fr' is strictly greater than 'unacceptable_fr'.
    """
    # Not enough executions to draw a conclusion.
    if not test_runs >= min_run:
        return False
    return test_fr > unacceptable_fr
def reliable_test(test_fr, acceptable_fr, test_runs, min_run):
    """Decide whether a test should be considered reliable.

    A test is reliable when it has at least 'min_run' executions ('test_runs') and its
    failure rate 'test_fr' is strictly less than 'acceptable_fr'.
    """
    # Not enough executions to draw a conclusion.
    if not test_runs >= min_run:
        return False
    return test_fr < acceptable_fr
def check_fail_rates(fr_name, acceptable_fr, unacceptable_fr):
    """Validate that 'acceptable_fr' does not exceed 'unacceptable_fr'.

    'fr_name' labels the pair of rates in the error message. Raises ValueError when the
    acceptable rate is greater than the unacceptable rate; returns None otherwise.
    """
    if not acceptable_fr > unacceptable_fr:
        return
    message = ("'{}' acceptable failure rate {} must be <= the unacceptable failure rate"
               " {}").format(fr_name, acceptable_fr, unacceptable_fr)
    raise ValueError(message)
def check_days(name, days):
    """Validate that 'days' is at least 1.

    'name' labels the value in the error message. Raises ValueError when 'days' is smaller
    than 1; returns None otherwise.
    """
    if not days < 1:
        return
    raise ValueError("'{}' days must be greater than 0.".format(name))
def js_test_exists(lifecycle, test):
    """Tell whether 'test' has a non-None entry under lifecycle["selector"]["js_test"].

    Returns False when the "js_test" mapping is missing or empty, or when 'test' is absent
    from it or mapped to None.
    """
    js_tests = lifecycle["selector"].get("js_test")
    if not js_tests:
        return False
    return js_tests.get(test) is not None
def get_lifecycle_test_tags(lifecycle, test):
    """Look up the tags recorded for the js_test 'test' in the lifecycle object.

    Returns the stored value when the test has a non-None entry, and a new empty list
    otherwise.
    """
    if not js_test_exists(lifecycle, test):
        return []
    return lifecycle["selector"]["js_test"].get(test)
def update_lifecycle_test_tags(lifecycle, test, tags):
    """Set the tags of 'test' under lifecycle["selector"]["js_test"] and return 'lifecycle'.

    When the "js_test" mapping is empty or None it is replaced by a new dict holding only
    this test; otherwise the entry is created or overwritten in the existing mapping. The
    lifecycle object is modified in place.
    """
    selector = lifecycle["selector"]
    if selector["js_test"]:
        selector["js_test"][test] = tags
    else:
        selector["js_test"] = {test: tags}
    return lifecycle
def unreliable_tag(test, task, variant, distro):
    """Build the 'unreliable' tag at the most specific level the arguments allow.

    The tag is "unreliable" followed by the task, variant and distro, each separated by '|'.
    Qualifiers are added in that order and stop at the first one that is empty, so a distro
    is only included when task and variant are present too. Returns None when 'test' is
    empty.
    """
    if not test:
        return None
    tag_parts = ["unreliable"]
    for qualifier in (task, variant, distro):
        if not qualifier:
            break
        tag_parts.append("{}".format(qualifier))
    return "|".join(tag_parts)
def _update_lifecycle(lifecycle, add_tags, test, existing_tags, update_tag):
    """Updates the lifecycle object.

    If add_tags is True, creates or updates a test in lifecycle, with update_tag.
    If add_tags is False, removes the update_tag from the test, and removes the test itself
    from lifecycle when no tags are left.

    'existing_tags' are the tags currently recorded for 'test'. Remaining tags are stored
    de-duplicated and sorted by their '|'-separated components.

    The lifecycle object is modified in place and also returned.
    """
    tags = set(existing_tags)
    if add_tags:
        tags.add(update_tag)
    else:
        tags.discard(update_tag)

    if tags:
        sorted_tags = sorted(tags, key=lambda tag: tag.split("|"))
        lifecycle = update_lifecycle_test_tags(lifecycle, test, sorted_tags)
    elif js_test_exists(lifecycle, test):
        # No tags left, so remove test from lifecycle.
        del lifecycle["selector"]["js_test"][test]
    return lifecycle
def update_lifecycle(lifecycle, report, method_test, add_tags, fail_rate, min_run):
    """Apply 'method_test' to every summary in 'report' and update the lifecycle tags.

    'method_test' is called with the summary's fail rate, 'fail_rate', the summary's number
    of runs (passes plus failures) and 'min_run'; unreliable_test and reliable_test have this
    signature. For each summary it accepts, the matching unreliable tag is added to the
    test's tags when 'add_tags' is True and removed from them when it is False. The
    lifecycle object is modified in place.
    """
    for summary in report:
        total_runs = summary.num_pass + summary.num_fail
        if not method_test(summary.fail_rate, fail_rate, total_runs, min_run):
            continue
        current_tags = get_lifecycle_test_tags(lifecycle, summary.test)
        tag = unreliable_tag(summary.test, summary.task, summary.variant, summary.distro)
        _update_lifecycle(lifecycle, add_tags, summary.test, current_tags, tag)
def _create_option_parser():
    """Return the optparse parser that defines the command line options of this script."""
    parser = optparse.OptionParser(description=__doc__,
                                   usage="Usage: %prog [options] test1 test2 ...")

    parser.add_option("--project", dest="project",
                      default=None,
                      help="Evergreen project to analyze [REQUIRED].")

    parser.add_option("--reliableTestMinimumRun", dest="reliable_test_min_run",
                      default=None,
                      type="int",
                      help="Minimum number of tests runs for test to be considered as reliable"
                           " [REQUIRED].")

    parser.add_option("--unreliableTestMinimumRun", dest="unreliable_test_min_run",
                      default=None,
                      type="int",
                      help="Minimum number of tests runs for test to be considered as unreliable"
                           " [REQUIRED].")

    parser.add_option("--testFailRates", dest="test_fail_rates",
                      metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
                      default=None,
                      type="float",
                      nargs=2,
                      help="Test fail rates: acceptable fail rate and unacceptable fail rate."
                           " Specify floating numbers between 0.0 and 1.0 [REQUIRED].")

    parser.add_option("--taskFailRates", dest="task_fail_rates",
                      metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
                      default=None,
                      type="float",
                      nargs=2,
                      help="Task fail rates: acceptable fail rate and unacceptable fail rate."
                           " Specify floating numbers between 0.0 and 1.0."
                           " Uses --testFailRates if unspecified.")

    parser.add_option("--variantFailRates", dest="variant_fail_rates",
                      metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
                      default=None,
                      type="float",
                      nargs=2,
                      help="Variant fail rates: acceptable fail rate and unacceptable fail rate."
                           " Specify floating numbers between 0.0 and 1.0."
                           " Uses --taskFailRates if unspecified.")

    parser.add_option("--distroFailRates", dest="distro_fail_rates",
                      metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
                      default=None,
                      type="float",
                      nargs=2,
                      help="Distro fail rates: acceptable fail rate and unacceptable fail rate."
                           " Specify floating numbers between 0.0 and 1.0."
                           " Uses --variantFailRates if unspecified.")

    parser.add_option("--tasks", dest="tasks",
                      default=None,
                      help="Names of tasks to analyze for tagging unreliable tests."
                           " If specified and no tests are specified, then only tests"
                           " associated with the tasks will be analyzed."
                           " If unspecified and no tests are specified, the list of tasks will be"
                           " the non-excluded list of tasks from the file specified by"
                           " '--evergreenYML'.")

    parser.add_option("--variants", dest="variants",
                      default="",
                      help="Names of variants to analyze for tagging unreliable tests.")

    parser.add_option("--distros", dest="distros",
                      default="",
                      help="Names of distros to analyze for tagging unreliable tests [UNUSED].")

    parser.add_option("--evergreenYML", dest="evergreen_yml",
                      default="etc/evergreen.yml",
                      help="Evergreen YML file used to get the list of tasks,"
                           " defaults to '%default'.")

    parser.add_option("--lifecycleFile", dest="lifecycle_file",
                      default="etc/test_lifecycle.yml",
                      help="Evergreen lifecycle file to update, defaults to '%default'.")

    parser.add_option("--reliableDays", dest="reliable_days",
                      default=14,
                      type="int",
                      help="Number of days to check for reliable tests, defaults to '%default'.")

    parser.add_option("--unreliableDays", dest="unreliable_days",
                      default=28,
                      type="int",
                      help="Number of days to check for unreliable tests, defaults to '%default'.")

    parser.add_option("--batchGroupSize", dest="batch_size",
                      default=100,
                      type="int",
                      help="Size of test batch group, defaults to '%default'.")

    return parser


def _fail_rates_for_combo(options, combo):
    """Return the (acceptable, unacceptable) fail rates for the grouping level 'combo'."""
    if combo == "distro":
        fail_rates = options.distro_fail_rates
    elif combo == "variant":
        fail_rates = options.variant_fail_rates
    elif combo == "task":
        fail_rates = options.task_fail_rates
    else:
        fail_rates = options.test_fail_rates
    return fail_rates[0], fail_rates[1]


def main():
    """Parse the command line, analyze the test history and update the lifecycle file.

    Positional arguments are the tests to analyze. When none are given, the tests are derived
    from the tasks (those passed with --tasks, or all non-excluded tasks of the Evergreen YML
    file). The tests are processed in batches; for each batch a history report is generated
    and the 'unreliable' tags in the lifecycle object are added or removed at the test, task
    and variant level. The lifecycle file is rewritten only when its content changed.
    """
    required_options = ["project",
                        "reliable_test_min_run",
                        "unreliable_test_min_run",
                        "test_fail_rates",
                        ]
    parser = _create_option_parser()
    (options, tests) = parser.parse_args()

    for option in required_options:
        if not getattr(options, option):
            parser.print_help()
            parser.error("Missing required option")

    # A batch size below 1 can never consume a group of tests.
    if options.batch_size < 1:
        parser.error("--batchGroupSize must be greater than 0")

    tasks = options.tasks.split(",") if options.tasks else []
    # If no tasks are specified, then the list of tasks is all, and the tasks queried for
    # each batch of tests are looked up from the tests in that batch.
    use_test_tasks_membership = not tasks
    if use_test_tasks_membership:
        tasks = get_all_tasks(options.evergreen_yml)

    variants = options.variants.split(",") if options.variants else []
    distros = options.distros.split(",") if options.distros else []

    check_fail_rates("Test", options.test_fail_rates[0], options.test_fail_rates[1])
    # The less specific failures rates are optional and default to a lower level value.
    if not options.task_fail_rates:
        options.task_fail_rates = options.test_fail_rates
    else:
        check_fail_rates("Task", options.task_fail_rates[0], options.task_fail_rates[1])
    if not options.variant_fail_rates:
        options.variant_fail_rates = options.task_fail_rates
    else:
        check_fail_rates("Variant", options.variant_fail_rates[0], options.variant_fail_rates[1])
    if not options.distro_fail_rates:
        options.distro_fail_rates = options.variant_fail_rates
    else:
        check_fail_rates("Distro", options.distro_fail_rates[0], options.distro_fail_rates[1])

    check_days("Reliable days", options.reliable_days)
    check_days("Unreliable days", options.unreliable_days)

    # Keep the original around to detect whether anything changed.
    orig_lifecycle = read_yaml_file(options.lifecycle_file)
    lifecycle = copy.deepcopy(orig_lifecycle)

    test_tasks_membership = get_test_tasks_membership(options.evergreen_yml)
    # If no tests are specified then the list of tests is generated from the list of tasks.
    if not tests:
        tests = get_tests_from_tasks(tasks, test_tasks_membership)

    commit_first, commit_last = git_commit_range_since("{}.days".format(options.unreliable_days))
    commit_prior = git_commit_prior(commit_first)

    # For efficiency purposes, group the tests and process in batches of batch_size.
    batch_groups = create_batch_groups(create_test_groups(tests), options.batch_size)

    for batch_tests in batch_groups:
        # Find all associated tasks for the batch if tasks were not specified.
        if use_test_tasks_membership:
            tasks_set = set()
            for test in batch_tests:
                tasks_set.update(test_tasks_membership[test])
            tasks = list(tasks_set)
        if not tasks:
            print("Warning - No tasks found for tests {}, skipping this group.".format(
                batch_tests))
            continue

        report = tf.HistoryReport(period_type="revision",
                                  start=commit_prior,
                                  end=commit_last,
                                  group_period=options.reliable_days,
                                  project=options.project,
                                  tests=batch_tests,
                                  tasks=tasks,
                                  variants=variants,
                                  distros=distros)
        view_report = report.generate_report()

        # We build up report_combo to check for more specific test failures rates.
        report_combo = []
        # TODO EVG-1665: Uncomment this line once this has been supported.
        # for combo in ["test", "task", "variant", "distro"]:
        for combo in ["test", "task", "variant"]:
            report_combo.append(combo)
            acceptable_fail_rate, unacceptable_fail_rate = _fail_rates_for_combo(options, combo)

            # Unreliable tests are analyzed from the entire period.
            update_lifecycle(lifecycle,
                             view_report.view_summary(group_on=report_combo),
                             unreliable_test,
                             True,
                             unacceptable_fail_rate,
                             options.unreliable_test_min_run)

            # Reliable tests are analyzed from the last period, i.e., last 14 days.
            (reliable_start_date, reliable_end_date) = view_report.last_period()
            update_lifecycle(lifecycle,
                             view_report.view_summary(group_on=report_combo,
                                                      start_date=reliable_start_date,
                                                      end_date=reliable_end_date),
                             reliable_test,
                             False,
                             acceptable_fail_rate,
                             options.reliable_test_min_run)

    # Update the lifecycle_file only if there have been changes.
    if orig_lifecycle != lifecycle:
        write_yaml_file(options.lifecycle_file, lifecycle)
# Run the lifecycle update only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()