470 lines
20 KiB
Python
Executable File
470 lines
20 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
"""Test Failures
|
|
|
|
Update etc/test_lifecycle.yml to tag unreliable tests based on historic failure rates.
|
|
"""
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
|
|
import collections
|
|
import copy
|
|
import fnmatch
|
|
import optparse
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
import textwrap
|
|
|
|
import yaml
|
|
|
|
# Get relative imports to work when the package is not installed on the PYTHONPATH.
|
|
if __name__ == "__main__" and __package__ is None:
|
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
from buildscripts import burn_in_tests
|
|
from buildscripts import resmokelib
|
|
from buildscripts import test_failures as tf
|
|
|
|
|
|
def read_yaml_file(yaml_file):
    """Parse the YAML document stored at 'yaml_file' and return the loaded structure."""
    # NOTE(review): yaml.load() without an explicit Loader can construct arbitrary Python
    # objects, so it is only appropriate for trusted files; consider yaml.safe_load().
    with open(yaml_file, "r") as yaml_stream:
        loaded = yaml.load(yaml_stream)
    return loaded
|
|
|
|
|
|
def write_yaml_file(yaml_file, object):
    """Write 'object' to 'yaml_file' as block-style YAML, preceded by a banner comment.

    The banner states that the file is generated and records the script name, the commit
    reported by 'git rev-parse HEAD' and the full command line of this invocation.

    The parameter is named 'object' (shadowing the builtin) to keep the existing signature.
    Registering the OrderedDict representer is a process-wide side effect on the yaml module.
    """

    # Setup to preserve order in yaml.dump, see https://stackoverflow.com/a/8661021
    represent_dict_order = lambda self, data: self.represent_mapping(
        'tag:yaml.org,2002:map', data.items())
    yaml.add_representer(collections.OrderedDict, represent_dict_order)

    # Resolve the commit before opening the output file: opening with "w" truncates it, so a
    # failing git call must not leave an empty lifecycle file behind. The output of
    # 'git rev-parse' ends with a newline, which is stripped to keep the banner text clean.
    commit = callo(["git", "rev-parse", "HEAD"]).strip()
    comment = ("This file was generated by {} and shouldn't be edited by hand. It was"
               " generated against commit {} with the following invocation: {}.").format(
                   sys.argv[0], commit, " ".join(sys.argv))

    with open(yaml_file, "w") as fstream:
        print(textwrap.fill(comment, width=100, initial_indent="# ", subsequent_indent="# "),
              file=fstream)
        yaml.dump(object, fstream, default_flow_style=False)
|
|
|
|
|
|
def get_test_suites_membership():
    """Return a dictionary with keys of all js_tests and list of associated suites.

    The map comes from resmokelib.parser.create_test_membership_map() and is filtered in
    place, so the returned object is the one resmokelib produced, minus the non-JS entries.
    """
    # SERVER-29486 - Non-JS tests can be returned, so we'll filter them here.
    test_membership_map = resmokelib.parser.create_test_membership_map()
    # Iterate over a snapshot of the keys: deleting entries while iterating the live key view
    # raises "RuntimeError: dictionary changed size during iteration" on Python 3.
    for test in list(test_membership_map):
        if not resmokelib.utils.is_js_file(test):
            del test_membership_map[test]
    return test_membership_map
|
|
|
|
|
|
def get_suite_tasks_membership(evg_yaml):
    """Return a dictionary with keys of all suites and list of associated tasks.

    Every task in the "tasks" section of 'evg_yaml' whose resmoke arguments contain
    '--suites=<name>' is recorded under that suite name. Tasks without resmoke arguments, or
    without a '--suites=' option, are skipped. The result is a defaultdict(list), so looking
    up an unknown suite yields an empty list.
    """
    evg = read_yaml_file(evg_yaml)
    suite_membership = collections.defaultdict(list)
    for task in evg["tasks"]:
        resmoke_args = burn_in_tests.get_resmoke_args(task)
        if not resmoke_args:
            continue
        # Raw string so that '\w' reaches the regex engine instead of being treated as an
        # (invalid) string escape. Only the first run of word characters is captured.
        m = re.search(r"--suites=(?P<suite>\w+)", resmoke_args)
        if m is not None:
            suite_membership[m.group("suite")].append(task["name"])
    return suite_membership
|
|
|
|
|
|
def get_test_tasks_membership(evg_yaml):
    """Map every known test to the tasks that run it.

    Combines the test -> suites map with the suite -> tasks map built from 'evg_yaml'. A task
    is listed once per suite that links it to the test, so duplicates are possible. The
    result is a defaultdict(list).
    """
    suites_by_test = get_test_suites_membership()
    tasks_by_suite = get_suite_tasks_membership(evg_yaml)
    tasks_by_test = collections.defaultdict(list)
    for test_name, suite_names in suites_by_test.items():
        for suite_name in suite_names:
            tasks_by_test[test_name].extend(tasks_by_suite[suite_name])
    return tasks_by_test
|
|
|
|
|
|
def get_tests_from_tasks(tasks, test_tasks_membership):
    """Return the tests that are run by at least one of the given tasks.

    'test_tasks_membership' maps a test name to the list of tasks running it; a test is
    selected when that list shares any element with 'tasks'.
    """
    wanted_tasks = set(tasks)
    return [test_name
            for test_name, test_tasks in test_tasks_membership.items()
            if not wanted_tasks.isdisjoint(test_tasks)]
|
|
|
|
|
|
def create_test_groups(tests):
    """Group test paths by their second '/'-separated component.

    For example "jstests/core/a.js" is filed under "core". Paths that contain no '/' are
    dropped. The result is a defaultdict(list) keyed by that component.
    """
    groups_by_dir = collections.defaultdict(list)
    for test_path in tests:
        path_parts = test_path.split("/")
        # A path with a single component has no directory to group on.
        if len(path_parts) > 1:
            groups_by_dir[path_parts[1]].append(test_path)
    return groups_by_dir
|
|
|
|
|
|
def create_batch_groups(test_groups, batch_size):
    """Return batch groups list of test_groups.

    Each list in 'test_groups' (a mapping of group name to list of tests) is cut into
    consecutive slices of at most 'batch_size' tests; a batch never mixes tests from two
    groups. Empty groups contribute no batch.

    Raises ValueError if 'batch_size' is less than 1, since a size of 0 could never consume
    a non-empty group.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be greater than 0.")

    batch_groups = []
    for test_group in test_groups.values():
        for start in range(0, len(test_group), batch_size):
            batch_groups.append(test_group[start:start + batch_size])
    return batch_groups
|
|
|
|
|
|
def get_all_tasks(evg_yaml):
    """Return the names of the tasks declared in 'evg_yaml', minus the excluded ones.

    Exclusions are read from the optional top-level 'test_lifecycle_excluded_tasks' list of
    the same file. Its entries may use Unix shell-style wildcards, e.g. 'compile*' matches
    both 'compile' and 'compile_all'. The result is built from a set, so task names are
    de-duplicated and their order is unspecified.
    """
    evg_config = read_yaml_file(evg_yaml)
    task_names = [task["name"] for task in evg_config["tasks"]]
    exclusion_patterns = evg_config.get("test_lifecycle_excluded_tasks", [])
    excluded_names = [task_name
                      for pattern in exclusion_patterns
                      for task_name in fnmatch.filter(task_names, pattern)]

    return list(set(task_names) - set(excluded_names))
|
|
|
|
|
|
def callo(args):
    """Run the command given by the argument list 'args' and return its standard output.

    Thin wrapper around subprocess.check_output(), which raises
    subprocess.CalledProcessError when the command exits with a non-zero status.
    """
    captured_output = subprocess.check_output(args)
    return captured_output
|
|
|
|
|
|
def git_commit_range_since(since):
    """Returns first and last commit in 'since' period specified.

    Specify 'since' as any acceptable period for git log --since.
    The period can be specified as '4.weeks' or '3.days'.

    The result is the tuple (last line, first line) of the 'git log' output, i.e. the oldest
    and the newest commit hash given git's default newest-first ordering. If git prints
    nothing for the period, both elements are empty strings.
    """
    # Build the argument list directly rather than splitting a formatted command string, so
    # a period containing whitespace (e.g. '2 weeks ago') stays a single --since argument.
    git_command = ["git", "log", "--since={}".format(since), "--pretty=format:%H"]
    commits = callo(git_command).split("\n")
    return commits[-1], commits[0]
|
|
|
|
|
|
def git_commit_prior(revision):
    """Return the commit hash that 'git log' lists right after 'revision'.

    Runs 'git log -2 <revision>' printing hashes only and returns the last line of the
    output, which is the second entry whenever git reports two commits.
    """
    log_command = "git log -2 {revision} --pretty=format:%H".format(revision=revision)
    log_output = callo(log_command.split())
    return log_output.split("\n")[-1]
|
|
|
|
|
|
def unreliable_test(test_fr, unacceptable_fr, test_runs, min_run):
    """Tell whether a test qualifies as unreliable.

    A test should be added to the set of tests believed not to run reliably when it has at
    least min_run executions with a failure rate strictly greater than unacceptable_fr.
    """
    has_enough_runs = test_runs >= min_run
    if not has_enough_runs:
        return False
    return test_fr > unacceptable_fr
|
|
|
|
|
|
def reliable_test(test_fr, acceptable_fr, test_runs, min_run):
    """Tell whether a test qualifies as reliable.

    A test should be removed from the set of tests believed not to run reliably when it has
    at least min_run executions with a failure rate strictly less than acceptable_fr.
    """
    if test_runs >= min_run:
        return test_fr < acceptable_fr
    return False
|
|
|
|
|
|
def check_fail_rates(fr_name, acceptable_fr, unacceptable_fr):
    """Validate a pair of failure rates.

    Raises ValueError, mentioning 'fr_name', when acceptable_fr is greater than
    unacceptable_fr. Equal rates are accepted.
    """
    rates_are_ordered = not acceptable_fr > unacceptable_fr
    if rates_are_ordered:
        return
    message = ("'{}' acceptable failure rate {} must be <= the unacceptable failure rate"
               " {}").format(fr_name, acceptable_fr, unacceptable_fr)
    raise ValueError(message)
|
|
|
|
|
|
def check_days(name, days):
    """Validate a day count: raise ValueError, mentioning 'name', when days is below 1."""
    if days < 1:
        error_text = "'{}' days must be greater than 0.".format(name)
        raise ValueError(error_text)
|
|
|
|
|
|
def js_test_exists(lifecycle, test):
    """Tell whether 'test' has an entry other than None under lifecycle's selector.js_test.

    Returns False when the 'js_test' section is missing or empty.
    """
    js_tests = lifecycle["selector"].get("js_test")
    if not js_tests:
        return False
    return js_tests.get(test) is not None
|
|
|
|
|
|
def get_lifecycle_test_tags(lifecycle, test):
    """Return the tags stored for 'test' under lifecycle's selector.js_test.

    A fresh empty list is returned when the test has no usable entry; otherwise the stored
    value itself (not a copy) is returned.
    """
    if not js_test_exists(lifecycle, test):
        return []
    return lifecycle["selector"]["js_test"].get(test)
|
|
|
|
|
|
def update_lifecycle_test_tags(lifecycle, test, tags):
    """Returns lifecycle object after creating or updating the tags for 'test'.

    'lifecycle' is modified in place and also returned. When the 'js_test' section under
    'selector' is missing, None or empty, it is replaced by a new dict holding only 'test'.
    """
    selector = lifecycle["selector"]
    # Use .get() so a selector without any 'js_test' key is handled the same way as an empty
    # section, matching the lookup done by js_test_exists(), instead of raising KeyError.
    if not selector.get("js_test"):
        selector["js_test"] = {test: tags}
    else:
        selector["js_test"][test] = tags
    return lifecycle
|
|
|
|
|
|
def unreliable_tag(test, task, variant, distro):
    """Build the 'unreliable' tag at the most specific level available.

    The tag is "unreliable" followed by '|'-separated task, variant and distro, cut off at
    the first of those that is empty or None. Returns None when 'test' itself is empty.
    """
    if not test:
        return None
    if not task:
        return "unreliable"
    if not variant:
        return "unreliable|{}".format(task)
    if not distro:
        return "unreliable|{}|{}".format(task, variant)
    return "unreliable|{}|{}|{}".format(task, variant, distro)
|
|
|
|
|
|
def _update_lifecycle(lifecycle, add_tags, test, existing_tags, update_tag):
    """Apply one tag change for 'test' to the lifecycle object, in place.

    If add_tags is True, 'update_tag' is added to the test's tags, creating the test entry
    when needed. If add_tags is False, 'update_tag' is removed; a test left without any tag
    is deleted from the lifecycle. Stored tags are de-duplicated and sorted by their
    '|'-separated components. Nothing is returned.
    """
    remaining_tags = set(existing_tags)
    if add_tags:
        remaining_tags.add(update_tag)
    else:
        remaining_tags.discard(update_tag)

    if not remaining_tags:
        # No tags left, so remove test from lifecycle.
        if js_test_exists(lifecycle, test):
            del lifecycle["selector"]["js_test"][test]
        return

    ordered_tags = sorted(remaining_tags, key=lambda tag: tag.split("|"))
    update_lifecycle_test_tags(lifecycle, test, ordered_tags)
|
|
|
|
|
|
def update_lifecycle(lifecycle, report, method_test, add_tags, fail_rate, min_run):
    """Update the lifecycle object, in place, for every report entry selected by method_test.

    'method_test' is called as method_test(summary.fail_rate, fail_rate, total runs, min_run),
    where total runs is num_pass + num_fail; in this file it is unreliable_test or
    reliable_test. For each entry it accepts, the matching 'unreliable' tag is added
    (add_tags True) or removed (add_tags False).
    """
    for summary in report:
        total_runs = summary.num_pass + summary.num_fail
        if not method_test(summary.fail_rate, fail_rate, total_runs, min_run):
            continue
        current_tags = get_lifecycle_test_tags(lifecycle, summary.test)
        tag = unreliable_tag(summary.test, summary.task, summary.variant, summary.distro)
        _update_lifecycle(lifecycle, add_tags, summary.test, current_tags, tag)
|
|
|
|
|
|
def main():
    """Update the lifecycle file with 'unreliable' tags derived from test history.

    Parses the command line, determines the tests and tasks to analyze, builds a history
    report for each batch of tests, adds or removes 'unreliable' tags in a copy of the
    lifecycle data and rewrites the lifecycle file only if that copy differs from the
    original. Positional arguments are the tests to analyze.
    """

    required_options = ["project",
                        "reliable_test_min_run",
                        "unreliable_test_min_run",
                        "test_fail_rates",
                        ]
    parser = optparse.OptionParser(description=__doc__,
                                   usage="Usage: %prog [options] test1 test2 ...")
    parser.add_option("--project", dest="project",
                      default=None,
                      help="Evergreen project to analyze [REQUIRED].")
    parser.add_option("--reliableTestMinimumRun", dest="reliable_test_min_run",
                      default=None,
                      type="int",
                      help="Minimum number of tests runs for test to be considered as reliable"
                           " [REQUIRED].")
    parser.add_option("--unreliableTestMinimumRun", dest="unreliable_test_min_run",
                      default=None,
                      type="int",
                      help="Minimum number of tests runs for test to be considered as unreliable"
                           " [REQUIRED].")
    parser.add_option("--testFailRates", dest="test_fail_rates",
                      metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
                      default=None,
                      type="float",
                      nargs=2,
                      help="Test fail rates: acceptable fail rate and unacceptable fail rate."
                           " Specify floating numbers between 0.0 and 1.0 [REQUIRED].")
    # The fallback flags named in the help texts below use the camelCase spelling the parser
    # actually registers.
    parser.add_option("--taskFailRates", dest="task_fail_rates",
                      metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
                      default=None,
                      type="float",
                      nargs=2,
                      help="Task fail rates: acceptable fail rate and unacceptable fail rate."
                           " Specify floating numbers between 0.0 and 1.0."
                           " Uses --testFailRates if unspecified.")
    parser.add_option("--variantFailRates", dest="variant_fail_rates",
                      metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
                      default=None,
                      type="float",
                      nargs=2,
                      help="Variant fail rates: acceptable fail rate and unacceptable fail rate."
                           " Specify floating numbers between 0.0 and 1.0."
                           " Uses --taskFailRates if unspecified.")
    parser.add_option("--distroFailRates", dest="distro_fail_rates",
                      metavar="ACCEPTABLE-FAILRATE UNACCEPTABLE-FAILRATE",
                      default=None,
                      type="float",
                      nargs=2,
                      help="Distro fail rates: acceptable fail rate and unacceptable fail rate."
                           " Specify floating numbers between 0.0 and 1.0."
                           " Uses --variantFailRates if unspecified.")
    parser.add_option("--tasks", dest="tasks",
                      default=None,
                      help="Names of tasks to analyze for tagging unreliable tests."
                           " If specified and no tests are specified, then only tests"
                           " associated with the tasks will be analyzed."
                           " If unspecified and no tests are specified, the list of tasks will be"
                           " the non-excluded list of tasks from the file specified by"
                           " '--evergreenYML'.")
    parser.add_option("--variants", dest="variants",
                      default="",
                      help="Names of variants to analyze for tagging unreliable tests.")
    parser.add_option("--distros", dest="distros",
                      default="",
                      help="Names of distros to analyze for tagging unreliable tests [UNUSED].")
    parser.add_option("--evergreenYML", dest="evergreen_yml",
                      default="etc/evergreen.yml",
                      help="Evergreen YML file used to get the list of tasks,"
                           " defaults to '%default'.")
    parser.add_option("--lifecycleFile", dest="lifecycle_file",
                      default="etc/test_lifecycle.yml",
                      help="Evergreen lifecycle file to update, defaults to '%default'.")
    parser.add_option("--reliableDays", dest="reliable_days",
                      default=14,
                      type="int",
                      help="Number of days to check for reliable tests, defaults to '%default'.")
    parser.add_option("--unreliableDays", dest="unreliable_days",
                      default=28,
                      type="int",
                      help="Number of days to check for unreliable tests, defaults to '%default'.")
    parser.add_option("--batchGroupSize", dest="batch_size",
                      default=100,
                      type="int",
                      help="Size of test batch group, defaults to '%default'.")

    (options, tests) = parser.parse_args()

    # A required option counts as missing when its value is falsy (unset, 0 or empty).
    # parser.error() prints the message and exits the program.
    for option in required_options:
        if not getattr(options, option):
            parser.print_help()
            parser.error("Missing required option")

    # When True, the tasks queried for each batch are derived from the tests in that batch.
    use_test_tasks_membership = False

    tasks = options.tasks.split(",") if options.tasks else []
    if not tasks:
        # If no tasks are specified, then the list of tasks is all.
        tasks = get_all_tasks(options.evergreen_yml)
        use_test_tasks_membership = True

    variants = options.variants.split(",") if options.variants else []

    distros = options.distros.split(",") if options.distros else []

    check_fail_rates("Test", options.test_fail_rates[0], options.test_fail_rates[1])
    # The less specific failures rates are optional and default to a lower level value.
    if not options.task_fail_rates:
        options.task_fail_rates = options.test_fail_rates
    else:
        check_fail_rates("Task", options.task_fail_rates[0], options.task_fail_rates[1])
    if not options.variant_fail_rates:
        options.variant_fail_rates = options.task_fail_rates
    else:
        check_fail_rates("Variant", options.variant_fail_rates[0], options.variant_fail_rates[1])
    if not options.distro_fail_rates:
        options.distro_fail_rates = options.variant_fail_rates
    else:
        check_fail_rates("Distro", options.distro_fail_rates[0], options.distro_fail_rates[1])

    check_days("Reliable days", options.reliable_days)
    check_days("Unreliable days", options.unreliable_days)

    # Work on a deep copy so the result can be compared with the original before writing.
    orig_lifecycle = read_yaml_file(options.lifecycle_file)
    lifecycle = copy.deepcopy(orig_lifecycle)

    test_tasks_membership = get_test_tasks_membership(options.evergreen_yml)
    # If no tests are specified then the list of tests is generated from the list of tasks.
    # (use_test_tasks_membership was already set above whenever no tasks were specified.)
    if not tests:
        tests = get_tests_from_tasks(tasks, test_tasks_membership)

    commit_first, commit_last = git_commit_range_since("{}.days".format(options.unreliable_days))
    commit_prior = git_commit_prior(commit_first)

    # For efficiency purposes, group the tests and process in batches of batch_size.
    batch_groups = create_batch_groups(create_test_groups(tests), options.batch_size)

    for test_batch in batch_groups:
        # Find all associated tasks for the test_group if tasks or tests were not specified.
        if use_test_tasks_membership:
            tasks_set = set()
            for test in test_batch:
                tasks_set = tasks_set.union(test_tasks_membership[test])
            tasks = list(tasks_set)
        if not tasks:
            print("Warning - No tasks found for tests {}, skipping this group.".format(
                test_batch))
            continue
        report = tf.HistoryReport(period_type="revision",
                                  start=commit_prior,
                                  end=commit_last,
                                  group_period=options.reliable_days,
                                  project=options.project,
                                  tests=test_batch,
                                  tasks=tasks,
                                  variants=variants,
                                  distros=distros)
        view_report = report.generate_report()

        # We build up report_combo to check for more specific test failures rates.
        report_combo = []
        # TODO EVG-1665: Uncomment this line once this has been supported.
        # for combo in ["test", "task", "variant", "distro"]:
        for combo in ["test", "task", "variant"]:
            report_combo.append(combo)
            # The "distro" branch is only reached once the TODO above is resolved.
            if combo == "distro":
                acceptable_fail_rate = options.distro_fail_rates[0]
                unacceptable_fail_rate = options.distro_fail_rates[1]
            elif combo == "variant":
                acceptable_fail_rate = options.variant_fail_rates[0]
                unacceptable_fail_rate = options.variant_fail_rates[1]
            elif combo == "task":
                acceptable_fail_rate = options.task_fail_rates[0]
                unacceptable_fail_rate = options.task_fail_rates[1]
            else:
                acceptable_fail_rate = options.test_fail_rates[0]
                unacceptable_fail_rate = options.test_fail_rates[1]

            # Unreliable tests are analyzed from the entire period.
            update_lifecycle(lifecycle,
                             view_report.view_summary(group_on=report_combo),
                             unreliable_test,
                             True,
                             unacceptable_fail_rate,
                             options.unreliable_test_min_run)

            # Reliable tests are analyzed from the last period, i.e., last 14 days.
            (reliable_start_date, reliable_end_date) = view_report.last_period()
            update_lifecycle(lifecycle,
                             view_report.view_summary(group_on=report_combo,
                                                      start_date=reliable_start_date,
                                                      end_date=reliable_end_date),
                             reliable_test,
                             False,
                             acceptable_fail_rate,
                             options.reliable_test_min_run)

    # Update the lifecycle_file only if there have been changes.
    if orig_lifecycle != lifecycle:
        write_yaml_file(options.lifecycle_file, lifecycle)
|
|
|
|
# Run the lifecycle update only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|