Files
mongo/buildscripts/resmokelib/testing/hooks/combine_benchmark_results.py
2019-04-08 14:08:49 -04:00

217 lines
7.2 KiB
Python

"""Module for generating the test results file fed into the perf plugin."""
import collections
import datetime
import json
from buildscripts.resmokelib import config as _config
from buildscripts.resmokelib.testing.hooks import interface
class CombineBenchmarkResults(interface.Hook):
    """CombineBenchmarkResults class.

    The CombineBenchmarkResults hook combines test results from
    individual benchmark files to a single file. This is useful for
    generating the json file to feed into the Evergreen performance
    visualization plugin.

    When the configured report file is None, no benchmark report is read
    and no combined report is written.
    """

    DESCRIPTION = "Combine JSON results from individual benchmarks"

    def __init__(self, hook_logger, fixture):
        """Initialize CombineBenchmarkResults."""
        interface.Hook.__init__(self, hook_logger, fixture, CombineBenchmarkResults.DESCRIPTION)
        self.report_file = _config.PERF_REPORT_FILE

        # Reports grouped by name without thread.
        self.benchmark_reports = {}

        # Suite start/end timestamps; set by before_suite() and after_suite().
        self.create_time = None
        self.end_time = None

    @staticmethod
    def _strftime(time):
        """Format a datetime as "YYYY-MM-DDTHH:MM:SSZ".

        The trailing "Z" is a literal, so the caller is responsible for
        passing a time that is expressed in UTC.
        """
        return time.strftime("%Y-%m-%dT%H:%M:%SZ")

    def after_test(self, test, test_report):
        """Load the JSON report written by `test` and merge it into the grouped reports."""
        if self.report_file is None:
            return

        bm_report_path = test.report_name()

        with open(bm_report_path, "r") as report_file:
            report_dict = json.load(report_file)
            self._parse_report(report_dict)

    def before_suite(self, test_report):
        """Set suite start time."""
        # Record UTC (not local time) because _strftime() labels the value with "Z".
        self.create_time = datetime.datetime.now(datetime.timezone.utc)

    def after_suite(self, test_report):
        """Record the suite end time and write the combined report file."""
        if self.report_file is None:
            return

        # Record UTC (not local time) because _strftime() labels the value with "Z".
        self.end_time = datetime.datetime.now(datetime.timezone.utc)
        report = self._generate_perf_plugin_report()
        with open(self.report_file, "w") as fh:
            json.dump(report, fh)

    def _generate_perf_plugin_report(self):
        """Format the data to look like a perf plugin report.

        Returns a dict with "start", "end", "errors" and "results" keys, where
        "results" holds one entry per benchmark base name.
        """
        perf_report = {
            "start": self._strftime(self.create_time),
            "end": self._strftime(self.end_time),
            "errors": [],  # There are no errors if we have gotten this far.
            "results": []
        }

        for name, report in self.benchmark_reports.items():
            test_report = {
                "name": name,
                "context": report.context._asdict(),
                "results": report.generate_perf_plugin_dict()
            }
            perf_report["results"].append(test_report)

        return perf_report

    def _parse_report(self, report_dict):
        """Add every non-statistic entry of a raw benchmark report to the grouped reports."""
        context = report_dict["context"]

        for benchmark_res in report_dict["benchmarks"]:
            bm_name_obj = _BenchmarkThreadsReport.parse_bm_name(benchmark_res["name"])

            # Don't show Benchmark's included statistics to prevent cluttering up the graph.
            if bm_name_obj.statistic_type is not None:
                continue

            if bm_name_obj.base_name not in self.benchmark_reports:
                self.benchmark_reports[bm_name_obj.base_name] = _BenchmarkThreadsReport(context)
            self.benchmark_reports[bm_name_obj.base_name].add_report(bm_name_obj, benchmark_res)
# Capture information from a Benchmark name in a logical format.
_BenchmarkName = collections.namedtuple("_BenchmarkName",
                                        ["base_name", "thread_count", "statistic_type"])


class _BenchmarkThreadsReport(object):
    """_BenchmarkThreadsReport class.

    Class representation of a report for all thread levels of a single
    benchmark test. Each report is designed to correspond to one graph
    in the Evergreen perf plugin.

    A raw Benchmark report looks like the following:
    {
      "context": {
        "date": "2015/03/17-18:40:25",
        "executable": "./build/opt/mongo/db/concurrency/lock_manager_bm",
        "num_cpus": 40,
        "mhz_per_cpu": 2801,
        "cpu_scaling_enabled": false,
        "caches": [
        ],
        "library_build_type": "debug"
      },
      "benchmarks": [
        {
          "name": "BM_SetInsert/arg name:1024/threads:10",
          "iterations": 21393,
          "real_time": 32724,
          "cpu_time": 33355,
          "bytes_per_second": 1199226,
          "items_per_second": 299807
        }
      ]
    }
    """

    CONTEXT_FIELDS = [
        "date", "cpu_scaling_enabled", "num_cpus", "mhz_per_cpu", "library_build_type",
        "executable", "caches"
    ]
    Context = collections.namedtuple("Context", CONTEXT_FIELDS)  # type: ignore

    # Name suffixes that mark a row as an aggregate or complexity result rather than an
    # individual run (per the Google Benchmark user guide). Matching against a fixed set
    # keeps benchmark names that themselves contain underscores from being misparsed.
    _STATISTIC_TYPES = frozenset(["mean", "median", "stddev", "cv", "BigO", "RMS"])

    def __init__(self, context_dict):
        """Initialize the report from the "context" section of a raw benchmark report.

        Keys of `context_dict` that are not listed in CONTEXT_FIELDS are ignored.
        A TypeError is raised if any field in CONTEXT_FIELDS is missing.
        """
        # The context comes from a JSON file and may carry additional keys; passing those
        # through to the namedtuple constructor would raise a TypeError.
        known_fields = {
            key: value
            for key, value in context_dict.items() if key in self.CONTEXT_FIELDS
        }
        self.context = self.Context(**known_fields)

        # list of benchmark runs for each thread.
        self.thread_benchmark_map = collections.defaultdict(list)

    def add_report(self, bm_name_obj, report):
        """Append a single benchmark run to the list kept for its thread count."""
        self.thread_benchmark_map[bm_name_obj.thread_count].append(report)

    def generate_perf_plugin_dict(self):
        """Generate perf plugin data points of the following format.

        "1": {
          "error_values": [
            0,
            0,
            0
          ],
          "ops_per_sec": 9552.108279243452,
          "ops_per_sec_values": [
            9574.812658450564,
            9522.642340821469,
            9536.252775275878
          ]
        },

        The returned dict is keyed by thread count, with one entry per
        thread count that has at least one recorded run.
        """
        res = {}
        for thread_count, reports in self.thread_benchmark_map.items():
            # This is actually storing latency per op, not ops/s.
            # Take the negative of the latency numbers to preserve the higher is better
            # semantics.
            latencies = [-1 * report["cpu_time"] for report in reports]
            res[thread_count] = {
                "error_values": [0] * len(reports),
                "ops_per_sec_values": latencies,
                "ops_per_sec": sum(latencies) / len(reports),
            }
        return res

    @staticmethod
    def parse_bm_name(name_str):
        """
        Split the benchmark name into base_name, thread_count and statistic_type.

        The base name is the benchmark name minus the thread count and any statistics.
        Testcases of the same group will be shown on a single perf graph.

        name_str look like the following:
        "BM_SetInsert/arg name:1024/threads:10_mean"
        "BM_SetInsert/arg 1/arg 2"
        "BM_SetInsert_mean"

        Returns a _BenchmarkName. thread_count is a string ("1" when the name
        has no explicit "threads:" section) and statistic_type is None when
        the name does not end in a recognized statistic suffix.
        """
        statistic_type = None

        # Step 1: get the statistic type.
        # Only a recognized trailing "_<suffix>" counts as a statistic, so underscores
        # inside the benchmark name itself do not affect the result.
        prefix, separator, suffix = name_str.rpartition("_")
        if separator and suffix in _BenchmarkThreadsReport._STATISTIC_TYPES:
            statistic_type = suffix
            # Remove the statistic type suffix from the name.
            name_str = prefix

        # Step 2: Get the thread count and name.
        thread_section = name_str.rsplit("/", 1)[-1]
        if thread_section.startswith("threads:"):
            base_name = name_str.rsplit("/", 1)[0]
            thread_count = thread_section.split(":")[-1]
        else:  # There is no explicit thread count, so the thread count is 1.
            thread_count = "1"
            base_name = name_str

        return _BenchmarkName(base_name, thread_count, statistic_type)