Files
mongo/buildscripts/perf_regression_check.py

345 lines
13 KiB
Python

import argparse
import json
import sys
import itertools
from dateutil import parser
from datetime import timedelta, datetime
# Example usage:
# perf_regression_check.py -f history_file.json --rev 18808cd923789a34abd7f13d62e7a73fafd5ce5f
# Loads the history json file, and looks for regressions at the revision 18808cd...
# Will exit with status code 1 if any regression is found, 0 otherwise.
def compareOneResultNoise(this_one, reference, label, threadlevel="max", noiseLevel=0,
noiseMultiple=1, minThreshold=0.05):
'''
Take two result series and compare them to see if they are acceptable.
Return true if failed, and false if pass
Uses historical noise data for the comparison.
'''
failed = False;
if not reference:
return failed
ref = ""
current = ""
noise = 0
if threadlevel == "max":
ref = reference["max"]
current = this_one["max"]
else:
# Don't do a comparison if the thread data is missing
if not threadlevel in reference["results"].keys():
return failed
ref = reference["results"][threadlevel]['ops_per_sec']
current = this_one["results"][threadlevel]['ops_per_sec']
noise = noiseLevel * noiseMultiple
delta = minThreshold * ref
if (delta < noise):
delta = noise
# Do the check
if ref - current >= delta:
print ("\tregression found on %s: drop from %.2f ops/sec (commit %s) to %.2f ops/sec for comparison %s. Diff is"
" %.2f ops/sec (%.2f%%), noise level is %.2f ops/sec and multiple is %.2f" %
(threadlevel, ref, reference["revision"][:5], current, label, ref - current,
100*(ref-current)/ref, noiseLevel, noiseMultiple))
failed = True
return failed
def compareResults(this_one, reference, threshold, label, noiseLevels={}, noiseMultiple=1, threadThreshold=None, threadNoiseMultiple=None):
'''
Take two result series and compare them to see if they are acceptable.
Return true if failed, and false if pass
'''
failed = False;
if not reference:
return failed
# Default threadThreshold to the same as the max threshold
if not threadThreshold:
threadThreshold = threshold
if not threadNoiseMultiple :
threadNoiseMultiple = noiseMultiple
# Check max throughput first
noise = 0
# For the max throughput, use the max noise across the thread levels as the noise parameter
if len(noiseLevels.values()) > 0:
noise = max(noiseLevels.values())
if compareOneResultNoise(this_one, reference, label, "max", noiseLevel=noise,
noiseMultiple=noiseMultiple, minThreshold=threshold):
failed = True;
# Check for regression on threading levels
for (level, ops_per_sec) in (((r, this_one["results"][r]['ops_per_sec']) for r in
this_one["results"] if type(this_one["results"][r]) == type({}))):
noise = 0
if level in noiseLevels:
noise = noiseLevels[level]
if compareOneResultNoise(this_one, reference, label, level, noiseLevel=noise,
noiseMultiple=threadNoiseMultiple, minThreshold=threadThreshold):
failed = True
if not failed:
print "\tno regression against %s and githash %s" %(label, reference["revision"][:5])
return failed
def main(args):
parser = argparse.ArgumentParser()
parser.add_argument("-f", "--file", dest="file", help="path to json file containing"
"history data")
parser.add_argument("-t", "--tagFile", dest="tfile", help="path to json file containing"
"tag data")
parser.add_argument("--rev", dest="rev", help="revision to examine for regressions")
parser.add_argument("--ndays", default=7, type=int, dest="ndays", help="Check against"
"commit from n days ago.")
parser.add_argument("--threshold", default=0.05, type=float, dest="threshold", help=
"Don't flag an error if throughput is less than 'threshold'x100 percent off")
parser.add_argument("--noiseLevel", default=1, type=float, dest="noise", help=
"Don't flag an error if throughput is less than 'noise' times the computed noise level off")
parser.add_argument("--threadThreshold", default=0.1, type=float, dest="threadThreshold", help=
"Don't flag an error if thread level throughput is more than"
"'threadThreshold'x100 percent off")
parser.add_argument("--threadNoiseLevel", default=2, type=float, dest="threadNoise", help=
"Don't flag an error if thread level throughput is less than 'noise' times the computed noise level off")
parser.add_argument("--refTag", dest="reference", help=
"Reference tag to compare against. Should be a valid tag name")
parser.add_argument("--overrideFile", dest="overrideFile", help="File to read for comparison override information")
parser.add_argument("--variant", dest="variant", help="Variant to lookup in the override file")
args = parser.parse_args()
tagHistory = ""
j = get_json(args.file)
if args.tfile :
t = get_json(args.tfile)
tagHistory = History(t)
history = History(j)
testnames = history.testnames()
failed = False
failed = 0
results = []
# Default empty override structure
overrides = {'ndays' : {}, 'reference' : {}}
if args.overrideFile :
# Read the overrides file
foverrides = get_json(args.overrideFile)
# Is this variant in the overrides file?
if args.variant in foverrides :
overrides = foverrides[args.variant]
for test in testnames:
# The first entry is valid. The rest is dummy data to match the existing format
result = {'test_file' : test, 'exit_code' : 0, 'elapsed' : 5, 'start': 1441227291.962453, 'end': 1441227293.428761}
this_one = history.seriesAtRevision(test, args.rev)
testFailed = False
print "checking %s.." % (test)
if not this_one:
print "\tno data at this revision, skipping"
continue
#If the new build is 10% lower than the target (3.0 will be
#used as the baseline for 3.2 for instance), consider it
#regressed.
previous = history.seriesItemsNBefore(test, args.rev, 1)
if not previous:
print "\tno previous data, skipping"
continue
if compareResults(this_one, previous[0], args.threshold, "Previous", history.noiseLevels(test),
args.noise, args.threadThreshold, args.threadNoise):
testFailed = True
result['PreviousCompare'] = 'fail'
else :
result['PreviousCompare'] = 'pass'
daysprevious = history.seriesItemsNDaysBefore(test, args.rev,args.ndays)
if test in overrides['ndays']:
print "Override in ndays for test %s" % test
daysprevious = overrides['ndays'][test]
if compareResults(this_one, daysprevious, args.threshold, "NDays", history.noiseLevels(test),
args.noise, args.threadThreshold, args.threadNoise):
testFailed = True
result['NDayCompare'] = 'fail'
else :
result['NDayCompare'] = 'pass'
if tagHistory :
reference = tagHistory.seriesAtTag(test, args.reference)
if not reference :
print "Didn't get any data for test %s with baseline %s" % (test, args.reference)
if test in overrides['reference']:
print "Override in references for test %s" % test
reference = overrides['reference'][test]
if compareResults(this_one, reference, args.threshold, "Baseline Comparison " + args.reference, history.noiseLevels(test),
args.noise, args.threadThreshold, args.threadNoise):
testFailed = True
result['BaselineCompare'] = 'fail'
else :
result['BaselineCompare'] = 'pass'
if testFailed :
result['status'] = 'fail'
failed += 1
else :
result['status'] = 'pass'
results.append(result)
report = {}
report['failures'] = failed
report['results'] = results
reportFile = open('report.json', 'w')
json.dump(report, reportFile, indent=4, separators=(',', ': '))
if failed > 0 :
sys.exit(1)
else:
sys.exit(0)
# We wouldn't need this function if we had numpy installed on the system
def computeRange(result_list):
'''
Compute the max, min, and range (max - min) for the result list
'''
min = max = result_list[0]
for result in result_list:
if result < min:
min = result
if result > max:
max = result
return (max,min,max-min)
def get_json(filename):
jf = open(filename, 'r')
json_obj = json.load(jf)
return json_obj
class History(object):
def __init__(self, jsonobj):
self._raw = sorted(jsonobj, key=lambda d: d["order"])
self._noise = None
def testnames(self):
return set(list(itertools.chain.from_iterable([[z["name"] for z in c["data"]["results"]]
for c in self._raw])))
def seriesAtRevision(self, testname, revision):
s = self.series(testname)
for result in s:
if result["revision"] == revision:
return result
return None
def seriesAtTag(self, testname, tagName):
s = self.series(testname)
for result in s:
if result["tag"] == tagName:
return result
return None
def seriesItemsNBefore(self, testname, revision, n):
"""
Returns the 'n' items in the series under the given test name that
appear prior to the specified revision.
"""
results = []
found = False
s = self.series(testname)
for result in s:
if result["revision"] == revision:
found = True
break
results.append(result)
if found:
return results[-1*n:]
return []
def computeNoiseLevels(self):
"""
For each test, go through all results, and compute the average
noise (max - min) for the series
"""
self._noise = {}
testnames = self.testnames()
for test in testnames:
self._noise[test] = {}
s = self.series(test)
threads = []
for result in s:
threads = result["threads"]
break
# Determine levels from last commit? Probably a better way to do this.
for thread in threads:
s = self.series(test)
self._noise[test][thread] = sum((computeRange(x["results"][thread]["ops_per_sec_values"])[2]
for x in s))
s = self.series(test)
self._noise[test][thread] /= sum(1 for x in s)
def noiseLevels(self, testname):
"""
Returns the average noise level of the given test. Noise levels
are thread specific. Returns an array
"""
# check if noise has been computed. Compute if it hasn't
if not self._noise:
print "Computing noise levels"
self.computeNoiseLevels()
# Look up noise value for test
if not testname in self._noise:
print "Test %s not in self._noise" % (testname)
return self._noise[testname]
def seriesItemsNDaysBefore(self, testname, revision, n):
"""
Returns the items in the series under the given test name that
appear 'n' days prior to the specified revision.
"""
results = {}
# Date for this revision
s = self.seriesAtRevision(testname, revision)
if s==[]:
return []
refdate = parser.parse(s["end"]) - timedelta(days=n)
s = self.series(testname)
for result in s:
if parser.parse(result["end"]) < refdate:
results = result
return results
def series(self, testname):
for commit in self._raw:
# get a copy of the samples for those whose name matches the given testname
matching = filter( lambda x: x["name"]==testname, commit["data"]["results"])
if matching:
result = matching[0]
result["revision"] = commit["revision"]
result["tag"] = commit["tag"]
result["end"] = commit["data"]["end"]
result["order"] = commit["order"]
result["max"] = max(f["ops_per_sec"] for f in result["results"].values()
if type(f) == type({}))
result["threads"] = [f for f in result["results"] if type(result["results"][f])
== type({})]
yield result
class TestResult:
def __init__(self, json):
self._raw = json
#def max(self):
if __name__ == '__main__':
main(sys.argv[1:])