345 lines
13 KiB
Python
345 lines
13 KiB
Python
import argparse
|
|
import json
|
|
import sys
|
|
import itertools
|
|
from dateutil import parser
|
|
from datetime import timedelta, datetime
|
|
|
|
# Example usage:
#
#   perf_regression_check.py -f history_file.json --rev 18808cd923789a34abd7f13d62e7a73fafd5ce5f
#
# Loads the history JSON file and looks for regressions at revision 18808cd...
# Exits with status code 1 if any regression is found, 0 otherwise.
|
|
|
|
def compareOneResultNoise(this_one, reference, label, threadlevel="max", noiseLevel=0,
                          noiseMultiple=1, minThreshold=0.05):
    """
    Compare one throughput figure of a result against a reference result.

    The allowed drop is the larger of ``minThreshold * <reference ops/sec>``
    and ``noiseLevel * noiseMultiple``. A regression is reported (and
    printed) when the throughput dropped by at least that amount.

    this_one      -- result under test; reads ``this_one["max"]`` or
                     ``this_one["results"][threadlevel]["ops_per_sec"]``.
    reference     -- result to compare against (same keys, plus "revision",
                     which is only used in the printed message). A falsy
                     reference means "nothing to compare" and passes.
    label         -- name of the comparison, used in the printed message.
    threadlevel   -- "max" for the best throughput, otherwise a key of the
                     "results" mapping.
    noiseLevel    -- historical noise in ops/sec for this thread level.
    noiseMultiple -- multiplier applied to noiseLevel.
    minThreshold  -- minimum relative drop (fraction of the reference).

    Returns True if a regression was found, False otherwise.
    """
    if not reference:
        return False

    if threadlevel == "max":
        ref = reference["max"]
        current = this_one["max"]
    else:
        # Don't do a comparison if the thread data is missing on either side.
        if threadlevel not in reference["results"] or threadlevel not in this_one["results"]:
            return False
        ref = reference["results"][threadlevel]['ops_per_sec']
        current = this_one["results"][threadlevel]['ops_per_sec']

    # The tolerance is whichever is larger: the relative threshold or the noise band.
    delta = max(minThreshold * ref, noiseLevel * noiseMultiple)
    drop = ref - current

    # Require a real drop: with a zero tolerance, identical numbers are not a regression.
    if drop <= 0 or drop < delta:
        return False

    # Guard the percentage so a zero reference cannot raise ZeroDivisionError.
    percent = 100.0 * drop / ref if ref else 0.0
    print("\tregression found on %s: drop from %.2f ops/sec (commit %s) to %.2f ops/sec for comparison %s. Diff is"
          " %.2f ops/sec (%.2f%%), noise level is %.2f ops/sec and multiple is %.2f" %
          (threadlevel, ref, reference["revision"][:5], current, label, drop,
           percent, noiseLevel, noiseMultiple))
    return True
|
|
|
|
|
|
def compareResults(this_one, reference, threshold, label, noiseLevels=None, noiseMultiple=1,
                   threadThreshold=None, threadNoiseMultiple=None):
    """
    Compare a result against a reference at the max level and at every thread level.

    this_one            -- result under test (reads "max" and "results").
    reference           -- result to compare against; a falsy value means
                           there is nothing to compare and the check passes.
    threshold           -- relative threshold for the max-throughput check.
    label               -- name of the comparison, used in printed output.
    noiseLevels         -- mapping of thread level -> noise in ops/sec.
                           Defaults to no noise data.
    noiseMultiple       -- noise multiplier for the max-throughput check.
    threadThreshold     -- relative threshold for per-thread checks; a falsy
                           value (None or 0) falls back to ``threshold``.
    threadNoiseMultiple -- noise multiplier for per-thread checks; a falsy
                           value falls back to ``noiseMultiple``.

    Returns True if any comparison failed, False otherwise.
    """
    if not reference:
        return False

    # Avoid a shared mutable default argument.
    if noiseLevels is None:
        noiseLevels = {}
    # Default the per-thread settings to the max-level settings.
    if not threadThreshold:
        threadThreshold = threshold
    if not threadNoiseMultiple:
        threadNoiseMultiple = noiseMultiple

    # Check max throughput first. For the max throughput, use the largest
    # noise across the thread levels as the noise parameter.
    max_noise = max(noiseLevels.values()) if noiseLevels else 0
    failed = bool(compareOneResultNoise(this_one, reference, label, "max", noiseLevel=max_noise,
                                        noiseMultiple=noiseMultiple, minThreshold=threshold))

    # Check for regression on threading levels. Entries of "results" that are
    # not mappings are skipped, as they are not per-thread measurements.
    for level, entry in this_one["results"].items():
        if not isinstance(entry, dict):
            continue
        if compareOneResultNoise(this_one, reference, label, level,
                                 noiseLevel=noiseLevels.get(level, 0),
                                 noiseMultiple=threadNoiseMultiple,
                                 minThreshold=threadThreshold):
            failed = True

    if not failed:
        print("\tno regression against %s and githash %s" % (label, reference["revision"][:5]))
    return failed
|
|
|
|
|
|
|
|
def _build_arg_parser():
    """Build the command-line parser for the regression check."""
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("-f", "--file", dest="file",
                            help="path to json file containing history data")
    arg_parser.add_argument("-t", "--tagFile", dest="tfile",
                            help="path to json file containing tag data")
    arg_parser.add_argument("--rev", dest="rev", help="revision to examine for regressions")
    arg_parser.add_argument("--ndays", default=7, type=int, dest="ndays",
                            help="Check against commit from n days ago.")
    arg_parser.add_argument("--threshold", default=0.05, type=float, dest="threshold",
                            help="Don't flag an error if throughput is less than "
                                 "'threshold'x100 percent off")
    arg_parser.add_argument("--noiseLevel", default=1, type=float, dest="noise",
                            help="Don't flag an error if throughput is less than 'noise' "
                                 "times the computed noise level off")
    arg_parser.add_argument("--threadThreshold", default=0.1, type=float, dest="threadThreshold",
                            help="Don't flag an error if thread level throughput is less than "
                                 "'threadThreshold'x100 percent off")
    arg_parser.add_argument("--threadNoiseLevel", default=2, type=float, dest="threadNoise",
                            help="Don't flag an error if thread level throughput is less than "
                                 "'noise' times the computed noise level off")
    arg_parser.add_argument("--refTag", dest="reference",
                            help="Reference tag to compare against. Should be a valid tag name")
    arg_parser.add_argument("--overrideFile", dest="overrideFile",
                            help="File to read for comparison override information")
    arg_parser.add_argument("--variant", dest="variant",
                            help="Variant to lookup in the override file")
    return arg_parser


def _load_overrides(path, variant):
    """Return the {'ndays': ..., 'reference': ...} overrides for variant (empty if none)."""
    # Default empty override structure
    overrides = {'ndays': {}, 'reference': {}}
    if not path:
        return overrides
    file_overrides = get_json(path)
    # Is this variant in the overrides file?
    if variant in file_overrides:
        variant_overrides = file_overrides[variant]
        # Tolerate a variant entry that only defines one of the two sections.
        overrides['ndays'] = variant_overrides.get('ndays') or {}
        overrides['reference'] = variant_overrides.get('reference') or {}
    return overrides


def _is_regression(this_one, reference, label, noise_levels, options):
    """Run compareResults with the thresholds taken from the parsed options."""
    return compareResults(this_one, reference, options.threshold, label, noise_levels,
                          options.noise, options.threadThreshold, options.threadNoise)


def main(args):
    """
    Check every test in the history file for regressions at one revision.

    args -- list of command-line arguments (without the program name).

    Each test is compared against the previous result, the result from
    ``--ndays`` days earlier and, when a tag file is given, the result at
    the reference tag. A summary is written to ``report.json`` in the
    current directory. Exits with status 1 if any test regressed, else 0.
    """
    # Parse the arguments that were passed in rather than re-reading sys.argv.
    options = _build_arg_parser().parse_args(args)

    history = History(get_json(options.file))
    tagHistory = History(get_json(options.tfile)) if options.tfile else None
    overrides = _load_overrides(options.overrideFile, options.variant)

    failed = 0
    results = []

    for test in history.testnames():
        # The first entry is valid. The rest is dummy data to match the existing format
        result = {'test_file': test, 'exit_code': 0, 'elapsed': 5,
                  'start': 1441227291.962453, 'end': 1441227293.428761}
        this_one = history.seriesAtRevision(test, options.rev)
        testFailed = False
        print("checking %s.." % (test))
        if not this_one:
            print("\tno data at this revision, skipping")
            continue

        previous = history.seriesItemsNBefore(test, options.rev, 1)
        if not previous:
            print("\tno previous data, skipping")
            continue

        noise_levels = history.noiseLevels(test)

        # Compare against the immediately preceding result.
        if _is_regression(this_one, previous[0], "Previous", noise_levels, options):
            testFailed = True
            result['PreviousCompare'] = 'fail'
        else:
            result['PreviousCompare'] = 'pass'

        # Compare against the result from 'ndays' days ago, unless overridden.
        daysprevious = history.seriesItemsNDaysBefore(test, options.rev, options.ndays)
        if test in overrides['ndays']:
            print("Override in ndays for test %s" % test)
            daysprevious = overrides['ndays'][test]
        if _is_regression(this_one, daysprevious, "NDays", noise_levels, options):
            testFailed = True
            result['NDayCompare'] = 'fail'
        else:
            result['NDayCompare'] = 'pass'

        # Compare against the tagged baseline, unless overridden.
        if tagHistory:
            reference = tagHistory.seriesAtTag(test, options.reference)
            if not reference:
                print("Didn't get any data for test %s with baseline %s" % (test, options.reference))
            if test in overrides['reference']:
                print("Override in references for test %s" % test)
                reference = overrides['reference'][test]
            # %-formatting keeps this working when --refTag was not supplied (None).
            baseline_label = "Baseline Comparison %s" % options.reference
            if _is_regression(this_one, reference, baseline_label, noise_levels, options):
                testFailed = True
                result['BaselineCompare'] = 'fail'
            else:
                result['BaselineCompare'] = 'pass'

        if testFailed:
            result['status'] = 'fail'
            failed += 1
        else:
            result['status'] = 'pass'
        results.append(result)

    report = {'failures': failed, 'results': results}

    # Close the report before exiting so it is fully flushed to disk.
    with open('report.json', 'w') as reportFile:
        json.dump(report, reportFile, indent=4, separators=(',', ': '))

    sys.exit(1 if failed > 0 else 0)
|
|
|
|
# Plain-Python helper so the script does not need numpy installed.
def computeRange(result_list):
    """
    Compute the max, min, and range (max - min) for the result list.

    result_list -- non-empty iterable of numbers.

    Returns the tuple ``(max, min, max - min)``.
    Raises ValueError if result_list is empty.
    """
    values = list(result_list)
    if not values:
        raise ValueError("computeRange() requires at least one result")
    # Use the builtins directly instead of shadowing them with local names.
    highest = max(values)
    lowest = min(values)
    return (highest, lowest, highest - lowest)
|
|
|
|
def get_json(filename):
    """
    Load and return the JSON document stored in filename.

    The file is closed as soon as it has been parsed. Errors from open()
    and json.load() propagate to the caller.
    """
    with open(filename, 'r') as jf:
        return json.load(jf)
|
|
|
|
class History(object):
    """
    Read-only view over a list of per-commit benchmark records.

    Each record is read through the keys "order", "revision", "tag",
    "create_time" and ``record["data"]["results"]``, a list of per-test
    entries that each carry a "name" and a "results" mapping. Records are
    kept sorted by "order".
    """

    def __init__(self, jsonobj):
        self._raw = sorted(jsonobj, key=lambda d: d["order"])
        # Per-test noise levels; computed lazily by noiseLevels().
        self._noise = None

    def testnames(self):
        """Return the set of all test names that appear in any commit."""
        return set(entry["name"]
                   for commit in self._raw
                   for entry in commit["data"]["results"])

    def seriesAtRevision(self, testname, revision):
        """Return the result of testname at revision, or None if there is none."""
        for result in self.series(testname):
            if result["revision"] == revision:
                return result
        return None

    def seriesAtTag(self, testname, tagName):
        """Return the first result of testname whose tag equals tagName, or None."""
        for result in self.series(testname):
            if result["tag"] == tagName:
                return result
        return None

    def seriesItemsNBefore(self, testname, revision, n):
        """
        Returns the 'n' items in the series under the given test name that
        appear prior to the specified revision.

        Returns an empty list if the revision is not in the series or if
        n is not positive.
        """
        earlier = []
        for result in self.series(testname):
            if result["revision"] == revision:
                # earlier[-0:] would be the whole list, so handle n <= 0 explicitly.
                return earlier[-n:] if n > 0 else []
            earlier.append(result)
        return []

    def computeNoiseLevels(self):
        """
        For each test, go through all results, and compute the average
        noise (max - min of "ops_per_sec_values") per thread level.

        The thread levels are taken from the first result in the series.
        Results that lack a thread level, or have no samples for it, do not
        contribute to that level's average.
        """
        self._noise = {}
        for test in self.testnames():
            # Materialise the series once instead of regenerating it per thread.
            series = list(self.series(test))
            threads = series[0]["threads"] if series else []
            levels = {}
            for thread in threads:
                ranges = []
                for item in series:
                    entry = item["results"].get(thread)
                    if not isinstance(entry, dict):
                        continue
                    samples = entry.get("ops_per_sec_values")
                    if samples:
                        ranges.append(max(samples) - min(samples))
                if ranges:
                    levels[thread] = sum(ranges) / float(len(ranges))
            self._noise[test] = levels

    def noiseLevels(self, testname):
        """
        Returns the average noise levels of the given test as a mapping of
        thread level -> noise. Returns an empty mapping (after printing a
        notice) if the test is unknown.
        """
        # Compute once; an empty result is still a valid cached result.
        if self._noise is None:
            print("Computing noise levels")
            self.computeNoiseLevels()
        # Look up noise value for test
        if testname not in self._noise:
            print("Test %s not in self._noise" % (testname))
            return {}
        return self._noise[testname]

    def seriesItemsNDaysBefore(self, testname, revision, n):
        """
        Returns the latest item (by "order") in the series under the given
        test name that was created more than 'n' days before the specified
        revision.

        Returns an empty list if the revision has no result for the test,
        and an empty dict if no item is old enough; both are falsy.
        """
        found = {}
        # Date for this revision
        at_revision = self.seriesAtRevision(testname, revision)
        if not at_revision:
            return []
        refdate = parser.parse(at_revision["create_time"]) - timedelta(days=n)

        for result in self.series(testname):
            if parser.parse(result["create_time"]) < refdate:
                found = result
        return found

    def series(self, testname):
        """
        Yield the result entry of testname for every commit that has one,
        in "order" order.

        Each yielded entry is the stored dict itself (not a copy), annotated
        in place with the commit's "revision", "tag", "create_time" and
        "order", plus "max" (highest "ops_per_sec" over the thread levels)
        and "threads" (the thread-level keys). max() raises ValueError for
        an entry that has no thread-level results.
        """
        for commit in self._raw:
            # First entry whose name matches; a generator keeps this working
            # whether or not filter() returns a list.
            result = next((entry for entry in commit["data"]["results"]
                           if entry["name"] == testname), None)
            if result is None:
                continue
            result["revision"] = commit["revision"]
            result["tag"] = commit["tag"]
            result["create_time"] = commit["create_time"]
            result["order"] = commit["order"]
            # Only mapping-valued entries of "results" are thread levels.
            result["max"] = max(f["ops_per_sec"] for f in result["results"].values()
                                if isinstance(f, dict))
            result["threads"] = [f for f in result["results"]
                                 if isinstance(result["results"][f], dict)]
            yield result
|
|
|
|
|
|
class TestResult(object):
    """Holder for the raw JSON data of a single test result."""

    def __init__(self, json):
        # NOTE: the parameter name shadows the json module inside this method
        # only; it is kept so keyword callers (json=...) continue to work.
        self._raw = json

    # TODO: a max() accessor was stubbed out here but never implemented.
|
|
|
|
# Script entry point: pass the command-line arguments (without the program name) to main().
if __name__ == '__main__':
    main(sys.argv[1:])
|