You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
110 lines
3.8 KiB
110 lines
3.8 KiB
import argparse
|
|
import json
|
|
import math
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# ----------- CONFIG -----------
# Benchmark entry (matched against each report's "name" field) whose metric
# is extracted from the macrobenchmark JSON reports.
BENCHMARK_NAME = "startupPrecompiledWithBaselineProfile"
# Metric whose "median" value is compared between baseline and candidate.
METRIC_KEY = "timeToInitialDisplayMs"
# ------------------------------
|
|
|
|
def step_fit(a, b):
    """Return a signed step-fit score between two sample groups.

    The score is the difference of the group means normalized by a noise
    estimate (root of the pooled sum of squared deviations, divided by the
    total sample count). Positive means *a*'s mean exceeds *b*'s.
    Returns 0.0 when either group is empty or both groups are constant.
    """

    def _sum_sq_dev(samples):
        # Sum of squared deviations from the group's own mean.
        mean = sum(samples) / len(samples)
        return sum((s - mean) ** 2 for s in samples)

    # Guard before computing any mean: empty groups yield no signal.
    if not a or not b:
        return 0.0

    noise = math.sqrt(_sum_sq_dev(a) + _sum_sq_dev(b)) / (len(a) + len(b))
    if noise == 0.0:
        # Both groups are perfectly flat; no spread to normalize against.
        return 0.0

    mean_a = sum(a) / len(a)
    mean_b = sum(b) / len(b)
    return (mean_a - mean_b) / noise
|
|
|
|
def extract_median_from_files(paths, benchmark_name=None, metric_key=None):
    """Read one metric median from each macrobenchmark JSON report.

    Args:
        paths: iterable of JSON report file paths; one median is read per file.
        benchmark_name: benchmark "name" to match; defaults to BENCHMARK_NAME.
        metric_key: metric to read the "median" of; defaults to METRIC_KEY.

    Returns:
        List of median values, in the same order as *paths*.

    Raises:
        ValueError: if a file has no matching benchmark, or the matching
            benchmark lacks a median for the requested metric.
    """
    if benchmark_name is None:
        benchmark_name = BENCHMARK_NAME
    if metric_key is None:
        metric_key = METRIC_KEY

    medians = []
    for path in paths:
        with open(path, "r") as f:
            data = json.load(f)

        found = False
        for bench in data.get("benchmarks", []):
            if bench.get("name") == benchmark_name:
                median = bench.get("metrics", {}).get(metric_key, {}).get("median")
                # Fail loudly here instead of appending None, which would
                # surface later as an opaque TypeError inside step_fit().
                if median is None:
                    raise ValueError(f"Metric not found in {path}")
                medians.append(median)
                found = True

        if not found:
            raise ValueError(f"Metric not found in {path}")

    return medians
|
|
|
|
def main():
    """CLI entry point: compare two directories of macrobenchmark reports.

    Reads all *.json reports from the baseline and candidate directories,
    prints the filename mapping, and reports a step-fit score over the
    per-file metric medians. Exits with status 1 when either directory
    contains no JSON reports.
    """
    parser = argparse.ArgumentParser(prog='Comperator', description='Compare between multiple macrobenchmark test results')
    parser.add_argument('baseline_dir', help='Baseline macrobenchmark reports directory')
    parser.add_argument('candidate_dir', help='Candidate macrobenchmark reports directory')
    args = parser.parse_args()

    baseline_files = sorted(Path(args.baseline_dir).glob("*.json"))
    candidate_files = sorted(Path(args.candidate_dir).glob("*.json"))

    # sys.exit() instead of the site-module exit() helper, which is not
    # guaranteed to exist (e.g. under `python -S`).
    if not baseline_files:
        print('ERR: baseline has no macrobenchmark results', file=sys.stderr)
        sys.exit(1)

    if not candidate_files:
        print('ERR: candidate has no macrobenchmark results', file=sys.stderr)
        sys.exit(1)

    # Compare only the common prefix when the two runs produced a different
    # number of report files.
    min_len = min(len(baseline_files), len(candidate_files))
    if len(baseline_files) != len(candidate_files):
        print(f"WARN: Length mismatch, using first {min_len} samples. baseline: {len(baseline_files)}, candidate: {len(candidate_files)}")

    _print_mapping(baseline_files, candidate_files, min_len)

    baseline_medians = extract_median_from_files(baseline_files[:min_len])
    candidate_medians = extract_median_from_files(candidate_files[:min_len])
    # Explicit check rather than `assert`, which is stripped under `python -O`.
    if len(baseline_medians) != len(candidate_medians):
        raise RuntimeError("median count mismatch between baseline and candidate")

    result = step_fit(baseline_medians, candidate_medians)

    print("\n-----------------------------")
    print(f"Baseline medians : {baseline_medians}")
    print(f"Candidate medians: {candidate_medians}")
    print(f"Step Fit Result: {result:.4f}")
    print("-----------------------------")

    # Scores within +/-NOISE_THRESHOLD are treated as measurement noise.
    # NOTE(review): threshold looks empirical — confirm against the
    # benchmark's observed run-to-run variance.
    NOISE_THRESHOLD = 25
    if abs(result) <= NOISE_THRESHOLD:
        print("➡️ Difference is within noise range (low confidence of real regression)")
    elif result > 0:
        print("⚠️ v2 is slower than v1 (possible regression)")
    else:
        print("🚀 v2 is faster than v1 (possible improvement)")


def _print_mapping(baseline_files, candidate_files, min_len):
    """Print the index-aligned baseline<->candidate filename mapping table.

    Mismatched rows are flagged with a leading '* ', counted, and a warning
    is printed when any mismatch is found.
    """
    print('Macrobenchmark Result Mapping:')
    print('| Index | Baseline | Candidate |')
    print('--------------------------------')

    mismatch_count = 0
    for i in range(min_len):
        # Case-insensitive comparison: report filenames may differ only in case.
        if baseline_files[i].name.upper() != candidate_files[i].name.upper():
            mismatch_count += 1
            print('* ', end='')
        print(f'{i + 1} {baseline_files[i]} <-> {candidate_files[i]}')

    print('--------------------------------')
    print(f'# Match : {min_len - mismatch_count}')
    print(f'# Mismatch: {mismatch_count}')
    if mismatch_count > 0:
        print("WARN: filename mapping mismatch detected. Output prediction may be incorrect")


if __name__ == "__main__":
    main()
|