nowinandroid/.github/scripts/step_fit.py
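
"""Compare two directories of macrobenchmark JSON reports using a step-fit
statistic.

Usage (directory names are illustrative):

    python step_fit.py path/to/baseline-reports path/to/candidate-reports

Each directory should hold the benchmarkData JSON files produced by the
Jetpack Macrobenchmark library for the build being compared.
"""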


import argparse
import json
import math
import sys
from pathlib import Path
# ----------- CONFIG -----------
BENCHMARK_NAME = "startupPrecompiledWithBaselineProfile"
METRIC_KEY = "timeToInitialDisplayMs"
# ------------------------------


def step_fit(a, b):
    """Return the step between the two sample sets, normalized by the pooled
    per-sample error. Positive values mean b (candidate) is slower than a
    (baseline); values near zero mean the difference is within noise."""
    def sum_squared_error(values):
        avg = sum(values) / len(values)
        return sum((v - avg) ** 2 for v in values)

    if not a or not b:
        return 0.0
    total_squared_error = sum_squared_error(a) + sum_squared_error(b)
    step_error = math.sqrt(total_squared_error) / (len(a) + len(b))
    if step_error == 0.0:
        return 0.0
    return (sum(b) / len(b) - sum(a) / len(a)) / step_error
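
# Illustrative only (made-up numbers): with a = [100.0, 101.0, 99.0] and
# b = [130.0, 131.0, 129.0], the pooled error is sqrt(2 + 2) / 6 ≈ 0.33,
# so step_fit(a, b) ≈ (130 - 100) / 0.33 ≈ 90, far above the noise
# threshold used in main(). Overlapping noisy samples land near zero.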


def extract_median_from_files(paths):
    medians = []
    for path in paths:
        with open(path, "r") as f:
            data = json.load(f)
        found = False
        for bench in data.get("benchmarks", []):
            if bench.get("name") == BENCHMARK_NAME:
                metrics = bench.get("metrics", {})
                metric = metrics.get(METRIC_KEY, {})
                median = metric.get("median")
                # Only count the benchmark as found when the median is
                # actually present; otherwise sum() would fail later on None.
                if median is not None:
                    medians.append(median)
                    found = True
        if not found:
            raise ValueError(f"Metric not found in {path}")
    return medians
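
# Minimal sketch of the Macrobenchmark benchmarkData JSON the parser above
# expects (values are illustrative; fields it does not read are omitted):
# {
#   "benchmarks": [
#     {
#       "name": "startupPrecompiledWithBaselineProfile",
#       "metrics": {
#         "timeToInitialDisplayMs": {"median": 215.2}
#       }
#     }
#   ]
# }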


def main():
    parser = argparse.ArgumentParser(
        prog='Comparator',
        description='Compare two sets of macrobenchmark test results')
    parser.add_argument('baseline_dir', help='Baseline macrobenchmark reports directory')
    parser.add_argument('candidate_dir', help='Candidate macrobenchmark reports directory')
    args = parser.parse_args()
    baseline_dir = Path(args.baseline_dir)
    candidate_dir = Path(args.candidate_dir)
    baseline_files = sorted(baseline_dir.glob("*.json"))
    candidate_files = sorted(candidate_dir.glob("*.json"))

    if not baseline_files:
        print('ERR: baseline has no macrobenchmark results', file=sys.stderr)
        sys.exit(1)
    if not candidate_files:
        print('ERR: candidate has no macrobenchmark results', file=sys.stderr)
        sys.exit(1)

    min_len = min(len(baseline_files), len(candidate_files))
    if len(baseline_files) != len(candidate_files):
        print(f"WARN: Length mismatch, using first {min_len} samples. "
              f"baseline: {len(baseline_files)}, candidate: {len(candidate_files)}")
    print('Macrobenchmark Result Mapping:')
    print('| Index | Baseline | Candidate |')
    print('--------------------------------')
    mismatch_count = 0
    for i in range(min_len):
        baseline_filename = baseline_files[i].name.upper()
        candidate_filename = candidate_files[i].name.upper()
        if baseline_filename != candidate_filename:
            # Mark rows whose baseline/candidate filenames do not match.
            mismatch_count += 1
            print('* ', end='')
        print(f'{i + 1} {baseline_files[i]} <-> {candidate_files[i]}')
    print('--------------------------------')
    print(f'# Match : {min_len - mismatch_count}')
    print(f'# Mismatch: {mismatch_count}')
    if mismatch_count > 0:
        print("WARN: filename mapping mismatch detected. The comparison may be unreliable")
    baseline_medians = extract_median_from_files(baseline_files[:min_len])
    candidate_medians = extract_median_from_files(candidate_files[:min_len])
    assert len(baseline_medians) == len(candidate_medians)

    result = step_fit(baseline_medians, candidate_medians)
    print("\n-----------------------------")
    print(f"Baseline medians : {baseline_medians}")
    print(f"Candidate medians: {candidate_medians}")
    print(f"Step Fit Result: {result:.4f}")
    print("-----------------------------")
    if abs(result) <= 25:
        print("➡️ Difference is within noise range (low confidence of real regression)")
    elif result > 0:
        print("⚠️ Candidate is slower than baseline (possible regression)")
    else:
        print("🚀 Candidate is faster than baseline (possible improvement)")


if __name__ == "__main__":
    main()