diff --git a/.github/scripts/step_fit.py b/.github/scripts/step_fit.py
new file mode 100644
index 000000000..48f062df0
--- /dev/null
+++ b/.github/scripts/step_fit.py
@@ -0,0 +1,79 @@
+import json
+import math
+import glob
+import os
+
+# ----------- CONFIG -----------
+RESULTS_DIR = "benchmarks/build/outputs/connected_android_test_additional_output/collected"
+BENCHMARK_NAME = "startupPrecompiledWithBaselineProfile"
+METRIC_KEY = "timeToInitialDisplayMs"
+# ------------------------------
+
+def sum_squared_error(values):
+    # Sum of squared deviations from the group mean.
+    avg = sum(values) / len(values)
+    return sum((v - avg) ** 2 for v in values)
+
+def step_fit(before, after):
+    # Signed ratio of the shift in group means to the pooled within-group
+    # noise. Positive means "after" is slower than "before".
+    total_squared_error = sum_squared_error(before) + sum_squared_error(after)
+    step_error = math.sqrt(total_squared_error) / (len(before) + len(after))
+    if step_error == 0.0:
+        # Both groups are perfectly flat; avoid division by zero.
+        return 0.0
+    return (sum(after) / len(after) - sum(before) / len(before)) / step_error
+
+def extract_median_from_file(path):
+    with open(path, "r") as f:
+        data = json.load(f)
+    for bench in data.get("benchmarks", []):
+        if bench.get("name") == BENCHMARK_NAME:
+            metrics = bench.get("metrics", {})
+            metric = metrics.get(METRIC_KEY, {})
+            median = metric.get("median")
+            if median is None:
+                raise ValueError(f"No median for {METRIC_KEY} in {path}")
+            return median
+    raise ValueError(f"Metric not found in {path}")
+
+def main():
+    before = []
+    after = []
+
+    json_files = sorted(glob.glob(os.path.join(RESULTS_DIR, "*.json")))
+
+    if len(json_files) == 0:
+        raise RuntimeError("No JSON files found.")
+
+    for path in json_files:
+        median = extract_median_from_file(path)
+        filename = os.path.basename(path).lower()
+        if "v1" in filename:
+            before.append(median)
+        elif "v2" in filename:
+            after.append(median)
+        else:
+            print(f"Skipping file with unknown label: {filename}")
+        print(f"{filename}: median={median:.3f} ms")
+
+    if len(before) != 5 or len(after) != 5:
+        raise RuntimeError(f"Expected 5 runs each, got v1={len(before)}, v2={len(after)}")
+
+    result = step_fit(before, after)
+
+    print("\n-----------------------------")
+    print(f"v1 medians: {before}")
+    print(f"v2 medians: {after}")
+    print(f"Step Fit Result: {result:.4f}")
+    print("-----------------------------")
+
+    if abs(result) <= 25:
+        print("➡️ Difference is within noise range (low confidence of real regression)")
+    elif result > 0:
+        print("⚠️ v2 is slower than v1 (possible regression)")
+    else:
+        print("🚀 v2 is faster than v1 (possible improvement)")
+
+if __name__ == "__main__":
+    main()
diff --git a/.github/workflows/Build.yaml b/.github/workflows/Build.yaml
index 94d252ead..c3b81c731 100644
--- a/.github/workflows/Build.yaml
+++ b/.github/workflows/Build.yaml
@@ -38,7 +38,7 @@ jobs:
       - name: Build Benchmark APKs
         run: ./gradlew :app:assembleDemoBenchmark :benchmarks:assembleDemoBenchmark
 
-      - name: Run Benchmarks & Record Video
+      - name: Run Benchmarks
         uses: reactivecircus/android-emulator-runner@v2
         with:
           api-level: 30
@@ -48,23 +48,54 @@ jobs:
           emulator-options: -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim -memory 4096
           disable-animations: true
           script: |
-            # 1. Start recording
-            # adb shell screenrecord --time-limit 180 /sdcard/benchmark_video.mp4 &
-
-            # 2. Run ONLY the Startup tests
-            ./gradlew :benchmarks:connectedDemoBenchmarkAndroidTest -Pandroid.testInstrumentationRunnerArguments.class=com.google.samples.apps.nowinandroid.startup.StartupBenchmark -Pandroid.testInstrumentationRunnerArguments.androidx.benchmark.suppressErrors=EMULATOR || true
-
-            # 3. Pull the video (This will run even if the tests above fail)
-            # echo "Pulling video file..."
-            # adb shell pkill -2 screenrecord || true
-            # sleep 5
-            # adb pull /sdcard/benchmark_video.mp4 benchmark_video.mp4
+            set -e
+
+            OUTPUT_DIR="benchmarks/build/outputs/connected_android_test_additional_output"
+            COLLECTED_DIR="$OUTPUT_DIR/collected"
+            mkdir -p "$COLLECTED_DIR"
+
+            run_benchmark () {
+              VERSION_LABEL=$1  # v1 or v2
+              RUN_NUMBER=$2     # 1..5
+
+              echo "=============================="
+              echo "Running benchmark for $VERSION_LABEL run $RUN_NUMBER"
+              echo "=============================="
+
+              # Clear app data to keep runs consistent
+              adb shell pm clear com.google.samples.apps.nowinandroid || true
+
+              # Run only the Startup benchmark
+              ./gradlew :benchmarks:connectedDemoBenchmarkAndroidTest \
+                -Pandroid.testInstrumentationRunnerArguments.class=com.google.samples.apps.nowinandroid.startup.StartupBenchmark \
+                -Pandroid.testInstrumentationRunnerArguments.androidx.benchmark.suppressErrors=EMULATOR
+
+              # Find the newest JSON result file, ignoring copies already collected
+              LATEST_JSON=$(find "$OUTPUT_DIR" -name "*.json" -type f -not -path "$COLLECTED_DIR/*" -printf "%T@ %p\n" | sort -nr | head -n1 | cut -d' ' -f2-)
+
+              if [ -z "$LATEST_JSON" ]; then
+                echo "Error: No benchmark JSON file found"
+                exit 1
+              fi
+
+              NEW_NAME="$COLLECTED_DIR/benchmark_${VERSION_LABEL}_run${RUN_NUMBER}.json"
+              cp "$LATEST_JSON" "$NEW_NAME"
+
+              echo "Saved result to $NEW_NAME"
+            }
+
+            # Alternate runs: v1, v2, v1, v2 ...
+            for i in 1 2 3 4 5; do
+              run_benchmark "v1" "$i"
+              run_benchmark "v2" "$i"
+            done
+
+      - name: Run step fit
+        run: python3 .github/scripts/step_fit.py
 
       - name: Upload Artifacts
         if: always()
         uses: actions/upload-artifact@v4
         with:
           name: benchmark-results
-          path: |
-            # benchmark_video.mp4
-            benchmarks/build/outputs/connected_android_test_additional_output/
\ No newline at end of file
+          path: benchmarks/build/outputs/connected_android_test_additional_output/collected/
\ No newline at end of file
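
Reviewer note: a minimal sketch of how the step-fit statistic behaves on made-up medians. The numbers below are illustrative only, not output from a real run, and the snippet assumes it is executed from .github/scripts/ so the module import resolves:

    # step_fit_demo.py -- hypothetical sanity check, not part of the CI run
    from step_fit import step_fit

    v1 = [250.0, 252.0, 249.0, 251.0, 250.0]  # stable "before" medians (ms)
    v2 = [290.0, 288.0, 291.0, 289.0, 292.0]  # clearly slower "after" medians (ms)

    print(step_fit(v1, v1))  # 0.0: identical groups, no step
    print(step_fit(v1, v2))  # ~101.6: far above the 25 noise threshold -> flagged as regression

Because the numerator is the shift between group means and the denominator scales with the within-group scatter, tightly clustered runs let a small mean shift cross the threshold, while noisy runs need a larger shift to be flagged.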