feat: testing ways to mitigate variability

These mitigations are: alternating the benchmark runs between the two versions, and the step fit algorithm for scoring the difference. I also removed the commented-out emulator screen-recording steps, and the workflow now stores only the JSON result files we care about, without the other Perfetto traces and text files.
pull/2098/head^2
Ahmed Khaled 4 months ago
parent 8134706afc
commit 9e0861d7c2
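For context on the first mitigation: running all v1 iterations and then all v2 iterations lets slow environment drift (emulator warm-up, thermal state, host load) land almost entirely in one group and masquerade as a version difference, while interleaving v1/v2 spreads the drift roughly evenly across both groups. A minimal standalone sketch of that effect, with made-up drift and noise numbers, not part of the commit itself:

import random

random.seed(42)

def run(slot):
    # 100 ms baseline + 2 ms of drift per benchmark slot + random noise.
    # The two "versions" are identical; any gap is environment drift.
    return 100.0 + 2.0 * slot + random.gauss(0, 1.5)

# Sequential: v1 occupies slots 0-4, v2 occupies slots 5-9
seq_v1 = [run(s) for s in range(5)]
seq_v2 = [run(s) for s in range(5, 10)]

# Alternating: v1 takes even slots, v2 takes odd slots
alt_v1 = [run(s) for s in range(0, 10, 2)]
alt_v2 = [run(s) for s in range(1, 10, 2)]

mean = lambda xs: sum(xs) / len(xs)
print(f"sequential gap:  {mean(seq_v2) - mean(seq_v1):+.1f} ms")  # ~ +10 ms of pure drift
print(f"alternating gap: {mean(alt_v2) - mean(alt_v1):+.1f} ms")  # ~ +2 ms (one slot apart)

With identical builds, the sequential split reports a gap of roughly 10 ms that is pure drift, while the alternating split shrinks it to the one-slot difference of about 2 ms.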

.github/scripts/step_fit.py
@@ -0,0 +1,72 @@
import json
import math
import glob
import os

# ----------- CONFIG -----------
RESULTS_DIR = "benchmarks/build/outputs/connected_android_test_additional_output/collected"
BENCHMARK_NAME = "startupPrecompiledWithBaselineProfile"
METRIC_KEY = "timeToInitialDisplayMs"
# ------------------------------


def sum_squared_error(values):
    """Sum of squared deviations from the mean (within-group noise)."""
    avg = sum(values) / len(values)
    return sum((v - avg) ** 2 for v in values)


def step_fit(before, after):
    """Score the gap between the group means relative to the measurement noise.

    The magnitude grows with the difference between the means and shrinks
    with the within-group variance, so a large |result| means the step is
    unlikely to be noise.
    """
    total_squared_error = sum_squared_error(before) + sum_squared_error(after)
    step_error = math.sqrt(total_squared_error) / (len(before) + len(after))
    if step_error == 0.0:
        return 0.0
    return (sum(before) / len(before) - sum(after) / len(after)) / step_error


def extract_median_from_file(path):
    with open(path, "r") as f:
        data = json.load(f)
    for bench in data.get("benchmarks", []):
        if bench.get("name") == BENCHMARK_NAME:
            metrics = bench.get("metrics", {})
            metric = metrics.get(METRIC_KEY, {})
            median = metric.get("median")
            if median is None:
                raise ValueError(f"{METRIC_KEY} median missing in {path}")
            return median
    raise ValueError(f"Benchmark {BENCHMARK_NAME} not found in {path}")


def main():
    before = []
    after = []
    json_files = sorted(glob.glob(os.path.join(RESULTS_DIR, "*.json")))
    if len(json_files) == 0:
        raise RuntimeError("No JSON files found.")
    for path in json_files:
        median = extract_median_from_file(path)
        filename = os.path.basename(path).lower()
        if "v1" in filename:
            before.append(median)
        elif "v2" in filename:
            after.append(median)
        else:
            print(f"Skipping file with unknown label: {filename}")
            continue
        print(f"{filename}: median={median:.3f} ms")
    if len(before) != 5 or len(after) != 5:
        raise RuntimeError(f"Expected 5 runs each, got v1={len(before)}, v2={len(after)}")
    result = step_fit(before, after)
    print("\n-----------------------------")
    print(f"v1 medians: {before}")
    print(f"v2 medians: {after}")
    print(f"Step Fit Result: {result:.4f}")
    print("-----------------------------")
    # A positive result means the v1 mean exceeds the v2 mean; since the
    # metric is a time (lower is better), positive = improvement and
    # negative = regression.
    if abs(result) <= 25:
        print("➡️ Difference is within noise range (low confidence of real regression)")
    elif result > 0:
        print("🚀 v2 is faster than v1 (possible improvement)")
    else:
        print("⚠️ v2 is slower than v1 (possible regression)")


if __name__ == "__main__":
    main()
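As a quick sanity check of the scoring, step_fit can be exercised standalone with made-up medians; the ±25 cutoff below mirrors the threshold in main. The two functions are copied verbatim from the script above so the snippet runs on its own:

import math

def sum_squared_error(values):
    avg = sum(values) / len(values)
    return sum((v - avg) ** 2 for v in values)

def step_fit(before, after):
    total_squared_error = sum_squared_error(before) + sum_squared_error(after)
    step_error = math.sqrt(total_squared_error) / (len(before) + len(after))
    if step_error == 0.0:
        return 0.0
    return (sum(before) / len(before) - sum(after) / len(after)) / step_error

# Same build measured twice: small gap, normal noise -> |result| well under 25.
noisy_same = step_fit([250.1, 252.3, 249.8, 251.0, 250.5],
                      [251.2, 250.0, 252.1, 249.9, 250.8])
# A clear ~30 ms slowdown in v2 -> large negative result.
clear_jump = step_fit([250.1, 252.3, 249.8, 251.0, 250.5],
                      [280.4, 282.1, 279.9, 281.3, 280.6])
print(f"same build twice: {noisy_same:+.2f}  (|result| <= 25 -> noise)")
print(f"30 ms regression: {clear_jump:+.2f}  (negative -> v2 slower)")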

@@ -38,7 +38,7 @@ jobs:
       - name: Build Benchmark APKs
         run: ./gradlew :app:assembleDemoBenchmark :benchmarks:assembleDemoBenchmark
-      - name: Run Benchmarks & Record Video
+      - name: Run Benchmarks
         uses: reactivecircus/android-emulator-runner@v2
         with:
           api-level: 30
@@ -48,23 +48,54 @@ jobs:
           emulator-options: -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim -memory 4096
           disable-animations: true
           script: |
-            # 1. Start recording
-            # adb shell screenrecord --time-limit 180 /sdcard/benchmark_video.mp4 &
-            # 2. Run ONLY the Startup tests
-            ./gradlew :benchmarks:connectedDemoBenchmarkAndroidTest -Pandroid.testInstrumentationRunnerArguments.class=com.google.samples.apps.nowinandroid.startup.StartupBenchmark -Pandroid.testInstrumentationRunnerArguments.androidx.benchmark.suppressErrors=EMULATOR || true
-            # 3. Pull the video (This will run even if the tests above fail)
-            # echo "Pulling video file..."
-            # adb shell pkill -2 screenrecord || true
-            # sleep 5
-            # adb pull /sdcard/benchmark_video.mp4 benchmark_video.mp4
+            set -e
+            OUTPUT_DIR="benchmarks/build/outputs/connected_android_test_additional_output"
+            COLLECTED_DIR="$OUTPUT_DIR/collected"
+            mkdir -p "$COLLECTED_DIR"
+
+            run_benchmark () {
+              VERSION_LABEL=$1  # v1 or v2
+              RUN_NUMBER=$2     # 1..5
+              echo "=============================="
+              echo "Running benchmark for $VERSION_LABEL run $RUN_NUMBER"
+              echo "=============================="
+
+              # Clear app data to keep runs consistent
+              adb shell pm clear com.google.samples.apps.nowinandroid || true
+
+              # Run only the Startup benchmark
+              ./gradlew :benchmarks:connectedDemoBenchmarkAndroidTest \
+                -Pandroid.testInstrumentationRunnerArguments.class=com.google.samples.apps.nowinandroid.startup.StartupBenchmark \
+                -Pandroid.testInstrumentationRunnerArguments.androidx.benchmark.suppressErrors=EMULATOR
+
+              # Find the newest JSON result file
+              LATEST_JSON=$(find "$OUTPUT_DIR" -name "*.json" -type f -printf "%T@ %p\n" | sort -nr | head -n1 | cut -d' ' -f2-)
+              if [ -z "$LATEST_JSON" ]; then
+                echo "Error: No benchmark JSON file found"
+                exit 1
+              fi
+
+              NEW_NAME="$COLLECTED_DIR/benchmark_${VERSION_LABEL}_run${RUN_NUMBER}.json"
+              cp "$LATEST_JSON" "$NEW_NAME"
+              echo "Saved result to $NEW_NAME"
+            }
+
+            # Alternate runs: v1, v2, v1, v2 ...
+            for i in 1 2 3 4 5; do
+              run_benchmark "v1" "$i"
+              run_benchmark "v2" "$i"
+            done
+      - name: Run step fit
+        run: python3 .github/scripts/step_fit.py
       - name: Upload Artifacts
         if: always()
         uses: actions/upload-artifact@v4
         with:
           name: benchmark-results
-          path: |
-            # benchmark_video.mp4
-            benchmarks/build/outputs/connected_android_test_additional_output/
+          path: benchmarks/build/outputs/connected_android_test_additional_output/collected/
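Before the step fit runs, the collected directory is expected to hold exactly five benchmark_v1_run<N>.json and five benchmark_v2_run<N>.json files, named by the cp in run_benchmark above and grouped by step_fit.py via the "v1"/"v2" substring. A hypothetical local helper, not part of the commit, for verifying that naming contract on downloaded artifacts:

import os
import re

# Path matches RESULTS_DIR in step_fit.py; adjust for a local artifact copy.
COLLECTED_DIR = "benchmarks/build/outputs/connected_android_test_additional_output/collected"
# Mirrors benchmark_${VERSION_LABEL}_run${RUN_NUMBER}.json from the workflow.
PATTERN = re.compile(r"benchmark_(v[12])_run(\d+)\.json$")

counts = {"v1": 0, "v2": 0}
for name in sorted(os.listdir(COLLECTED_DIR)):
    m = PATTERN.match(name)
    if not m:
        print(f"unexpected file: {name}")
        continue
    counts[m.group(1)] += 1

print(counts)
assert counts == {"v1": 5, "v2": 5}, "expected 5 runs per version"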