#!/usr/bin/env bash
#
# Automated A/B macrobenchmark runner.
#
# Installs a baseline APK and a candidate APK on the connected
# device/emulator, runs the instrumented benchmark APK against each, and
# pulls the resulting JSON reports into OUTPUT_DIR/{baseline,candidate}.
# Requires `adb` and `apkanalyzer` on PATH and one connected device.

set -euo pipefail

# ---- Configuration & defaults -----------------------------------------------
NUMBER_OF_RUNS=1
PATH_APK_BASELINE=""
PATH_APK_CANDIDATE=""
PATH_APK_BENCHMARK=""
INSTRUMENT_PASSTHROUGH_ARGS=()
OUTPUT_DIR="./macrobenchmark_results"
TEST_RUNNER="androidx.test.runner.AndroidJUnitRunner"
EMULATOR_BENCHMARK_RESULT_DIR="/sdcard/Download"

# ---- Cleanup ----------------------------------------------------------------
TEMP_DIR="$(mktemp -d)"
trap 'rm -rf "${TEMP_DIR}"' EXIT

# Print an error message to stderr and abort the script.
die() {
  echo "err: $*" 1>&2
  exit 1
}

print_usage() {
  # NOTE: option names in the synopsis and example must stay in sync with the
  # parser below (hyphenated, not underscored), and the example must include
  # every mandatory option so it is copy-paste runnable.
  cat << EOF
Usage: $(basename "$0") [OPTIONS] --baseline-apk <apk> --candidate-apk <apk> --benchmark-apk <apk> [-- INSTRUMENT_ARGS]

Automated benchmark script for APKs.

Options:
  -o, --output-dir    Directory where benchmark results will be saved. (Default: "${OUTPUT_DIR}")
  --baseline-apk      Path to the baseline APK file.
  --candidate-apk     Path to the candidate APK file.
  --benchmark-apk     Path to the benchmark APK. Must contain instrumented tests.
  -n, --runs          Set number of runs per benchmark. (Default: 1)
  -h, --help          Display this help message and exit.

Additional Arguments:
  --                  Everything after '--' is passed directly to
                      the adb instrumentation command.

Example:
  $(basename "$0") -o ./macrobenchmark_results --baseline-apk base.apk --candidate-apk candidate.apk --benchmark-apk bench.apk -- -e androidx.benchmark.profiling.mode none
EOF
}

# Resolve the applicationId baked into an APK's manifest.
get_pkg_name() {
  local apk="${1}"
  apkanalyzer manifest application-id "${apk}"
}

# Install an APK, clear app/benchmark state, and reset the on-device result dir.
install_apk() {
  local apk="${1}"
  echo "Installing APK: ${apk}"
  # -r: replace an already-installed package (required on every install after
  #     the first, since baseline/candidate share one application id).
  # -d: allow a versionCode downgrade (the baseline build may be older).
  adb install -r -d "${apk}" > /dev/null || die "failed to install apk '${apk}'"
  # Best-effort state reset; the packages may not be installed yet.
  adb shell pm clear "${APP_PKG_NAME}" > /dev/null 2>&1 || true
  adb shell pm clear "${BENCHMARK_PKG_NAME}" > /dev/null 2>&1 || true
  adb shell "rm -rf ${EMULATOR_BENCHMARK_RESULT_DIR} && mkdir -p ${EMULATOR_BENCHMARK_RESULT_DIR}" > /dev/null || true
}

# Run the instrumented benchmark suite on the device.
run_benchmark() {
  echo "Running benchmarks..."
  # ${arr[@]+...} keeps 'set -u' happy on bash < 4.4 when no passthrough
  # arguments were supplied after '--'.
  adb shell am instrument -w \
    -e androidx.benchmark.suppressErrors EMULATOR \
    -e androidx.benchmark.profiling.mode none \
    -e no-isolated-storage true \
    -e additionalTestOutputDir "${EMULATOR_BENCHMARK_RESULT_DIR}" \
    ${INSTRUMENT_PASSTHROUGH_ARGS[@]+"${INSTRUMENT_PASSTHROUGH_ARGS[@]}"} \
    "${BENCHMARK_PKG_NAME}/${TEST_RUNNER}"
}

# Pull the benchmark JSON report from the device and move it to ${1}.
write_benchmark_result() {
  local dest_path="${1}"
  local pull_temp
  # Split declaration from assignment so a failed substitution is not masked.
  pull_temp="${TEMP_DIR}/pull_$(date +%s)"
  adb pull "${EMULATOR_BENCHMARK_RESULT_DIR}/." "${pull_temp}" > /dev/null
  # Quote the dirname substitution so destination paths with spaces work.
  mkdir -p "$(dirname "${dest_path}")" && mv "${pull_temp}/"*.json "${dest_path}"
}

# ---- Argument parsing --------------------------------------------------------
while [[ $# -gt 0 ]]; do
  case "$1" in
    -h|--help)
      print_usage
      exit 0
      ;;
    -o|--output-dir)
      OUTPUT_DIR="$2"
      shift 2
      ;;
    --baseline-apk)
      PATH_APK_BASELINE="$2"
      shift 2
      ;;
    --candidate-apk)
      PATH_APK_CANDIDATE="$2"
      shift 2
      ;;
    --benchmark-apk)
      PATH_APK_BENCHMARK="$2"
      shift 2
      ;;
    -n|--runs)
      NUMBER_OF_RUNS="$2"
      # Accept positive integers only.
      if ! [[ "${NUMBER_OF_RUNS}" =~ ^[1-9][0-9]*$ ]]; then
        print_usage
        exit 1
      fi
      shift 2
      ;;
    --)
      # Everything after '--' is forwarded verbatim to 'am instrument'.
      shift
      INSTRUMENT_PASSTHROUGH_ARGS+=("$@")
      break
      ;;
    *)
      echo "$(basename "$0"): invalid option -- '$1'"
      echo "Try '$(basename "$0") --help' for more information"
      exit 1
      ;;
  esac
done

# All three APKs are mandatory.
if [[ -z "${PATH_APK_BASELINE}" || -z "${PATH_APK_CANDIDATE}" || -z "${PATH_APK_BENCHMARK}" ]]; then
  print_usage
  exit 1
fi

APP_PKG_NAME=$(get_pkg_name "${PATH_APK_BASELINE}")
BENCHMARK_PKG_NAME=$(get_pkg_name "${PATH_APK_BENCHMARK}")

# The benchmark (test) APK only needs to be installed once.
install_apk "${PATH_APK_BENCHMARK}"
for ((i = 1; i <= NUMBER_OF_RUNS; i++)); do
  echo "--- Starting benchmark run (${i} / ${NUMBER_OF_RUNS}) ---"

  start_time=$SECONDS
  # Baseline and candidate share one filename so downstream tooling can pair
  # the two reports of the same iteration.
  output_filename="${BENCHMARK_PKG_NAME}_$(date +"%Y-%m-%dT%H-%M-%S").json"

  # Baseline
  install_apk "${PATH_APK_BASELINE}"
  run_benchmark
  write_benchmark_result "${OUTPUT_DIR}/baseline/${output_filename}"

  # Candidate
  install_apk "${PATH_APK_CANDIDATE}"
  run_benchmark
  write_benchmark_result "${OUTPUT_DIR}/candidate/${output_filename}"

  duration=$((SECONDS - start_time))
  echo "--- Ending benchmark run (${i} / ${NUMBER_OF_RUNS}) took ${duration}s ---"
done

echo "Benchmark completed. Results in '$OUTPUT_DIR'"
-for ((i=1; i<=${NUMBER_OF_RUNS}; i++)); do - start_time=$(date +%s) - - timestamp=$(date +"%Y-%m-%dT%H-%M-%S") - output_filename="${BENCHMARK_PKG}_${timestamp}.json" - baseline_output_path="${OUTPUT_DIR}/baseline/${output_filename}" - candidate_output_path="${OUTPUT_DIR}/candidate/${output_filename}" - - echo "==============================" - echo "Start iteration (${i} / ${NUMBER_OF_RUNS})" - echo "==============================" - - echo "Starting Baseline Benchmark:" - echo " >> APK file : ${PATH_APK_BASELINE}" - echo " >> Output file path: ${baseline_output_path}" - - install_apk "${PATH_APK_BASELINE}" - run_benchmark - write_benchmark_result "${baseline_output_path}" - - echo "Starting Candidate Benchmark:" - echo " >> APK file : ${PATH_APK_CANDIDATE}" - echo " >> Output file path: ${candidate_output_path}" - - install_apk "${PATH_APK_CANDIDATE}" - run_benchmark - write_benchmark_result "${candidate_output_path}" - - end_time=$(date +%s) - duration=$((end_time - start_time)) - - echo "==============================" - echo "End iteration (${i} / ${NUMBER_OF_RUNS}) took ${duration}s" - echo "==============================" -done diff --git a/.github/scripts/step_fit.py b/.github/scripts/step_fit.py deleted file mode 100644 index ef1691ece..000000000 --- a/.github/scripts/step_fit.py +++ /dev/null @@ -1,111 +0,0 @@ -import argparse -import json -import math -import sys -from pathlib import Path - -# ----------- CONFIG ----------- -BENCHMARK_NAME = "startupPrecompiledWithBaselineProfile" -METRIC_KEY = "timeToInitialDisplayMs" -# ------------------------------ - -def step_fit(a, b): - def sum_squared_error(values): - avg = sum(values) / len(values) - return sum((v - avg) ** 2 for v in values) - - if not a or not b: - return 0.0 - - total_squared_error = sum_squared_error(a) + sum_squared_error(b) - step_error = math.sqrt(total_squared_error) / (len(a) + len(b)) - if step_error == 0.0: - return 0.0 - - return (sum(a) / len(a) - sum(b) / len(b)) / step_error - 
-def extract_median_from_files(paths): - medians = [] - - for path in paths: - with open(path, "r") as f: - data = json.load(f) - - found = False - for bench in data.get("benchmarks", []): - if bench.get("name") == BENCHMARK_NAME: - metrics = bench.get("metrics", {}) - metric = metrics.get(METRIC_KEY, {}) - medians.append(metric.get("median")) - found = True - - if not found: - raise ValueError(f"Metric not found in {path}") - - return medians - -def main(): - parser = argparse.ArgumentParser(prog='Comperator', description='Compare between multiple macrobenchmark test results') - parser.add_argument('baseline_dir', help='Baseline macrobenchmark reports directory') - parser.add_argument('candidate_dir', help='Candidate macrobenchmark reports directory') - args = parser.parse_args() - - baseline_dir = Path(args.baseline_dir) - candidate_dir = Path(args.candidate_dir) - baseline_files = sorted(baseline_dir.glob("*.json")) - candidate_files = sorted(candidate_dir.glob("*.json")) - - if len(baseline_files) <= 0: - print('ERR: baseline has no macrobenchmark results', file=sys.stderr) - exit(1) - - if len(candidate_files) <= 0: - print('ERR: candidate has no macrobenchmark results', file=sys.stderr) - exit(1) - - min_len = min(len(baseline_files), len(candidate_files)) - if len(baseline_files) != len(candidate_files): - print(f"WARN: Length mismatch, using first {min_len} samples. 
baseline: {len(baseline_files)}, candidate: {len(candidate_files)}") - - print('Macrobenchmark Result Mapping:') - print('| Index | Baseline | Candidate |') - print('--------------------------------') - - mismatch_count = 0 - for i in range(min_len): - baseline_filename = baseline_files[i].name.upper() - candidate_filename = candidate_files[i].name.upper() - if baseline_filename != candidate_filename: - mismatch_count += 1 - print('* ', end='') - print(f'{i + 1} {baseline_files[i]} <-> {candidate_files[i]}') - - print('--------------------------------') - print(f'# Match : {min_len - mismatch_count}') - print(f'# Mismatch: {mismatch_count}') - if mismatch_count > 0: - print("WARN: filename mapping mismatch detected. Output prediction may be incorrect") - print() - - baseline_medians = extract_median_from_files(baseline_files[:min_len]) - candidate_medians = extract_median_from_files(candidate_files[:min_len]) - assert (len(baseline_medians) == len(candidate_medians)) - - print(f"Benchmark : {BENCHMARK_NAME}") - print(f"Metric : {METRIC_KEY}") - print(f"Baseline medians : {baseline_medians}") - print(f"Candidate medians: {candidate_medians}") - print("-----------------------------") - print("Result: ", end="") - - result = step_fit(baseline_medians, candidate_medians) - if abs(result) <= 25: - print("Within noise range", end="") - elif result < 0: - print("POSSIBLE REGRESSION", end="") - else: - print("POSSIBLE IMPROVEMENT", end="") - print(f" (Step fit: {result:.4})") - -if __name__ == "__main__": - main() diff --git a/.github/workflows/Build.yaml b/.github/workflows/Build.yaml index edd2c70cf..54f643493 100644 --- a/.github/workflows/Build.yaml +++ b/.github/workflows/Build.yaml @@ -5,7 +5,6 @@ on: env: BASELINE_BRANCH: main - GRADLE_BUILD_CACHE_DIR: ${{ github.workspace }}/.common-gradle-cache jobs: benchmark-android: @@ -49,6 +48,11 @@ jobs: # Jobs on other branches will read entries from the cache but will not write updated entries. 
cache-read-only: false + - name: Setup Python + uses: actions/setup-python@v6 + with: + python-version: '3.13' + - name: Build APKs (Baseline) working-directory: ./baseline run: | @@ -71,15 +75,13 @@ jobs: emulator-options: -no-snapshot -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim -memory 4096 disable-animations: true script: | - adb install ./candidate/benchmarks/build/outputs/apk/demo/benchmarkRelease/benchmarks-demo-benchmarkRelease.apk - chmod +x ./candidate/.github/scripts/run_macrobenchmarks.sh - ./candidate/.github/scripts/run_macrobenchmarks.sh "./baseline/app/build/outputs/apk/demo/benchmarkRelease/app-demo-benchmarkRelease.apk" "./candidate/app/build/outputs/apk/demo/benchmarkRelease/app-demo-benchmarkRelease.apk" + chmod +x ./candidate/.github/scripts/benchmark.sh + ./candidate/.github/scripts/benchmark.sh --baseline-apk "./baseline/app/build/outputs/apk/demo/benchmarkRelease/app-demo-benchmarkRelease.apk" --candidate-apk "./candidate/app/build/outputs/apk/demo/benchmarkRelease/app-demo-benchmarkRelease.apk" --benchmark-apk "./candidate/benchmarks/build/outputs/apk/demo/benchmarkRelease/benchmarks-demo-benchmarkRelease.apk" - name: Compare macrobenchmark results run: | - python3 ./candidate/.github/scripts/step_fit.py \ - "./macrobenchmark_results/baseline" \ - "./macrobenchmark_results/candidate" + pip install git+https://github.com/Frozen-Bytes/benchcomp.git@v1.0.0 + benchcomp --verbose "./macrobenchmark_results/baseline" "./macrobenchmark_results/candidate" - name: Upload Artifacts if: always()