build: migrate to the latest benchcomp, benchmark.sh, and workflow versions

pull/2098/head^2
Tony Medhat 3 months ago
parent f7ceb01e83
commit 650ada81e8

@@ -0,0 +1,151 @@
#!/usr/bin/env bash
set -euo pipefail
# Configuration & Defaults
NUMBER_OF_RUNS=1
PATH_APK_BASELINE=""
PATH_APK_CANDIDATE=""
PATH_APK_BENCHMARK=""
INSTRUMENT_PASSTHROUGH_ARGS=()
OUTPUT_DIR="./macrobenchmark_results"
TEST_RUNNER="androidx.test.runner.AndroidJUnitRunner"
EMULATOR_BENCHMARK_RESULT_DIR="/sdcard/Download"
# Cleanup
TEMP_DIR="$(mktemp -d)"
trap 'rm -rf "${TEMP_DIR}"' EXIT
die() {
    echo "err: $*" 1>&2
    exit 1
}

print_usage() {
    cat << EOF
Usage: $(basename "$0") [OPTIONS] --baseline-apk <path> --candidate-apk <path> --benchmark-apk <path> [-- INSTRUMENT_ARGS]

Automated benchmark script for APKs.

Options:
  -o, --output-dir <path>     Directory where benchmark results will be saved. (Default: "${OUTPUT_DIR}")
      --baseline-apk <path>   Path to the baseline APK file.
      --candidate-apk <path>  Path to the candidate APK file.
      --benchmark-apk <path>  Path to the benchmark APK. Must contain instrumented tests.
  -n, --runs <number>         Set the number of runs per benchmark. (Default: 1)
  -h, --help                  Display this help message and exit.

Additional Arguments:
  --                          Everything after '--' is passed directly to
                              the adb instrumentation command.

Example:
  $(basename "$0") -o ./macrobenchmark_results --baseline-apk base.apk --candidate-apk candidate.apk --benchmark-apk benchmarks.apk -- -e androidx.benchmark.profiling.mode none
EOF
}

get_pkg_name() {
    local apk="${1}"
    apkanalyzer manifest application-id "${apk}"
}

install_apk() {
    local apk="${1}"
    echo "Installing APK: ${apk}"
    # -r replaces an already-installed package (baseline and candidate share
    # one application id); -d allows a version-code downgrade between them.
    adb install -r -d "${apk}" > /dev/null || die "failed to install apk '${apk}'"
    # Clear app state and stale results so runs cannot influence each other.
    adb shell pm clear "${APP_PKG_NAME}" > /dev/null 2>&1 || true
    adb shell pm clear "${BENCHMARK_PKG_NAME}" > /dev/null 2>&1 || true
    adb shell "rm -rf ${EMULATOR_BENCHMARK_RESULT_DIR} && mkdir -p ${EMULATOR_BENCHMARK_RESULT_DIR}" > /dev/null || true
}

run_benchmark() {
    echo "Running benchmarks..."
    # Suppress the Macrobenchmark emulator error so the suite runs on CI
    # emulators, disable profiling so tracing does not skew timings, and
    # write the JSON reports somewhere we can 'adb pull' from afterwards.
    adb shell am instrument -w \
        -e androidx.benchmark.suppressErrors EMULATOR \
        -e androidx.benchmark.profiling.mode none \
        -e no-isolated-storage true \
        -e additionalTestOutputDir "${EMULATOR_BENCHMARK_RESULT_DIR}" \
        "${INSTRUMENT_PASSTHROUGH_ARGS[@]}" \
        "${BENCHMARK_PKG_NAME}/${TEST_RUNNER}"
}

write_benchmark_result() {
    local dest_path="${1}"
    local pull_temp="${TEMP_DIR}/pull_$(date +%s)"
    adb pull "${EMULATOR_BENCHMARK_RESULT_DIR}/." "${pull_temp}" > /dev/null
    mkdir -p "$(dirname "${dest_path}")" && mv "${pull_temp}/"*.json "${dest_path}"
}

while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help)
            print_usage
            exit 0
            ;;
        -o|--output-dir)
            OUTPUT_DIR="$2"
            shift 2
            ;;
        --baseline-apk)
            PATH_APK_BASELINE="$2"
            shift 2
            ;;
        --candidate-apk)
            PATH_APK_CANDIDATE="$2"
            shift 2
            ;;
        --benchmark-apk)
            PATH_APK_BENCHMARK="$2"
            shift 2
            ;;
        -n|--runs)
            NUMBER_OF_RUNS="$2"
            # Reject non-integer values.
            if ! [[ "${NUMBER_OF_RUNS}" =~ ^[0-9]+$ ]]; then
                print_usage
                exit 1
            fi
            shift 2
            ;;
        --)
            # Everything after '--' goes straight to 'adb shell am instrument'.
            shift
            INSTRUMENT_PASSTHROUGH_ARGS+=("$@")
            break
            ;;
        *)
            echo "$(basename "$0"): invalid option -- '$1'"
            echo "Try '$(basename "$0") --help' for more information"
            exit 1
            ;;
    esac
done

if [[ -z "${PATH_APK_BASELINE}" || -z "${PATH_APK_CANDIDATE}" || -z "${PATH_APK_BENCHMARK}" ]]; then
print_usage
exit 1
fi
APP_PKG_NAME=$(get_pkg_name "${PATH_APK_BASELINE}")
BENCHMARK_PKG_NAME=$(get_pkg_name "${PATH_APK_BENCHMARK}")
install_apk "${PATH_APK_BENCHMARK}"
for ((i = 1; i <= NUMBER_OF_RUNS; i++)); do
    echo "--- Starting benchmark run (${i} / ${NUMBER_OF_RUNS}) ---"
    start_time=$SECONDS
    # Baseline and candidate reports share one filename so the comparison
    # step can pair them by name across the two output directories.
    output_filename="${BENCHMARK_PKG_NAME}_$(date +"%Y-%m-%dT%H-%M-%S").json"

    # Baseline
    install_apk "${PATH_APK_BASELINE}"
    run_benchmark
    write_benchmark_result "${OUTPUT_DIR}/baseline/${output_filename}"

    # Candidate
    install_apk "${PATH_APK_CANDIDATE}"
    run_benchmark
    write_benchmark_result "${OUTPUT_DIR}/candidate/${output_filename}"

    duration=$((SECONDS - start_time))
    echo "--- Ending benchmark run (${i} / ${NUMBER_OF_RUNS}) took ${duration}s ---"
done
echo "Benchmark completed. Results in '$OUTPUT_DIR'"

@@ -1,92 +0,0 @@
#!/usr/bin/env bash
set -euo pipefail
# TODO: pass number of runs as a commandline argument
NUMBER_OF_RUNS=2
APP_PKG="com.google.samples.apps.nowinandroid"
BENCHMARK_PKG="com.google.samples.apps.nowinandroid.benchmarks"
TEST_RUNNER="androidx.test.runner.AndroidJUnitRunner"
EMULATOR_BENCHMARK_RESULT_DIR="/sdcard/Download"
PATH_APK_BASELINE="${1:-}"
PATH_APK_CANDIDATE="${2:-}"
OUTPUT_DIR="${3:-./macrobenchmark_results}"
TEMP_DIR="$(mktemp -d)"
trap 'rm -rf "${TEMP_DIR}"' EXIT
install_apk() {
    local apk_path="${1}"
    adb install -r "${apk_path}"
    adb shell pm clear "$APP_PKG" || true
    adb shell pm clear "${BENCHMARK_PKG}" || true
    adb shell rm -rf "${EMULATOR_BENCHMARK_RESULT_DIR}" || true
    adb shell mkdir -p "${EMULATOR_BENCHMARK_RESULT_DIR}"
}
run_benchmark() {
    adb shell am instrument -w \
        -e class com.google.samples.apps.nowinandroid.startup.StartupBenchmark#startupPrecompiledWithBaselineProfile \
        -e androidx.benchmark.suppressErrors EMULATOR \
        -e androidx.benchmark.profiling.mode none \
        -e no-isolated-storage true \
        -e additionalTestOutputDir "${EMULATOR_BENCHMARK_RESULT_DIR}" \
        "$BENCHMARK_PKG/$TEST_RUNNER"
}
write_benchmark_result() {
    local output_path="${1}"
    adb pull "${EMULATOR_BENCHMARK_RESULT_DIR}/." "${TEMP_DIR}/pull_out/"
    mv "${TEMP_DIR}/pull_out/"*.json "${output_path}"
    rm -rf "${TEMP_DIR}/pull_out/"
}
if [[ -z "${PATH_APK_BASELINE}" || -z "${PATH_APK_CANDIDATE}" ]]; then
echo "Usage: $0 <path_to_baseline.apk> <path_to_candidate.apk> [output_dir]"
exit 1
fi
mkdir -p "${OUTPUT_DIR}/baseline" "${OUTPUT_DIR}/candidate"
# Alternate runs: v1, v2, v1, v2 ...
for ((i=1; i<=${NUMBER_OF_RUNS}; i++)); do
    start_time=$(date +%s)
    timestamp=$(date +"%Y-%m-%dT%H-%M-%S")
    output_filename="${BENCHMARK_PKG}_${timestamp}.json"
    baseline_output_path="${OUTPUT_DIR}/baseline/${output_filename}"
    candidate_output_path="${OUTPUT_DIR}/candidate/${output_filename}"

    echo "=============================="
    echo "Start iteration (${i} / ${NUMBER_OF_RUNS})"
    echo "=============================="

    echo "Starting Baseline Benchmark:"
    echo " >> APK file        : ${PATH_APK_BASELINE}"
    echo " >> Output file path: ${baseline_output_path}"
    install_apk "${PATH_APK_BASELINE}"
    run_benchmark
    write_benchmark_result "${baseline_output_path}"

    echo "Starting Candidate Benchmark:"
    echo " >> APK file        : ${PATH_APK_CANDIDATE}"
    echo " >> Output file path: ${candidate_output_path}"
    install_apk "${PATH_APK_CANDIDATE}"
    run_benchmark
    write_benchmark_result "${candidate_output_path}"

    end_time=$(date +%s)
    duration=$((end_time - start_time))
    echo "=============================="
    echo "End iteration (${i} / ${NUMBER_OF_RUNS}) took ${duration}s"
    echo "=============================="
done

@@ -1,111 +0,0 @@
import argparse
import json
import math
import sys
from pathlib import Path
# ----------- CONFIG -----------
BENCHMARK_NAME = "startupPrecompiledWithBaselineProfile"
METRIC_KEY = "timeToInitialDisplayMs"
# ------------------------------
def step_fit(a, b):
    """Signed step-fit score between two sample groups.

    Returns (mean(a) - mean(b)) / step_error, where step_error is
    sqrt(SSE(a) + SSE(b)) / (len(a) + len(b)) and SSE is a group's sum of
    squared deviations from its own mean. For a time-based metric with
    a = baseline and b = candidate, a negative score means the candidate
    is slower.
    """
    def sum_squared_error(values):
        avg = sum(values) / len(values)
        return sum((v - avg) ** 2 for v in values)

    if not a or not b:
        return 0.0
    total_squared_error = sum_squared_error(a) + sum_squared_error(b)
    step_error = math.sqrt(total_squared_error) / (len(a) + len(b))
    if step_error == 0.0:
        return 0.0
    return (sum(a) / len(a) - sum(b) / len(b)) / step_error
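
# A quick worked example (numbers hypothetical): with baseline medians
# [100, 102] and candidate medians [110, 108], each group has a sum of
# squared deviations of 2, so step_error = sqrt(2 + 2) / 4 = 0.5 and the
# score is (101 - 109) / 0.5 = -16.0, which still falls inside the +/-25
# noise range used in main() below.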

def extract_median_from_files(paths):
    medians = []
    for path in paths:
        with open(path, "r") as f:
            data = json.load(f)
        found = False
        for bench in data.get("benchmarks", []):
            if bench.get("name") == BENCHMARK_NAME:
                metrics = bench.get("metrics", {})
                metric = metrics.get(METRIC_KEY, {})
                medians.append(metric.get("median"))
                found = True
        if not found:
            raise ValueError(f"Metric not found in {path}")
    return medians
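
# For reference, the Macrobenchmark report JSON parsed above looks roughly
# like this (heavily trimmed):
# {
#   "benchmarks": [
#     {
#       "name": "startupPrecompiledWithBaselineProfile",
#       "metrics": {"timeToInitialDisplayMs": {"median": 123.4}}
#     }
#   ]
# }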

def main():
    parser = argparse.ArgumentParser(prog='Comparator', description='Compare multiple macrobenchmark test results')
    parser.add_argument('baseline_dir', help='Baseline macrobenchmark reports directory')
    parser.add_argument('candidate_dir', help='Candidate macrobenchmark reports directory')
    args = parser.parse_args()

    baseline_dir = Path(args.baseline_dir)
    candidate_dir = Path(args.candidate_dir)
    baseline_files = sorted(baseline_dir.glob("*.json"))
    candidate_files = sorted(candidate_dir.glob("*.json"))
    if len(baseline_files) <= 0:
        print('ERR: baseline has no macrobenchmark results', file=sys.stderr)
        sys.exit(1)
    if len(candidate_files) <= 0:
        print('ERR: candidate has no macrobenchmark results', file=sys.stderr)
        sys.exit(1)

    min_len = min(len(baseline_files), len(candidate_files))
    if len(baseline_files) != len(candidate_files):
        print(f"WARN: Length mismatch, using first {min_len} samples. baseline: {len(baseline_files)}, candidate: {len(candidate_files)}")

    print('Macrobenchmark Result Mapping:')
    print('| Index | Baseline | Candidate |')
    print('--------------------------------')
    mismatch_count = 0
    for i in range(min_len):
        baseline_filename = baseline_files[i].name.upper()
        candidate_filename = candidate_files[i].name.upper()
        if baseline_filename != candidate_filename:
            mismatch_count += 1
            print('* ', end='')
        print(f'{i + 1} {baseline_files[i]} <-> {candidate_files[i]}')
    print('--------------------------------')
    print(f'# Match   : {min_len - mismatch_count}')
    print(f'# Mismatch: {mismatch_count}')
    if mismatch_count > 0:
        print("WARN: filename mapping mismatch detected. Output prediction may be incorrect")
    print()

    baseline_medians = extract_median_from_files(baseline_files[:min_len])
    candidate_medians = extract_median_from_files(candidate_files[:min_len])
    assert len(baseline_medians) == len(candidate_medians)
    print(f"Benchmark        : {BENCHMARK_NAME}")
    print(f"Metric           : {METRIC_KEY}")
    print(f"Baseline medians : {baseline_medians}")
    print(f"Candidate medians: {candidate_medians}")
    print("-----------------------------")
    print("Result: ", end="")
    result = step_fit(baseline_medians, candidate_medians)
    if abs(result) <= 25:
        print("Within noise range", end="")
    elif result < 0:
        print("POSSIBLE REGRESSION", end="")
    else:
        print("POSSIBLE IMPROVEMENT", end="")
    print(f" (Step fit: {result:.4})")


if __name__ == "__main__":
    main()

@@ -5,7 +5,6 @@ on:
env:
  BASELINE_BRANCH: main
  GRADLE_BUILD_CACHE_DIR: ${{ github.workspace }}/.common-gradle-cache
jobs:
  benchmark-android:
@@ -49,6 +48,11 @@ jobs:
          # Jobs on other branches will read entries from the cache but will not write updated entries.
          cache-read-only: false
      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          python-version: '3.13'
      - name: Build APKs (Baseline)
        working-directory: ./baseline
        run: |
@@ -71,15 +75,13 @@ jobs:
          emulator-options: -no-snapshot -no-window -gpu swiftshader_indirect -noaudio -no-boot-anim -memory 4096
          disable-animations: true
          script: |
            adb install ./candidate/benchmarks/build/outputs/apk/demo/benchmarkRelease/benchmarks-demo-benchmarkRelease.apk
            chmod +x ./candidate/.github/scripts/run_macrobenchmarks.sh
            ./candidate/.github/scripts/run_macrobenchmarks.sh "./baseline/app/build/outputs/apk/demo/benchmarkRelease/app-demo-benchmarkRelease.apk" "./candidate/app/build/outputs/apk/demo/benchmarkRelease/app-demo-benchmarkRelease.apk"
            chmod +x ./candidate/.github/scripts/benchmark.sh
            ./candidate/.github/scripts/benchmark.sh --baseline-apk "./baseline/app/build/outputs/apk/demo/benchmarkRelease/app-demo-benchmarkRelease.apk" --candidate-apk "./candidate/app/build/outputs/apk/demo/benchmarkRelease/app-demo-benchmarkRelease.apk" --benchmark-apk "./candidate/benchmarks/build/outputs/apk/demo/benchmarkRelease/benchmarks-demo-benchmarkRelease.apk"
      - name: Compare macrobenchmark results
        run: |
          python3 ./candidate/.github/scripts/step_fit.py \
            "./macrobenchmark_results/baseline" \
            "./macrobenchmark_results/candidate"
          pip install git+https://github.com/Frozen-Bytes/benchcomp.git@v1.0.0
          benchcomp --verbose "./macrobenchmark_results/baseline" "./macrobenchmark_results/candidate"
      - name: Upload Artifacts
        if: always()