Compare commits

...

13 Commits

Author          SHA1        Message   Date
J. Nick Koston  dd345106f7  lock      2025-06-12 22:37:26 -05:00
J. Nick Koston  9763821d68  lock      2025-06-12 22:34:03 -05:00
J. Nick Koston  3d358cf431  lock      2025-06-12 22:21:15 -05:00
J. Nick Koston  1cebeb53d3  lock      2025-06-12 22:10:40 -05:00
J. Nick Koston  09bfa7f527  lock      2025-06-12 22:09:53 -05:00
J. Nick Koston  a61138c4f7  lock      2025-06-12 22:05:31 -05:00
J. Nick Koston  b23445e1c3  lock      2025-06-12 22:00:28 -05:00
J. Nick Koston  b887c1bf08  lock      2025-06-12 21:58:52 -05:00
J. Nick Koston  9206888966  label     2025-06-12 21:46:38 -05:00
J. Nick Koston  567cba4510  speed up  2025-06-12 21:37:22 -05:00
J. Nick Koston  7da5e02388  tweak     2025-06-12 21:34:03 -05:00
J. Nick Koston  1dd189cf36  tweak     2025-06-12 21:33:53 -05:00
J. Nick Koston  9e5dc01fd4  tweak     2025-06-12 21:33:29 -05:00
5 changed files with 409 additions and 76 deletions

View File

@@ -377,7 +377,15 @@ jobs:
         id: list-components
         run: |
           . venv/bin/activate
-          components=$(script/list-components.py --changed --branch ${{ steps.target-branch.outputs.branch }})
+          # Check if we should test all components (via label)
+          if [[ "${{ contains(github.event.pull_request.labels.*.name, 'test-all-components') }}" == "true" ]]; then
+            echo "Label 'test-all-components' found - testing ALL components"
+            components=$(script/list-components.py)
+          else
+            components=$(script/list-components.py --changed --branch ${{ steps.target-branch.outputs.branch }})
+          fi
           output_components=$(echo "$components" | jq -R -s -c 'split("\n")[:-1] | map(select(length > 0))')
           count=$(echo "$output_components" | jq length)
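The jq filter above converts the newline-separated output of list-components.py into the compact JSON array the job matrix consumes. For reference, an equivalent of that transformation in Python (the sample input is made up):

import json

def to_component_matrix(components: str) -> str:
    """Mimics: jq -R -s -c 'split("\n")[:-1] | map(select(length > 0))'."""
    # Drop the element after the final newline, then any empty entries.
    names = [name for name in components.split("\n")[:-1] if name]
    return json.dumps(names, separators=(",", ":"))

print(to_component_matrix("wifi\nlogger\n"))  # ["wifi","logger"]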
@@ -415,11 +423,15 @@ jobs:
       - name: test_build_components -e config -c ${{ matrix.file }}
         run: |
           . venv/bin/activate
-          ./script/test_build_components -e config -c ${{ matrix.file }}
+          # Use 4 parallel jobs for config validation
+          ./script/test_build_components -e config -c ${{ matrix.file }} -j 4 -f
       - name: test_build_components -e compile -c ${{ matrix.file }}
         run: |
           . venv/bin/activate
-          ./script/test_build_components -e compile -c ${{ matrix.file }}
+          mkdir -p build_cache
+          export PLATFORMIO_BUILD_CACHE_DIR=$PWD/build_cache
+          # Use 2 parallel jobs for compilation (resource intensive)
+          ./script/test_build_components -e compile -c ${{ matrix.file }} -j 2 -f -b $PWD/build_cache

   test-build-components-splitter:
     name: Split components for testing into 20 groups maximum
@@ -471,17 +483,28 @@ jobs:
       - name: Validate config
         run: |
           . venv/bin/activate
-          for component in ${{ matrix.components }}; do
-            ./script/test_build_components -e config -c $component
+          # Process all components in parallel for config validation
+          components="${{ matrix.components }}"
+          # Convert space-separated list to multiple -c flags
+          component_args=""
+          for component in $components; do
+            component_args="$component_args -c $component"
           done
+          # Use 8 parallel jobs for lightweight config validation
+          ./script/test_build_components -e config $component_args -j 8 -f
       - name: Compile config
         run: |
           . venv/bin/activate
-          mkdir build_cache
+          mkdir -p build_cache
           export PLATFORMIO_BUILD_CACHE_DIR=$PWD/build_cache
-          for component in ${{ matrix.components }}; do
-            ./script/test_build_components -e compile -c $component
+          # Process all components in parallel for compilation
+          components="${{ matrix.components }}"
+          component_args=""
+          for component in $components; do
+            component_args="$component_args -c $component"
           done
+          # Use 4 parallel jobs for resource-intensive compilation
+          ./script/test_build_components -e compile $component_args -j 4 -f -b $PWD/build_cache

   ci-status:
     name: CI Status

View File

@@ -10,6 +10,7 @@ import urllib.parse

 import esphome.config_validation as cv
 from esphome.core import CORE, TimePeriodSeconds
+from esphome.git_lock import git_operation_lock

 _LOGGER = logging.getLogger(__name__)
@@ -59,66 +60,72 @@ def clone_or_update(
     )
     repo_dir = _compute_destination_path(key, domain)
-    if not repo_dir.is_dir():
-        _LOGGER.info("Cloning %s", key)
-        _LOGGER.debug("Location: %s", repo_dir)
-        cmd = ["git", "clone", "--depth=1"]
-        cmd += ["--", url, str(repo_dir)]
-        run_git_command(cmd)
-        if ref is not None:
-            # We need to fetch the PR branch first, otherwise git will complain
-            # about missing objects
-            _LOGGER.info("Fetching %s", ref)
-            run_git_command(["git", "fetch", "--", "origin", ref], str(repo_dir))
-            run_git_command(["git", "reset", "--hard", "FETCH_HEAD"], str(repo_dir))
-        if submodules is not None:
-            _LOGGER.info(
-                "Initialising submodules (%s) for %s", ", ".join(submodules), key
-            )
-            run_git_command(
-                ["git", "submodule", "update", "--init"] + submodules, str(repo_dir)
-            )
-    else:
-        # Check refresh needed
-        file_timestamp = Path(repo_dir / ".git" / "FETCH_HEAD")
-        # On first clone, FETCH_HEAD does not exists
-        if not file_timestamp.exists():
-            file_timestamp = Path(repo_dir / ".git" / "HEAD")
-        age = datetime.now() - datetime.fromtimestamp(file_timestamp.stat().st_mtime)
-        if refresh is None or age.total_seconds() > refresh.total_seconds:
-            old_sha = run_git_command(["git", "rev-parse", "HEAD"], str(repo_dir))
-            _LOGGER.info("Updating %s", key)
-            # Stash local changes (if any)
-            run_git_command(
-                ["git", "stash", "push", "--include-untracked"], str(repo_dir)
-            )
-            # Fetch remote ref
-            cmd = ["git", "fetch", "--", "origin"]
-            if ref is not None:
-                cmd.append(ref)
-            run_git_command(cmd, str(repo_dir))
-            # Hard reset to FETCH_HEAD (short-lived git ref corresponding to most recent fetch)
-            run_git_command(["git", "reset", "--hard", "FETCH_HEAD"], str(repo_dir))
-            if submodules is not None:
-                _LOGGER.info(
-                    "Updating submodules (%s) for %s", ", ".join(submodules), key
-                )
-                run_git_command(
-                    ["git", "submodule", "update", "--init"] + submodules, str(repo_dir)
-                )
-
-            def revert():
-                _LOGGER.info("Reverting changes to %s -> %s", key, old_sha)
-                run_git_command(["git", "reset", "--hard", old_sha], str(repo_dir))
-
-            return repo_dir, revert
-    return repo_dir, None
+    # Use lock to prevent concurrent access to the same repository
+    with git_operation_lock(key):
+        if not repo_dir.is_dir():
+            _LOGGER.info("Cloning %s", key)
+            _LOGGER.debug("Location: %s", repo_dir)
+            cmd = ["git", "clone", "--depth=1"]
+            cmd += ["--", url, str(repo_dir)]
+            run_git_command(cmd)
+            if ref is not None:
+                # We need to fetch the PR branch first, otherwise git will complain
+                # about missing objects
+                _LOGGER.info("Fetching %s", ref)
+                run_git_command(["git", "fetch", "--", "origin", ref], str(repo_dir))
+                run_git_command(["git", "reset", "--hard", "FETCH_HEAD"], str(repo_dir))
+            if submodules is not None:
+                _LOGGER.info(
+                    "Initialising submodules (%s) for %s", ", ".join(submodules), key
+                )
+                run_git_command(
+                    ["git", "submodule", "update", "--init"] + submodules, str(repo_dir)
+                )
+        else:
+            # Check refresh needed
+            file_timestamp = Path(repo_dir / ".git" / "FETCH_HEAD")
+            # On first clone, FETCH_HEAD does not exists
+            if not file_timestamp.exists():
+                file_timestamp = Path(repo_dir / ".git" / "HEAD")
+            age = datetime.now() - datetime.fromtimestamp(
+                file_timestamp.stat().st_mtime
+            )
+            if refresh is None or age.total_seconds() > refresh.total_seconds:
+                old_sha = run_git_command(["git", "rev-parse", "HEAD"], str(repo_dir))
+                _LOGGER.info("Updating %s", key)
+                _LOGGER.debug("Location: %s", repo_dir)
+                # Stash local changes (if any)
+                run_git_command(
+                    ["git", "stash", "push", "--include-untracked"], str(repo_dir)
+                )
+                # Fetch remote ref
+                cmd = ["git", "fetch", "--", "origin"]
+                if ref is not None:
+                    cmd.append(ref)
+                run_git_command(cmd, str(repo_dir))
+                # Hard reset to FETCH_HEAD (short-lived git ref corresponding to most recent fetch)
+                run_git_command(["git", "reset", "--hard", "FETCH_HEAD"], str(repo_dir))
+                if submodules is not None:
+                    _LOGGER.info(
+                        "Updating submodules (%s) for %s", ", ".join(submodules), key
+                    )
+                    run_git_command(
+                        ["git", "submodule", "update", "--init"] + submodules,
+                        str(repo_dir),
+                    )

+                def revert():
+                    _LOGGER.info("Reverting changes to %s -> %s", key, old_sha)
+                    run_git_command(["git", "reset", "--hard", old_sha], str(repo_dir))

+                return repo_dir, revert
+        return repo_dir, None

 GIT_DOMAINS = {
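For orientation, the caller contract visible in this hunk: clone_or_update() returns a (repo_dir, revert) pair, and revert is None unless an update actually ran. A hedged sketch of a caller (keyword names follow the variables in the hunk; the real signature and call sites may differ):

from esphome.git import clone_or_update  # import path assumed from this diff

repo_dir, revert = clone_or_update(
    url="https://github.com/example/repo",  # hypothetical repository
    ref=None,
    refresh=None,  # None takes the update path on every call, per the condition above
    domain="external_components",
    submodules=None,
)
try:
    ...  # consume files under repo_dir (component sources, etc.)
except Exception:
    if revert is not None:
        revert()  # roll the checkout back to the pre-update SHA
    raise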

esphome/git_lock.py (new file, 141 lines)
View File

@@ -0,0 +1,141 @@
"""File locking for git operations to prevent race conditions."""

from contextlib import contextmanager
import hashlib
import logging
from pathlib import Path
import sys
import tempfile
import time

# Platform-specific imports
if sys.platform == "win32":
    import msvcrt
else:
    import fcntl

_LOGGER = logging.getLogger(__name__)

# Global lock directory
LOCK_DIR = Path(tempfile.gettempdir()) / "esphome_git_locks"
LOCK_DIR.mkdir(exist_ok=True)


def _acquire_lock_unix(lock_file, timeout, identifier):
    """Acquire lock on Unix systems using fcntl."""
    start_time = time.time()
    last_log_time = start_time
    while True:
        try:
            fcntl.flock(lock_file.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
            return True
        except OSError:
            elapsed = time.time() - start_time
            if elapsed > timeout:
                raise TimeoutError(
                    f"Could not acquire lock for {identifier} within {timeout}s"
                )
            # Log progress every 10 seconds
            if time.time() - last_log_time > 10:
                _LOGGER.info(
                    f"Still waiting for lock {identifier} ({elapsed:.1f}s elapsed)..."
                )
                last_log_time = time.time()
            time.sleep(0.1)


def _release_lock_unix(lock_file):
    """Release lock on Unix systems."""
    try:
        fcntl.flock(lock_file.fileno(), fcntl.LOCK_UN)
    except Exception:
        pass


def _acquire_lock_windows(lock_file, timeout, identifier):
    """Acquire lock on Windows systems using msvcrt."""
    start_time = time.time()
    while True:
        try:
            msvcrt.locking(lock_file.fileno(), msvcrt.LK_NBLCK, 1)
            return True
        except OSError:
            if time.time() - start_time > timeout:
                raise TimeoutError(
                    f"Could not acquire lock for {identifier} within {timeout}s"
                )
            time.sleep(0.1)


def _release_lock_windows(lock_file):
    """Release lock on Windows systems."""
    try:
        msvcrt.locking(lock_file.fileno(), msvcrt.LK_UNLCK, 1)
    except Exception:
        pass


@contextmanager
def git_operation_lock(identifier: str, timeout: float = 30.0):
    """
    Acquire a file lock for a git operation.

    :param identifier: Unique identifier for the operation (e.g., repo URL or path)
    :param timeout: Maximum time to wait for the lock in seconds
    """
    # Create a safe filename from the identifier
    lock_name = hashlib.sha256(identifier.encode()).hexdigest()[:16]
    lock_path = LOCK_DIR / f"{lock_name}.lock"

    # Ensure lock file exists
    lock_path.touch(exist_ok=True)

    lock_file = None
    acquired = False
    try:
        # Open in binary mode for Windows compatibility
        lock_file = open(lock_path, "r+b")

        # Platform-specific lock acquisition
        if sys.platform == "win32":
            acquired = _acquire_lock_windows(lock_file, timeout, identifier)
        else:
            acquired = _acquire_lock_unix(lock_file, timeout, identifier)

        if acquired:
            _LOGGER.debug(f"Acquired lock for {identifier}")
            yield
    finally:
        if lock_file:
            if acquired:
                # Platform-specific lock release
                if sys.platform == "win32":
                    _release_lock_windows(lock_file)
                else:
                    _release_lock_unix(lock_file)
                _LOGGER.debug(f"Released lock for {identifier}")
            lock_file.close()


@contextmanager
def platformio_init_lock(timeout: float = 30.0):
    """Lock for PlatformIO initialization to prevent race conditions."""
    with git_operation_lock("platformio_init", timeout=timeout):
        yield


@contextmanager
def platformio_install_lock(package_name: str, timeout: float = 300.0):
    """Lock for PlatformIO package installation to prevent race conditions."""
    _LOGGER.info(
        f"Waiting for PlatformIO package installation lock ({package_name})..."
    )
    with git_operation_lock(f"platformio_install_{package_name}", timeout=timeout):
        _LOGGER.info(f"Acquired PlatformIO package installation lock ({package_name})")
        yield
    _LOGGER.info(f"Released PlatformIO package installation lock ({package_name})")

View File

@@ -86,9 +86,28 @@ def run_platformio_cli(*args, **kwargs) -> str | int:
     if os.environ.get("ESPHOME_USE_SUBPROCESS") is not None:
         return run_external_process(*cmd, **kwargs)

-    import platformio.__main__
+    # Import with minimal locking to prevent initialization race conditions
+    from esphome.git_lock import platformio_init_lock
+
+    with platformio_init_lock():
+        import platformio.__main__

     patch_structhash()
+
+    # For first-time PlatformIO runs, use a lock to prevent directory creation conflicts
+    home_pio = Path.home() / ".platformio"
+    if not home_pio.exists() and len(args) > 0 and args[0] == "run":
+        from esphome.git_lock import platformio_install_lock
+
+        _LOGGER.info("First PlatformIO run detected, using initialization lock...")
+        with platformio_install_lock("first_run", timeout=120.0):
+            # Create the directory if it still doesn't exist
+            home_pio.mkdir(exist_ok=True)
+            result = run_external_command(platformio.__main__.main, *cmd, **kwargs)
+            _LOGGER.info("First PlatformIO run completed")
+            return result
+
+    # Normal execution without locking
     return run_external_command(platformio.__main__.main, *cmd, **kwargs)
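The shape here mirrors the git.py change: a cheap unlocked existence check, then the lock, then handling the resource again inside it, since another process may have initialized it while we waited. A distilled sketch of that double-checked pattern (the sentinel path and identifier are illustrative):

from pathlib import Path

from esphome.git_lock import git_operation_lock

sentinel = Path("/tmp/example_init_done")  # hypothetical one-time resource

if not sentinel.exists():  # fast path: no locking once initialized
    with git_operation_lock("example_init", timeout=120.0):
        if not sentinel.exists():  # re-check: another process may have won the race
            sentinel.touch()  # the one-time initialization goes here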

View File

@@ -3,25 +3,37 @@
 set -e

 help() {
-  echo "Usage: $0 [-e <config|compile|clean>] [-c <string>] [-t <string>]" 1>&2
+  echo "Usage: $0 [-e <config|compile|clean>] [-c <string>] [-t <string>] [-j <number>] [-p <string>] [-f]" 1>&2
   echo 1>&2
   echo "  - e - Parameter for esphome command. Default compile. Common alternative is config." 1>&2
   echo "  - c - Component folder name to test. Default *. E.g. '-c logger'." 1>&2
   echo "  - t - Target name to test. Put '-t list' to display all possibilities. E.g. '-t esp32-s2-idf-51'." 1>&2
+  echo "  - j - Number of parallel jobs. Default is number of CPU cores." 1>&2
+  echo "  - p - Platform filter. E.g. '-p esp32' to test only ESP32 platforms." 1>&2
+  echo "  - f - Fail fast. Exit on first failure." 1>&2
+  echo "  - b - Build cache directory. E.g. '-b /tmp/esphome_cache'." 1>&2
   exit 1
 }

 # Parse parameter:
 # - `e` - Parameter for `esphome` command. Default `compile`. Common alternative is `config`.
 # - `c` - Component folder name to test. Default `*`.
 esphome_command="compile"
 target_component="*"
-while getopts e:c:t: flag
+num_jobs=$(nproc 2>/dev/null || sysctl -n hw.ncpu 2>/dev/null || echo 4)
+platform_filter=""
+fail_fast=false
+build_cache_dir=""
+while getopts e:c:t:j:p:b:fh flag
 do
     case $flag in
         e) esphome_command=${OPTARG};;
         c) target_component=${OPTARG};;
         t) requested_target_platform=${OPTARG};;
+        j) num_jobs=${OPTARG};;
+        p) platform_filter=${OPTARG};;
+        f) fail_fast=true;;
+        b) build_cache_dir=${OPTARG};;
+        h) help;;
        \?) help;;
     esac
 done
@@ -29,16 +41,66 @@ done
 cd "$(dirname "$0")/.."

 if ! [ -d "./tests/test_build_components/build" ]; then
-  mkdir ./tests/test_build_components/build
+  mkdir -p ./tests/test_build_components/build
 fi

+# Export build cache directory if specified
+if [ -n "$build_cache_dir" ]; then
+  export PLATFORMIO_BUILD_CACHE_DIR="$build_cache_dir"
+  mkdir -p "$build_cache_dir"
+  echo "Using build cache directory: $build_cache_dir"
+fi
+
+# Track PIDs for parallel execution
+pids=()
+failed_builds=()
+build_count=0
+total_builds=0
+
+# Function to wait for jobs and handle failures
+wait_for_jobs() {
+  local max_jobs=$1
+  while [ ${#pids[@]} -ge $max_jobs ]; do
+    for i in "${!pids[@]}"; do
+      if ! kill -0 "${pids[$i]}" 2>/dev/null; then
+        wait "${pids[$i]}"
+        exit_code=$?
+        if [ $exit_code -ne 0 ]; then
+          failed_builds+=("${build_info[$i]}")
+          if [ "$fail_fast" = true ]; then
+            echo "Build failed, exiting due to fail-fast mode"
+            # Kill remaining jobs
+            for pid in "${pids[@]}"; do
+              kill -TERM "$pid" 2>/dev/null || true
+            done
+            exit 1
+          fi
+        fi
+        unset pids[$i]
+        unset build_info[$i]
+        # Reindex arrays
+        pids=("${pids[@]}")
+        build_info=("${build_info[@]}")
+        break
+      fi
+    done
+    sleep 0.1
+  done
+}
+
 start_esphome() {
   if [ -n "$requested_target_platform" ] && [ "$requested_target_platform" != "$target_platform_with_version" ]; then
     echo "Skipping $target_platform_with_version"
     return
   fi

+  # Apply platform filter if specified
+  if [ -n "$platform_filter" ] && [[ ! "$target_platform_with_version" =~ ^$platform_filter ]]; then
+    echo "Skipping $target_platform_with_version (filtered)"
+    return
+  fi
+
   # create dynamic yaml file in `build` folder.
   # `./tests/test_build_components/build/[target_component].[test_name].[target_platform_with_version].yaml`
   component_test_file="./tests/test_build_components/build/$target_component.$test_name.$target_platform_with_version.yaml"
   cp $target_platform_file $component_test_file
@@ -49,17 +111,79 @@ start_esphome() {
     sed -i "s!\$component_test_file!../../.$f!g" $component_test_file
   fi

-  # Start esphome process
-  echo "> [$target_component] [$test_name] [$target_platform_with_version]"
-  set -x
-  # TODO: Validate escape of Command line substitution value
-  python3 -m esphome -s component_name $target_component -s component_dir ../../components/$target_component -s test_name $test_name -s target_platform $target_platform $esphome_command $component_test_file
-  { set +x; } 2>/dev/null
+  # Start esphome process in background
+  build_count=$((build_count + 1))
+  echo "> [$build_count/$total_builds] [$target_component] [$test_name] [$target_platform_with_version]"
+
+  (
+    # Add compile process limit for ESPHome internal parallelization
+    export ESPHOME_COMPILE_PROCESS_LIMIT=2
+
+    # For compilation, add a small random delay to reduce thundering herd effect
+    # This helps stagger the package installation requests
+    if [ "$esphome_command" = "compile" ]; then
+      sleep $((RANDOM % 5))
+    fi
+
+    python3 -m esphome -s component_name $target_component -s component_dir ../../components/$target_component -s test_name $test_name -s target_platform $target_platform $esphome_command $component_test_file
+  ) &
+
+  local pid=$!
+  pids+=($pid)
+  build_info+=("$target_component/$test_name/$target_platform_with_version")
+
+  # Wait if we've reached the job limit
+  wait_for_jobs $num_jobs
 }

 # Find all test yaml files.
 #  - `./tests/components/[target_component]/[test_name].[target_platform].yaml`
 #  - `./tests/components/[target_component]/[test_name].all.yaml`
+
+# First pass: count total builds
+echo "Calculating total number of builds..."
+for f in ./tests/components/$target_component/*.*.yaml; do
+  [ -f "$f" ] || continue
+  IFS='/' read -r -a folder_name <<< "$f"
+  IFS='.' read -r -a file_name <<< "${folder_name[4]}"
+  target_platform="${file_name[1]}"
+  file_name_parts=${#file_name[@]}
+
+  if [ "$target_platform" = "all" ] || [ $file_name_parts = 2 ]; then
+    for target_platform_file in ./tests/test_build_components/build_components_base.*.yaml; do
+      IFS='/' read -r -a folder_name <<< "$target_platform_file"
+      IFS='.' read -r -a file_name <<< "${folder_name[3]}"
+      target_platform="${file_name[1]}"
+      target_platform_with_version=${target_platform_file:52}
+      target_platform_with_version=${target_platform_with_version%.*}
+      if [ -n "$platform_filter" ] && [[ ! "$target_platform_with_version" =~ ^$platform_filter ]]; then
+        continue
+      fi
+      if [ -n "$requested_target_platform" ] && [ "$requested_target_platform" != "$target_platform_with_version" ]; then
+        continue
+      fi
+      total_builds=$((total_builds + 1))
+    done
+  else
+    target_platform_file="./tests/test_build_components/build_components_base.$target_platform.yaml"
+    if [ -f "$target_platform_file" ]; then
+      for target_platform_file in ./tests/test_build_components/build_components_base.$target_platform*.yaml; do
+        target_platform_with_version=${target_platform_file:52}
+        target_platform_with_version=${target_platform_with_version%.*}
+        if [ -n "$platform_filter" ] && [[ ! "$target_platform_with_version" =~ ^$platform_filter ]]; then
+          continue
+        fi
+        if [ -n "$requested_target_platform" ] && [ "$requested_target_platform" != "$target_platform_with_version" ]; then
+          continue
+        fi
+        total_builds=$((total_builds + 1))
+      done
+    fi
+  fi
+done
+
+echo "Total builds to execute: $total_builds with $num_jobs parallel jobs"
+echo
+
+# Second pass: execute builds
 for f in ./tests/components/$target_component/*.*.yaml; do
   [ -f "$f" ] || continue
   IFS='/' read -r -a folder_name <<< "$f"
@@ -72,22 +196,21 @@ for f in ./tests/components/$target_component/*.*.yaml; do
   if [ "$target_platform" = "all" ] || [ $file_name_parts = 2 ]; then
     # Test has *not* defined a specific target platform. Need to run tests for all possible target platforms.
     for target_platform_file in ./tests/test_build_components/build_components_base.*.yaml; do
       IFS='/' read -r -a folder_name <<< "$target_platform_file"
       IFS='.' read -r -a file_name <<< "${folder_name[3]}"
       target_platform="${file_name[1]}"
       target_platform_with_version=${target_platform_file:52}
       target_platform_with_version=${target_platform_with_version%.*}
       start_esphome
     done
   else
     # Test has defined a specific target platform.
     # Validate we have a base test yaml for selected platform.
     # The target_platform is sourced from the following location.
     #  1. `./tests/test_build_components/build_components_base.[target_platform].yaml`
     #  2. `./tests/test_build_components/build_components_base.[target_platform]-ard.yaml`
     target_platform_file="./tests/test_build_components/build_components_base.$target_platform.yaml"
     if ! [ -f "$target_platform_file" ]; then
       echo "No base test file [./tests/test_build_components/build_components_base.$target_platform.yaml] for component test [$f] found."
@@ -104,3 +227,23 @@ for f in ./tests/components/$target_component/*.*.yaml; do
     done
   fi
 done
+
+# Wait for all remaining jobs
+wait_for_jobs 1
+
+echo
+echo "============================================"
+echo "Build Summary:"
+echo "Total builds: $total_builds"
+echo "Failed builds: ${#failed_builds[@]}"
+
+if [ ${#failed_builds[@]} -gt 0 ]; then
+  echo
+  echo "Failed builds:"
+  for build in "${failed_builds[@]}"; do
+    echo "  - $build"
+  done
+  exit 1
+else
+  echo "All builds completed successfully!"
+fi
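For readers who prefer to see the scheduling pattern outside shell: wait_for_jobs caps the number of live children, reaps finished ones, records failures, and tears the rest down on the first failure when -f is set (the final wait_for_jobs 1 drains the queue). A rough Python equivalent of that pattern, not of this script; run_build is a hypothetical stand-in for one esphome invocation:

from concurrent.futures import FIRST_COMPLETED, ProcessPoolExecutor, wait
import subprocess

def run_build(target: str) -> int:
    # Hypothetical stand-in for one backgrounded `python3 -m esphome ...` call.
    return subprocess.run(["true", target], check=False).returncode

def run_all(targets: list[str], num_jobs: int, fail_fast: bool) -> list[str]:
    failed: list[str] = []
    # max_workers plays the role of $num_jobs; the pool reaps children for us.
    with ProcessPoolExecutor(max_workers=num_jobs) as pool:
        pending = {pool.submit(run_build, t): t for t in targets}
        while pending:
            done, _ = wait(pending, return_when=FIRST_COMPLETED)
            for future in done:
                target = pending.pop(future)
                if future.result() != 0:
                    failed.append(target)
                    if fail_fast:
                        # Like the kill -TERM loop: stop scheduling queued work.
                        for f in pending:
                            f.cancel()
                        return failed
    return failed

if __name__ == "__main__":  # guard required for ProcessPoolExecutor on spawn platforms
    failures = run_all(["wifi/basic/esp32-idf", "logger/basic/esp8266-ard"], 4, True)
    print(f"Failed builds: {len(failures)}")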