From 4c0d21f53f2ca78b525e31dbbf9cc9cfb818a2e3 Mon Sep 17 00:00:00 2001
From: Terry Howe <terrylhowe@gmail.com>
Date: Tue, 7 Apr 2026 04:26:12 -0600
Subject: [PATCH 1/2] test(kube): fix flaky WaitForDelete timing in status wait
 tests

TestMethodContextOverridesGeneralContext/WaitForDelete used a 1s
timeout with a 500ms deletion delay, leaving only ~500ms for the
fake watcher to propagate the delete event. On loaded CI runners
this window is too tight and causes intermittent failures.

Increase the timeout to 5s and reduce the deletion delay to 100ms
so there is ample headroom. Apply the same deletion-delay reduction
to TestStatusWaitForDelete which shares the same pattern.

Signed-off-by: Terry Howe <terrylhowe@gmail.com>
---
 pkg/kube/statuswait_test.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pkg/kube/statuswait_test.go b/pkg/kube/statuswait_test.go
index 0639e07fc..3ce6b0fd6 100644
--- a/pkg/kube/statuswait_test.go
+++ b/pkg/kube/statuswait_test.go
@@ -317,7 +317,7 @@ func TestStatusWaitForDelete(t *testing.T) {
 			t.Parallel()
 			c := newTestClient(t)
 			timeout := time.Second
-			timeUntilPodDelete := time.Millisecond * 500
+			timeUntilPodDelete := time.Millisecond * 100
 			fakeClient := dynamicfake.NewSimpleDynamicClient(scheme.Scheme)
 			fakeMapper := testutil.NewFakeRESTMapper(
 				v1.SchemeGroupVersion.WithKind("Pod"),
@@ -1680,8 +1680,8 @@ func TestMethodContextOverridesGeneralContext(t *testing.T) {
 	t.Run("method-specific context overrides general context for WaitForDelete", func(t *testing.T) {
 		t.Parallel()
 		c := newTestClient(t)
-		timeout := time.Second
-		timeUntilPodDelete := time.Millisecond * 500
+		timeout := 5 * time.Second
+		timeUntilPodDelete := time.Millisecond * 100
 		fakeClient := dynamicfake.NewSimpleDynamicClient(scheme.Scheme)
 		fakeMapper := testutil.NewFakeRESTMapper(
 			v1.SchemeGroupVersion.WithKind("Pod"),

From a7f84439aacd3864b40055b60a3c3e54292d1646 Mon Sep 17 00:00:00 2001
From: Terry Howe <terrylhowe@gmail.com>
Date: Tue, 7 Apr 2026 08:23:39 -0600
Subject: [PATCH 2/2] test(kube): fix flaky WaitForDelete test by avoiding
 informer sync race
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous fix (increasing timeout / reducing deletion delay) did not
work because the flakiness is not a timing problem at all.

Root cause: fluxcd/cli-utils HasSynced() returns true after the initial
list item is *popped* from DeltaFIFO, which is before AddFunc delivers
the ResourceUpdateEvent to the collector. This creates a race where the
SyncEvent can arrive at the statusObserver *before* the pod's Current
status is recorded. When that happens:
  - statusObserver sees pod as Unknown
  - Unknown is skipped for WaitForDelete (by design, to handle resources
    that were already deleted before watching started)
  - AggregateStatus([], NotFoundStatus) == NotFoundStatus → cancel()
  - The watch context is cancelled before DeleteFunc can fire
  - Final check: pod still Current → error

The test intent is to verify that waitForDeleteCtx (not the cancelled
generalCtx) is selected. A non-existent resource satisfies this:
  - With waitForDeleteCtx=Background(): informer syncs with empty list
    → Unknown → cancel → success ✓
  - With generalCtx (cancelled, wrong): context immediately done
    → ctx.Err() appended → error returned ✓

Remove the goroutine-based deletion and the pod creation to eliminate
the race while preserving the context-selection assertion.

Signed-off-by: Terry Howe <terrylhowe@gmail.com>
---
 pkg/kube/statuswait_test.go | 29 +++++++----------------------
 1 file changed, 7 insertions(+), 22 deletions(-)

diff --git a/pkg/kube/statuswait_test.go b/pkg/kube/statuswait_test.go
index 3ce6b0fd6..73a424720 100644
--- a/pkg/kube/statuswait_test.go
+++ b/pkg/kube/statuswait_test.go
@@ -317,7 +317,7 @@ func TestStatusWaitForDelete(t *testing.T) {
 			t.Parallel()
 			c := newTestClient(t)
 			timeout := time.Second
-			timeUntilPodDelete := time.Millisecond * 100
+			timeUntilPodDelete := time.Millisecond * 500
 			fakeClient := dynamicfake.NewSimpleDynamicClient(scheme.Scheme)
 			fakeMapper := testutil.NewFakeRESTMapper(
 				v1.SchemeGroupVersion.WithKind("Pod"),
@@ -1680,8 +1680,6 @@ func TestMethodContextOverridesGeneralContext(t *testing.T) {
 	t.Run("method-specific context overrides general context for WaitForDelete", func(t *testing.T) {
 		t.Parallel()
 		c := newTestClient(t)
-		timeout := 5 * time.Second
-		timeUntilPodDelete := time.Millisecond * 100
 		fakeClient := dynamicfake.NewSimpleDynamicClient(scheme.Scheme)
 		fakeMapper := testutil.NewFakeRESTMapper(
 			v1.SchemeGroupVersion.WithKind("Pod"),
@@ -1698,27 +1696,14 @@ func TestMethodContextOverridesGeneralContext(t *testing.T) {
 			waitForDeleteCtx: context.Background(), // Not cancelled - should be used
 		}
 
+		// Use a non-existent resource: WaitForDelete should return immediately since
+		// the pod is already in the desired "deleted" state.
+		// This also validates context selection: if generalCtx (cancelled) were
+		// incorrectly used instead of waitForDeleteCtx, the watch context would be
+		// immediately cancelled and the call would return a context error.
 		objs := getRuntimeObjFromManifests(t, []string{podCurrentManifest})
-		for _, obj := range objs {
-			u := obj.(*unstructured.Unstructured)
-			gvr := getGVR(t, fakeMapper, u)
-			err := fakeClient.Tracker().Create(gvr, u, u.GetNamespace())
-			require.NoError(t, err)
-		}
-
-		// Schedule deletion
-		for _, obj := range objs {
-			u := obj.(*unstructured.Unstructured)
-			gvr := getGVR(t, fakeMapper, u)
-			go func(gvr schema.GroupVersionResource, u *unstructured.Unstructured) {
-				time.Sleep(timeUntilPodDelete)
-				err := fakeClient.Tracker().Delete(gvr, u.GetNamespace(), u.GetName())
-				assert.NoError(t, err)
-			}(gvr, u)
-		}
-
 		resourceList := getResourceListFromRuntimeObjs(t, c, objs)
-		err := sw.WaitForDelete(resourceList, timeout)
+		err := sw.WaitForDelete(resourceList, time.Second)
 		// Should succeed because method context is used and it's not cancelled
 		assert.NoError(t, err)
 	})