Merge pull request #32081 from TerryHowe/fix/statuswait-delete-race

fix(kube): prevent spurious early exit in WaitForDelete during informer sync
pull/32129/merge
George Jenkins 2 weeks ago committed by GitHub
commit fc2e27e448
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -167,7 +167,15 @@ func (w *statusWaiter) waitForDelete(ctx context.Context, resourceList ResourceL
rs.Identifier.GroupKind.Kind, rs.Identifier.Namespace, rs.Identifier.Name, rs.Status, rs.Message))
}
if err := ctx.Err(); err != nil {
errs = append(errs, err)
// context.Canceled and other non-deadline errors always propagate: they signal an
// external interruption regardless of resource state.
// context.DeadlineExceeded is only added when there are resource-specific errors;
// if all resources are Unknown or NotFound the timeout is not itself a failure for
// a delete wait (e.g. resources deleted before the watch started stay Unknown in
// the fake client but are effectively gone).
if !errors.Is(err, context.DeadlineExceeded) || len(errs) > 0 {
errs = append(errs, err)
}
}
if len(errs) > 0 {
return errors.Join(errs...)
@ -240,6 +248,7 @@ func statusObserver(cancel context.CancelFunc, desired status.Status, logger *sl
return func(statusCollector *collector.ResourceStatusCollector, _ event.Event) {
var rss []*event.ResourceStatus
var nonDesiredResources []*event.ResourceStatus
var unknownSkipped int
for _, rs := range statusCollector.ResourceStatuses {
if rs == nil {
continue
@ -247,6 +256,7 @@ func statusObserver(cancel context.CancelFunc, desired status.Status, logger *sl
// If a resource is already deleted before waiting has started, it will show as unknown.
// This check ensures we don't wait forever for a resource that is already deleted.
if rs.Status == status.UnknownStatus && desired == status.NotFoundStatus {
unknownSkipped++
continue
}
// Failed is a terminal state. This check ensures we don't wait forever for a resource
@ -260,6 +270,14 @@ func statusObserver(cancel context.CancelFunc, desired status.Status, logger *sl
}
}
// During informer initialization there is a brief window where existing resources
// appear as Unknown before their real status is delivered. If every resource was
// skipped as Unknown, we cannot yet distinguish "all deleted" from "not yet synced",
// so hold off on the early-cancel to avoid a spurious success or premature exit.
if unknownSkipped > 0 && len(rss) == 0 {
return
}
if aggregator.AggregateStatus(rss, desired) == desired {
logger.Debug("all resources achieved desired status", "desiredStatus", desired, "resourceCount", len(rss))
cancel()

Loading…
Cancel
Save