Merge pull request #11790 from Nordix/waitRetry

Add robustness to wait status checks
pull/12380/merge
Matt Farina 10 months ago committed by GitHub
commit 2dea5bf335
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -19,6 +19,7 @@ package kube // import "helm.sh/helm/v3/pkg/kube"
import ( import (
"context" "context"
"fmt" "fmt"
"net/http"
"time" "time"
"github.com/pkg/errors" "github.com/pkg/errors"
@ -32,6 +33,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
"k8s.io/cli-runtime/pkg/resource"
"k8s.io/apimachinery/pkg/util/wait" "k8s.io/apimachinery/pkg/util/wait"
) )
@ -50,10 +52,27 @@ func (w *waiter) waitForResources(created ResourceList) error {
ctx, cancel := context.WithTimeout(context.Background(), w.timeout) ctx, cancel := context.WithTimeout(context.Background(), w.timeout)
defer cancel() defer cancel()
numberOfErrors := make([]int, len(created))
for i := range numberOfErrors {
numberOfErrors[i] = 0
}
return wait.PollUntilContextCancel(ctx, 2*time.Second, true, func(ctx context.Context) (bool, error) { return wait.PollUntilContextCancel(ctx, 2*time.Second, true, func(ctx context.Context) (bool, error) {
for _, v := range created { waitRetries := 30
for i, v := range created {
ready, err := w.c.IsReady(ctx, v) ready, err := w.c.IsReady(ctx, v)
if !ready || err != nil {
if waitRetries > 0 && w.isRetryableError(err, v) {
numberOfErrors[i]++
if numberOfErrors[i] > waitRetries {
w.log("Max number of retries reached")
return false, err
}
w.log("Retrying as current number of retries %d less than max number of retries %d", numberOfErrors[i]-1, waitRetries)
return false, nil
}
numberOfErrors[i] = 0
if !ready {
return false, err return false, err
} }
} }
@ -61,6 +80,25 @@ func (w *waiter) waitForResources(created ResourceList) error {
}) })
} }
func (w *waiter) isRetryableError(err error, resource *resource.Info) bool {
if err == nil {
return false
}
w.log("Error received when checking status of resource %s. Error: '%s', Resource details: '%s'", resource.Name, err, resource)
if ev, ok := err.(*apierrors.StatusError); ok {
statusCode := ev.Status().Code
retryable := w.isRetryableHTTPStatusCode(statusCode)
w.log("Status code received: %d. Retryable error? %t", statusCode, retryable)
return retryable
}
w.log("Retryable error? %t", true)
return true
}
func (w *waiter) isRetryableHTTPStatusCode(httpStatusCode int32) bool {
return httpStatusCode == 0 || httpStatusCode == http.StatusTooManyRequests || (httpStatusCode >= 500 && httpStatusCode != http.StatusNotImplemented)
}
// waitForDeletedResources polls to check if all the resources are deleted or a timeout is reached // waitForDeletedResources polls to check if all the resources are deleted or a timeout is reached
func (w *waiter) waitForDeletedResources(deleted ResourceList) error { func (w *waiter) waitForDeletedResources(deleted ResourceList) error {
w.log("beginning wait for %d resources to be deleted with timeout of %v", len(deleted), w.timeout) w.log("beginning wait for %d resources to be deleted with timeout of %v", len(deleted), w.timeout)

Loading…
Cancel
Save