Merge pull request #9950 from jeffrosenberg/error-on-failed-jobs-9285

Throw an error from jobReady() if the job exceeds its BackoffLimit
pull/12208/head
Joe Julian 12 months ago committed by GitHub
commit 99e1dce8c8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -18,6 +18,7 @@ package kube // import "helm.sh/helm/v3/pkg/kube"
import (
"context"
"fmt"
appsv1 "k8s.io/api/apps/v1"
appsv1beta1 "k8s.io/api/apps/v1beta1"
@ -83,8 +84,8 @@ type ReadyChecker struct {
// IsReady checks if v is ready. It supports checking readiness for pods,
// deployments, persistent volume claims, services, daemon sets, custom
// resource definitions, stateful sets, replication controllers, and replica
// sets. All other resource kinds are always considered ready.
// resource definitions, stateful sets, replication controllers, jobs (optional),
// and replica sets. All other resource kinds are always considered ready.
//
// IsReady will fetch the latest state of the object from the server prior to
// performing readiness checks, and it will return any error encountered.
@ -105,9 +106,11 @@ func (c *ReadyChecker) IsReady(ctx context.Context, v *resource.Info) (bool, err
case *batchv1.Job:
if c.checkJobs {
job, err := c.client.BatchV1().Jobs(v.Namespace).Get(ctx, v.Name, metav1.GetOptions{})
if err != nil || !c.jobReady(job) {
if err != nil {
return false, err
}
ready, err := c.jobReady(job)
return ready, err
}
case *appsv1.Deployment, *appsv1beta1.Deployment, *appsv1beta2.Deployment, *extensionsv1beta1.Deployment:
currentDeployment, err := c.client.AppsV1().Deployments(v.Namespace).Get(ctx, v.Name, metav1.GetOptions{})
@ -222,16 +225,17 @@ func (c *ReadyChecker) isPodReady(pod *corev1.Pod) bool {
return false
}
func (c *ReadyChecker) jobReady(job *batchv1.Job) bool {
func (c *ReadyChecker) jobReady(job *batchv1.Job) (bool, error) {
if job.Status.Failed > *job.Spec.BackoffLimit {
c.log("Job is failed: %s/%s", job.GetNamespace(), job.GetName())
return false
// If a job is failed, it can't recover, so throw an error
return false, fmt.Errorf("job is failed: %s/%s", job.GetNamespace(), job.GetName())
}
if job.Spec.Completions != nil && job.Status.Succeeded < *job.Spec.Completions {
c.log("Job is not completed: %s/%s", job.GetNamespace(), job.GetName())
return false
return false, nil
}
return true
return true, nil
}
func (c *ReadyChecker) serviceReady(s *corev1.Service) bool {

@ -4,7 +4,6 @@ Copyright The Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
@ -270,44 +269,52 @@ func Test_ReadyChecker_jobReady(t *testing.T) {
job *batchv1.Job
}
tests := []struct {
name string
args args
want bool
name string
args args
want bool
wantErr bool
}{
{
name: "job is completed",
args: args{job: newJob("foo", 1, intToInt32(1), 1, 0)},
want: true,
name: "job is completed",
args: args{job: newJob("foo", 1, intToInt32(1), 1, 0)},
want: true,
wantErr: false,
},
{
name: "job is incomplete",
args: args{job: newJob("foo", 1, intToInt32(1), 0, 0)},
want: false,
name: "job is incomplete",
args: args{job: newJob("foo", 1, intToInt32(1), 0, 0)},
want: false,
wantErr: false,
},
{
name: "job is failed",
args: args{job: newJob("foo", 1, intToInt32(1), 0, 1)},
want: false,
name: "job is failed but within BackoffLimit",
args: args{job: newJob("foo", 1, intToInt32(1), 0, 1)},
want: false,
wantErr: false,
},
{
name: "job is completed with retry",
args: args{job: newJob("foo", 1, intToInt32(1), 1, 1)},
want: true,
name: "job is completed with retry",
args: args{job: newJob("foo", 1, intToInt32(1), 1, 1)},
want: true,
wantErr: false,
},
{
name: "job is failed with retry",
args: args{job: newJob("foo", 1, intToInt32(1), 0, 2)},
want: false,
name: "job is failed and beyond BackoffLimit",
args: args{job: newJob("foo", 1, intToInt32(1), 0, 2)},
want: false,
wantErr: true,
},
{
name: "job is completed single run",
args: args{job: newJob("foo", 0, intToInt32(1), 1, 0)},
want: true,
name: "job is completed single run",
args: args{job: newJob("foo", 0, intToInt32(1), 1, 0)},
want: true,
wantErr: false,
},
{
name: "job is failed single run",
args: args{job: newJob("foo", 0, intToInt32(1), 0, 1)},
want: false,
name: "job is failed single run",
args: args{job: newJob("foo", 0, intToInt32(1), 0, 1)},
want: false,
wantErr: true,
},
{
name: "job with null completions",
@ -318,7 +325,12 @@ func Test_ReadyChecker_jobReady(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
c := NewReadyChecker(fake.NewSimpleClientset(), nil)
if got := c.jobReady(tt.args.job); got != tt.want {
got, err := c.jobReady(tt.args.job)
if (err != nil) != tt.wantErr {
t.Errorf("jobReady() error = %v, wantErr %v", err, tt.wantErr)
return
}
if got != tt.want {
t.Errorf("jobReady() = %v, want %v", got, tt.want)
}
})

Loading…
Cancel
Save