Return errors on upgrade when deletion fails

This is a rebase of https://github.com/helm/helm/pull/12299
as the pull request was tagged for Helm v4.

Closes: https://github.com/helm/helm/issues/11375

Related: https://github.com/helm/helm/pull/7929

This was a pain to reproduce; here is a script that grants a service account every permission Helm needs except `delete` on CronJobs, then runs an upgrade that must delete one:

```sh
#!/usr/bin/env bash
set -u

NS=default
RELEASE=test-release
CHART=./test-chart
SA=limited-helm-sa
HELM=${HELM:-./bin/helm}

echo "Helm: $($HELM version)"
echo "Cleaning…"
$HELM uninstall "$RELEASE" -n "$NS" >/dev/null 2>&1 || true
kubectl -n "$NS" delete sa "$SA" role "${SA}-role" rolebinding "${SA}-rb" >/dev/null 2>&1 || true
kubectl -n "$NS" delete cronjob "$RELEASE-test-chart-cronjob" >/dev/null 2>&1 || true
rm -rf "$CHART" /tmp/limited-helm-kubeconfig

echo "Create minimal chart with only a CronJob"
$HELM create "$CHART" >/dev/null
rm -f "$CHART"/templates/{deployment.yaml,service.yaml,hpa.yaml,tests/test-connection.yaml,serviceaccount.yaml}
cat > "$CHART/templates/cronjob.yaml" <<'YAML'
apiVersion: batch/v1
kind: CronJob
metadata:
  name: {{ include "test-chart.fullname" . }}-cronjob
spec:
  schedule: "*/5 * * * *"
  jobTemplate:
    spec:
      template:
        spec:
          restartPolicy: OnFailure
          containers:
          - name: hello
            image: busybox
            command: ["/bin/sh","-c","date; echo Hello from CronJob"]
YAML

echo "RBAC: allow Helm storage, basic reads/creates, but NO delete on cronjobs"
kubectl -n "$NS" apply -f - >/dev/null <<EOF
apiVersion: v1
kind: ServiceAccount
metadata:
  name: $SA
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: ${SA}-role
rules:
- apiGroups: [""]
  resources: ["secrets","configmaps"]
  verbs: ["get","list","watch","create","patch","update","delete"]
- apiGroups: [""]
  resources: ["pods","events"]
  verbs: ["get","list","watch"]
- apiGroups: ["batch"]
  resources: ["cronjobs"]
  verbs: ["get","list","watch","create","patch","update"]
EOF
kubectl -n "$NS" apply -f - >/dev/null <<EOF
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: ${SA}-rb
subjects:
- kind: ServiceAccount
  name: $SA
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: ${SA}-role
EOF

echo "Create kubeconfig for that SA"
TOKEN=$(kubectl -n "$NS" create token "$SA")
SERVER=$(kubectl config view --minify -o jsonpath='{.clusters[0].cluster.server}')
CA_DATA=$(kubectl config view --minify --raw -o jsonpath='{.clusters[0].cluster.certificate-authority-data}')
KCFG=/tmp/limited-helm-kubeconfig
cat > "$KCFG" <<EOF
apiVersion: v1
kind: Config
clusters:
- name: local
  cluster:
    server: $SERVER
    certificate-authority-data: $CA_DATA
contexts:
- name: limited
  context:
    cluster: local
    namespace: $NS
    user: $SA
current-context: limited
users:
- name: $SA
  user:
    token: $TOKEN
EOF

set +e

echo "Install (as limited SA)"
KUBECONFIG="$KCFG" $HELM upgrade --install "$RELEASE" "$CHART" -n "$NS" --wait
echo "CronJob after install:"
kubectl -n "$NS" get cronjob "$RELEASE-test-chart-cronjob" || true

echo "Remove CronJob from chart and add a small ConfigMap to force an upgrade"
rm -f "$CHART/templates/cronjob.yaml"
cat > "$CHART/templates/configmap.yaml" <<'YAML'
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "test-chart.fullname" . }}-config
data:
  hello: world
YAML

echo "Upgrade without CronJob (as limited SA)"
KUBECONFIG="$KCFG" $HELM upgrade --install "$RELEASE" "$CHART" -n "$NS"
RC=$?

echo "Post-upgrade verification"
if kubectl -n "$NS" get cronjob "$RELEASE-test-chart-cronjob" >/dev/null 2>&1; then
  echo "OK: Stale CronJob still present: $RELEASE-test-chart-cronjob"
else
  echo "NO_OK: CronJob deleted"
fi

echo "Helm exit code: $RC"
exit 0
```

With the current build (this change applied), the upgrade fails and reports the forbidden delete:
```sh
./reproduce-helm-issue.sh
Helm: version.BuildInfo{Version:"v4.0+unreleased", GitCommit:"f19bb9cd4c99943f7a4980d6670de44affe3e472", GitTreeState:"dirty", GoVersion:"go1.24.0"}
Cleaning…
Create minimal chart with CronJob + ConfigMap (we will remove both in v2)
RBAC: allow Helm storage + delete for configmaps, but NO delete on cronjobs
Create kubeconfig for that SA
Install v1 (as limited SA)
Release "test-release" does not exist. Installing it now.
NAME: test-release
LAST DEPLOYED: Tue Oct 14 18:55:57 2025
NAMESPACE: default
STATUS: deployed
REVISION: 1
DESCRIPTION: Install complete
TEST SUITE: None
NOTES:
1. Get the application URL by running these commands:
export POD_NAME=$(kubectl get pods --namespace default -l "app.kubernetes.io/name=test-chart,app.kubernetes.io/instance=test-release" -o jsonpath="{.items[0].metadata.name}")
export CONTAINER_PORT=$(kubectl get pod --namespace default $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
echo "Visit http://127.0.0.1:8080 to use your application"
kubectl --namespace default port-forward $POD_NAME 8080:$CONTAINER_PORT
Verify v1 objects exist
NAME                              SCHEDULE      TIMEZONE   SUSPEND   ACTIVE   LAST SCHEDULE   AGE
test-release-test-chart-cronjob   */5 * * * *   <none>     False     0        <none>          0s
NAME                             DATA   AGE
test-release-test-chart-config   1      0s
Prepare v2: remove BOTH CronJob and ConfigMap from the chart
Upgrade to v2 (as limited SA) — expecting CronJob delete first, then ConfigMap
- CronJob delete should FAIL (no delete permission)
- ConfigMap delete should SUCCEED (delete allowed) — proves 'continue on error' and inverted order
level=DEBUG msg="getting history for release" release=test-release
level=DEBUG msg="getting release history" name=test-release
level=DEBUG msg="preparing upgrade" name=test-release
level=DEBUG msg="getting last revision" name=test-release
level=DEBUG msg="getting release history" name=test-release
level=DEBUG msg="number of dependencies in the chart" dependencies=0
level=DEBUG msg="determined release apply method" server_side_apply=true previous_release_apply_method=ssa
level=DEBUG msg="performing update" name=test-release
level=DEBUG msg="creating upgraded release" name=test-release
level=DEBUG msg="creating release" key=sh.helm.release.v1.test-release.v2
level=DEBUG msg="getting release history" name=test-release
level=DEBUG msg="using server-side apply for resource update" forceConflicts=false dryRun=false fieldValidationDirective=Strict upgradeClientSideFieldManager=false
level=DEBUG msg="checking resources for changes" resources=0
level=DEBUG msg="deleting resource" namespace=default name=test-release-test-chart-config kind=ConfigMap
level=DEBUG msg="deleting resource" namespace=default name=test-release-test-chart-cronjob kind=CronJob
level=DEBUG msg="failed to delete resource" namespace=default name=test-release-test-chart-cronjob kind=CronJob error="cronjobs.batch \"test-release-test-chart-cronjob\" is forbidden: User \"system:serviceaccount:default:limited-helm-sa\" cannot delete resource \"cronjobs\" in API group \"batch\" in the namespace \"default\""
level=INFO msg="update completed" created=0 updated=0 deleted=1
level=WARN msg="update completed with errors" errors=1
level=DEBUG msg="updating release" key=sh.helm.release.v1.test-release.v1
level=WARN msg="upgrade failed" name=test-release error="failed to delete resource test-release-test-chart-cronjob: cronjobs.batch \"test-release-test-chart-cronjob\" is forbidden: User \"system:serviceaccount:default:limited-helm-sa\" cannot delete resource \"cronjobs\" in API group \"batch\" in the namespace \"default\""
level=DEBUG msg="updating release" key=sh.helm.release.v1.test-release.v2
Error: UPGRADE FAILED: failed to delete resource test-release-test-chart-cronjob: cronjobs.batch "test-release-test-chart-cronjob" is forbidden: User "system:serviceaccount:default:limited-helm-sa" cannot delete resource "cronjobs" in API group "batch" in the namespace "default"
Post-upgrade verification
Stale CronJob still present: test-release-test-chart-cronjob (expected if delete is forbidden)
ConfigMap deleted as expected: test-release-test-chart-config (and after CronJob attempt)
Helm exit code: 1
```

With the latest release, v3.19, the upgrade succeeds silently and the stale CronJob is left behind:
```sh
HELM=/usr/local/bin/helm ./reproduce-helm-issue.sh
Helm: version.BuildInfo{Version:"v3.19.0", GitCommit:"3d8990f0836691f0229297773f3524598f46bda6", GitTreeState:"clean", GoVersion:"go1.24.7"}
Cleaning…
Create minimal chart with only a CronJob
RBAC: allow Helm storage, basic reads/creates, but NO delete on cronjobs
Create kubeconfig for that SA
Install (as limited SA)
Release "test-release" does not exist. Installing it now.
NAME: test-release
LAST DEPLOYED: Tue Oct 14 19:07:01 2025
NAMESPACE: default
STATUS: deployed
REVISION: 1
TEST SUITE: None
NOTES:
1. Get the application URL by running these commands:
  export POD_NAME=$(kubectl get pods --namespace default -l "app.kubernetes.io/name=test-chart,app.kubernetes.io/instance=test-release" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace default $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
  echo "Visit http://127.0.0.1:8080 to use your application"
  kubectl --namespace default port-forward $POD_NAME 8080:$CONTAINER_PORT
CronJob after install:
NAME                              SCHEDULE      TIMEZONE   SUSPEND   ACTIVE   LAST SCHEDULE   AGE
test-release-test-chart-cronjob   */5 * * * *   <none>     False     0        <none>          0s
Remove CronJob from chart and add a small ConfigMap to force an upgrade
Upgrade without CronJob (as limited SA)
Release "test-release" has been upgraded. Happy Helming!
NAME: test-release
LAST DEPLOYED: Tue Oct 14 19:07:01 2025
NAMESPACE: default
STATUS: deployed
REVISION: 2
TEST SUITE: None
NOTES:
1. Get the application URL by running these commands:
  export POD_NAME=$(kubectl get pods --namespace default -l "app.kubernetes.io/name=test-chart,app.kubernetes.io/instance=test-release" -o jsonpath="{.items[0].metadata.name}")
  export CONTAINER_PORT=$(kubectl get pod --namespace default $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}")
  echo "Visit http://127.0.0.1:8080 to use your application"
  kubectl --namespace default port-forward $POD_NAME 8080:$CONTAINER_PORT
Post-upgrade verification
OK: Stale CronJob still present: test-release-test-chart-cronjob
Helm exit code: 0
```

Co-authored-by: dayeguilaiye <979014041@qq.com>
Signed-off-by: Benoit Tigeot <benoit.tigeot@lifen.fr>

```diff
@@ -569,10 +569,17 @@ func (c *Client) update(originals, targets ResourceList, updateApplyFunc UpdateA
 		}
 		if err := deleteResource(info, metav1.DeletePropagationBackground); err != nil {
 			slog.Debug("failed to delete resource", "namespace", info.Namespace, "name", info.Name, "kind", info.Mapping.GroupVersionKind.Kind, slog.Any("error", err))
+			if !apierrors.IsNotFound(err) {
+				updateErrors = append(updateErrors, fmt.Errorf("failed to delete resource %s: %w", info.Name, err))
+			}
 			continue
 		}
 		res.Deleted = append(res.Deleted, info)
 	}
+
+	if len(updateErrors) != 0 {
+		return res, joinErrors(updateErrors, " && ")
+	}
 
 	return res, nil
 }
```
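
The collected failures are flattened by `joinErrors`, which is not part of this hunk (and `updateErrors` is accumulated earlier in the same function). A minimal sketch of the assumed semantics, matching the `joinErrors(updateErrors, " && ")` call site above, not the exact implementation:

```go
package kube

import (
	"errors"
	"strings"
)

// joinErrors flattens a slice of errors into one error whose message is the
// individual messages joined with sep (" && " at the call site above).
// Sketch of the assumed helper, not the real one from pkg/kube.
func joinErrors(errs []error, sep string) error {
	if len(errs) == 0 {
		return nil
	}
	msgs := make([]string, 0, len(errs))
	for _, err := range errs {
		msgs = append(msgs, err.Error())
	}
	return errors.New(strings.Join(msgs, sep))
}
```

The corresponding test changes add a `notfound` pod whose deletion the fake API server answers with a 404: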

```diff
@@ -336,6 +336,8 @@ func TestUpdate(t *testing.T) {
 		"/namespaces/default/pods:POST", // retry due to 409
 		"/namespaces/default/pods/squid:GET",
 		"/namespaces/default/pods/squid:DELETE",
+		"/namespaces/default/pods/notfound:GET",
+		"/namespaces/default/pods/notfound:DELETE",
 	}
 	expectedActionsServerSideApply := []string{
@@ -351,11 +353,13 @@ func TestUpdate(t *testing.T) {
 		"/namespaces/default/pods:POST", // retry due to 409
 		"/namespaces/default/pods/squid:GET",
 		"/namespaces/default/pods/squid:DELETE",
+		"/namespaces/default/pods/notfound:GET",
+		"/namespaces/default/pods/notfound:DELETE",
 	}
 	testCases := map[string]testCase{
 		"client-side apply": {
-			OriginalPods: newPodList("starfish", "otter", "squid"),
+			OriginalPods: newPodList("starfish", "otter", "squid", "notfound"),
 			TargetPods: func() v1.PodList {
 				listTarget := newPodList("starfish", "otter", "dolphin")
 				listTarget.Items[0].Spec.Containers[0].Ports = []v1.ContainerPort{{Name: "https", ContainerPort: 443}}
@@ -367,7 +371,7 @@ func TestUpdate(t *testing.T) {
 			ExpectedActions: expectedActionsClientSideApply,
 		},
 		"client-side apply (three-way merge for unstructured)": {
-			OriginalPods: newPodList("starfish", "otter", "squid"),
+			OriginalPods: newPodList("starfish", "otter", "squid", "notfound"),
 			TargetPods: func() v1.PodList {
 				listTarget := newPodList("starfish", "otter", "dolphin")
 				listTarget.Items[0].Spec.Containers[0].Ports = []v1.ContainerPort{{Name: "https", ContainerPort: 443}}
@@ -379,7 +383,7 @@ func TestUpdate(t *testing.T) {
 		"serverSideApply": {
-			OriginalPods: newPodList("starfish", "otter", "squid"),
+			OriginalPods: newPodList("starfish", "otter", "squid", "notfound"),
 			TargetPods: func() v1.PodList {
 				listTarget := newPodList("starfish", "otter", "dolphin")
 				listTarget.Items[0].Spec.Containers[0].Ports = []v1.ContainerPort{{Name: "https", ContainerPort: 443}}
@@ -444,6 +448,12 @@ func TestUpdate(t *testing.T) {
 			return newResponse(http.StatusOK, &listTarget.Items[1])
 		case p == "/namespaces/default/pods/squid" && m == http.MethodGet:
 			return newResponse(http.StatusOK, &listTarget.Items[2])
+		case p == "/namespaces/default/pods/notfound" && m == http.MethodGet:
+			// Resource exists in original but will simulate not found on delete
+			return newResponse(http.StatusOK, &listOriginal.Items[3])
+		case p == "/namespaces/default/pods/notfound" && m == http.MethodDelete:
+			// Simulate a not found during deletion; should not cause update to fail
+			return newResponse(http.StatusNotFound, notFoundBody())
 		default:
 		}
```
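
The `notfound` fixture pins down the one delete failure the client still tolerates: a 404, meaning the resource is already gone. Anything else, such as the RBAC `Forbidden` from the reproduction above, is now collected and returned. A self-contained illustration of that distinction using the same apimachinery helpers the client checks against (the errors below are synthetic, not taken from the PR):

```go
package main

import (
	"fmt"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime/schema"
)

func main() {
	// A delete that races with an external deletion comes back as NotFound;
	// the updated client skips it instead of failing the upgrade.
	notFound := apierrors.NewNotFound(
		schema.GroupResource{Resource: "pods"}, "notfound")
	fmt.Println(apierrors.IsNotFound(notFound)) // true  -> ignored

	// A delete blocked by RBAC comes back as Forbidden; IsNotFound is false,
	// so it is appended to updateErrors and surfaces as UPGRADE FAILED.
	forbidden := apierrors.NewForbidden(
		schema.GroupResource{Group: "batch", Resource: "cronjobs"},
		"test-release-test-chart-cronjob",
		fmt.Errorf("user cannot delete cronjobs"))
	fmt.Println(apierrors.IsNotFound(forbidden)) // false -> returned as error
}
```

`NewNotFound` and `NewForbidden` build the same `*StatusError` values a real API server response decodes into, so `IsNotFound` behaves here exactly as it does in the client code.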
