diff --git a/cmd/helm/install.go b/cmd/helm/install.go
index 719dc9014..6cad1d268 100644
--- a/cmd/helm/install.go
+++ b/cmd/helm/install.go
@@ -139,6 +139,7 @@ func addInstallFlags(f *pflag.FlagSet, client *action.Install, valueOpts *values
 	f.BoolVar(&client.CreateNamespace, "create-namespace", false, "create the release namespace if not present")
 	f.BoolVar(&client.DryRun, "dry-run", false, "simulate an install")
 	f.BoolVar(&client.DisableHooks, "no-hooks", false, "prevent hooks from running during install")
+	f.IntVar(&client.HookParallelism, "hook-parallelism", 1, "maximum number of hooks to execute in parallel")
 	f.BoolVar(&client.Replace, "replace", false, "re-use the given name, only if that name is a deleted release which remains in the history. This is unsafe in production")
 	f.DurationVar(&client.Timeout, "timeout", 300*time.Second, "time to wait for any individual Kubernetes operation (like Jobs for hooks)")
 	f.BoolVar(&client.Wait, "wait", false, "if set, will wait until all Pods, PVCs, Services, and minimum number of Pods of a Deployment, StatefulSet, or ReplicaSet are in a ready state before marking the release as successful. It will wait for as long as --timeout")
diff --git a/cmd/helm/release_testing.go b/cmd/helm/release_testing.go
index e4690b9d4..840b22d41 100644
--- a/cmd/helm/release_testing.go
+++ b/cmd/helm/release_testing.go
@@ -82,6 +82,7 @@ func newReleaseTestCmd(cfg *action.Configuration, out io.Writer) *cobra.Command
 
 	f := cmd.Flags()
 	f.DurationVar(&client.Timeout, "timeout", 300*time.Second, "time to wait for any individual Kubernetes operation (like Jobs for hooks)")
+	f.IntVar(&client.HookParallelism, "hook-parallelism", 1, "maximum number of hooks to execute in parallel")
 	f.BoolVar(&outputLogs, "logs", false, "Dump the logs from test pods (this runs after all tests are complete, but before any cleanup)")
 
 	return cmd
diff --git a/cmd/helm/rollback.go b/cmd/helm/rollback.go
index 745e910b2..ec78f8aa0 100644
--- a/cmd/helm/rollback.go
+++ b/cmd/helm/rollback.go
@@ -78,6 +78,7 @@ func newRollbackCmd(cfg *action.Configuration, out io.Writer) *cobra.Command {
 	f.BoolVar(&client.Recreate, "recreate-pods", false, "performs pods restart for the resource if applicable")
 	f.BoolVar(&client.Force, "force", false, "force resource update through delete/recreate if needed")
 	f.BoolVar(&client.DisableHooks, "no-hooks", false, "prevent hooks from running during rollback")
+	f.IntVar(&client.HookParallelism, "hook-parallelism", 1, "maximum number of hooks to execute in parallel")
 	f.DurationVar(&client.Timeout, "timeout", 300*time.Second, "time to wait for any individual Kubernetes operation (like Jobs for hooks)")
 	f.BoolVar(&client.Wait, "wait", false, "if set, will wait until all Pods, PVCs, Services, and minimum number of Pods of a Deployment, StatefulSet, or ReplicaSet are in a ready state before marking the release as successful. It will wait for as long as --timeout")
 	f.BoolVar(&client.CleanupOnFail, "cleanup-on-fail", false, "allow deletion of new resources created in this rollback when rollback fails")
diff --git a/cmd/helm/uninstall.go b/cmd/helm/uninstall.go
index 85fa822bd..24ffe501c 100644
--- a/cmd/helm/uninstall.go
+++ b/cmd/helm/uninstall.go
@@ -76,6 +76,7 @@ func newUninstallCmd(cfg *action.Configuration, out io.Writer) *cobra.Command {
 	f := cmd.Flags()
 	f.BoolVar(&client.DryRun, "dry-run", false, "simulate a uninstall")
 	f.BoolVar(&client.DisableHooks, "no-hooks", false, "prevent hooks from running during uninstallation")
+	f.IntVar(&client.HookParallelism, "hook-parallelism", 1, "maximum number of hooks to execute in parallel")
 	f.BoolVar(&client.KeepHistory, "keep-history", false, "remove all associated resources and mark the release as deleted, but retain the release history")
 	f.DurationVar(&client.Timeout, "timeout", 300*time.Second, "time to wait for any individual Kubernetes operation (like Jobs for hooks)")
 	f.StringVar(&client.Description, "description", "", "add a custom description")
diff --git a/cmd/helm/upgrade.go b/cmd/helm/upgrade.go
index af8ff68e3..ce52f572b 100644
--- a/cmd/helm/upgrade.go
+++ b/cmd/helm/upgrade.go
@@ -104,6 +104,7 @@ func newUpgradeCmd(cfg *action.Configuration, out io.Writer) *cobra.Command {
 			instClient.ChartPathOptions = client.ChartPathOptions
 			instClient.DryRun = client.DryRun
 			instClient.DisableHooks = client.DisableHooks
+			instClient.HookParallelism = client.HookParallelism
 			instClient.SkipCRDs = client.SkipCRDs
 			instClient.Timeout = client.Timeout
 			instClient.Wait = client.Wait
@@ -171,6 +172,7 @@ func newUpgradeCmd(cfg *action.Configuration, out io.Writer) *cobra.Command {
 	f.MarkDeprecated("recreate-pods", "functionality will no longer be updated. Consult the documentation for other methods to recreate pods")
 	f.BoolVar(&client.Force, "force", false, "force resource updates through a replacement strategy")
 	f.BoolVar(&client.DisableHooks, "no-hooks", false, "disable pre/post upgrade hooks")
+	f.IntVar(&client.HookParallelism, "hook-parallelism", 1, "maximum number of hooks to execute in parallel")
 	f.BoolVar(&client.DisableOpenAPIValidation, "disable-openapi-validation", false, "if set, the upgrade process will not validate rendered templates against the Kubernetes OpenAPI Schema")
 	f.BoolVar(&client.SkipCRDs, "skip-crds", false, "if set, no CRDs will be installed when an upgrade is performed with install flag enabled. By default, CRDs are installed if not already present, when an upgrade is performed with install flag enabled")
 	f.DurationVar(&client.Timeout, "timeout", 300*time.Second, "time to wait for any individual Kubernetes operation (like Jobs for hooks)")
diff --git a/pkg/action/hooks.go b/pkg/action/hooks.go
index a161f9377..b3d369608 100644
--- a/pkg/action/hooks.go
+++ b/pkg/action/hooks.go
@@ -18,6 +18,7 @@ package action
 import (
 	"bytes"
 	"sort"
+	"sync"
 	"time"
 
 	"github.com/pkg/errors"
@@ -26,99 +27,143 @@ import (
 	helmtime "helm.sh/helm/v3/pkg/time"
 )
 
-// execHook executes all of the hooks for the given hook event.
-func (cfg *Configuration) execHook(rl *release.Release, hook release.HookEvent, timeout time.Duration) error {
-	executingHooks := []*release.Hook{}
+// execHookEvent executes all of the hooks for the given hook event.
+func (cfg *Configuration) execHookEvent(rl *release.Release, event release.HookEvent, timeout time.Duration, parallelism int) error {
+	if parallelism < 1 {
+		parallelism = 1
+	}
+	weightedHooks := make(map[int][]*release.Hook)
 	for _, h := range rl.Hooks {
 		for _, e := range h.Events {
-			if e == hook {
-				executingHooks = append(executingHooks, h)
+			if e == event {
+				// Set default delete policy to before-hook-creation
+				if h.DeletePolicies == nil || len(h.DeletePolicies) == 0 {
+					// TODO(jlegrone): Only apply before-hook-creation delete policy to run to completion
+					// resources. For all other resource types update in place if a
+					// resource with the same name already exists and is owned by the
+					// current release.
+					h.DeletePolicies = []release.HookDeletePolicy{release.HookBeforeHookCreation}
+				}
+				weightedHooks[h.Weight] = append(weightedHooks[h.Weight], h)
 			}
 		}
 	}
-
-	sort.Sort(hookByWeight(executingHooks))
-
-	for _, h := range executingHooks {
-		// Set default delete policy to before-hook-creation
-		if h.DeletePolicies == nil || len(h.DeletePolicies) == 0 {
-			// TODO(jlegrone): Only apply before-hook-creation delete policy to run to completion
-			// resources. For all other resource types update in place if a
-			// resource with the same name already exists and is owned by the
-			// current release.
-			h.DeletePolicies = []release.HookDeletePolicy{release.HookBeforeHookCreation}
-		}
-
-		if err := cfg.deleteHookByPolicy(h, release.HookBeforeHookCreation); err != nil {
-			return err
-		}
-
-		resources, err := cfg.KubeClient.Build(bytes.NewBufferString(h.Manifest), true)
-		if err != nil {
-			return errors.Wrapf(err, "unable to build kubernetes object for %s hook %s", hook, h.Path)
+	var weights []int
+	for w := range weightedHooks {
+		weights = append(weights, w)
+		// sort hooks in each weighted group by name
+		sort.Slice(weightedHooks[w], func(i, j int) bool {
+			return weightedHooks[w][i].Name < weightedHooks[w][j].Name
+		})
+	}
+	sort.Ints(weights)
+
+	var mut sync.RWMutex
+	for _, w := range weights {
+		sem := make(chan int, parallelism)
+		errsChan := make(chan error)
+		errs := make([]error, 0)
+		for _, h := range weightedHooks[w] {
+			// execute hooks in parallel (with limited parallelism enforced by semaphore)
+			go func(h *release.Hook) {
+				sem <- 1
+				errsChan <- cfg.execHook(rl, h, &mut, timeout)
+				<-sem
+			}(h)
 		}
-		// Record the time at which the hook was applied to the cluster
-		h.LastRun = release.HookExecution{
-			StartedAt: helmtime.Now(),
-			Phase:     release.HookPhaseRunning,
+		// collect errors
+		for range weightedHooks[w] {
+			if err := <-errsChan; err != nil {
+				errs = append(errs, err)
+			}
 		}
-		cfg.recordRelease(rl)
-
-		// As long as the implementation of WatchUntilReady does not panic, HookPhaseFailed or HookPhaseSucceeded
-		// should always be set by this function. If we fail to do that for any reason, then HookPhaseUnknown is
-		// the most appropriate value to surface.
-		h.LastRun.Phase = release.HookPhaseUnknown
-
-		// Create hook resources
-		if _, err := cfg.KubeClient.Create(resources); err != nil {
-			h.LastRun.CompletedAt = helmtime.Now()
-			h.LastRun.Phase = release.HookPhaseFailed
-			return errors.Wrapf(err, "warning: Hook %s %s failed", hook, h.Path)
+		if len(errs) > 0 {
+			return errors.Errorf("%s hook event failed with %d error(s): %s", event, len(errs), joinErrors(errs))
 		}
+	}
 
-		// Watch hook resources until they have completed
-		err = cfg.KubeClient.WatchUntilReady(resources, timeout)
-		// Note the time of success/failure
-		h.LastRun.CompletedAt = helmtime.Now()
-		// Mark hook as succeeded or failed
-		if err != nil {
-			h.LastRun.Phase = release.HookPhaseFailed
-			// If a hook is failed, check the annotation of the hook to determine whether the hook should be deleted
-			// under failed condition. If so, then clear the corresponding resource object in the hook
-			if err := cfg.deleteHookByPolicy(h, release.HookFailed); err != nil {
+	// If all hooks are successful, check the annotation of each hook to determine whether the hook should be deleted
+	// under succeeded condition. If so, then clear the corresponding resource object in each hook
+	for _, w := range weights {
+		for _, h := range weightedHooks[w] {
+			if err := cfg.deleteHookByPolicy(h, release.HookSucceeded); err != nil {
 				return err
 			}
-			return err
 		}
-		h.LastRun.Phase = release.HookPhaseSucceeded
 	}
 
-	// If all hooks are successful, check the annotation of each hook to determine whether the hook should be deleted
-	// under succeeded condition. If so, then clear the corresponding resource object in each hook
-	for _, h := range executingHooks {
-		if err := cfg.deleteHookByPolicy(h, release.HookSucceeded); err != nil {
-			return err
+	return nil
+}
+
+// execHook executes a hook.
+func (cfg *Configuration) execHook(rl *release.Release, h *release.Hook, mut *sync.RWMutex, timeout time.Duration) (err error) {
+	if err := cfg.deleteHookByPolicy(h, release.HookBeforeHookCreation); err != nil {
+		return err
+	}
+
+	resources, err := cfg.KubeClient.Build(bytes.NewBufferString(h.Manifest), true)
+	if err != nil {
+		return errors.Wrapf(err, "unable to build kubernetes object for applying hook %s", h.Path)
+	}
+
+	// Record the time at which the hook was applied to the cluster
+	updateHookPhase(h, mut, release.HookPhaseRunning)
+	// Thread safety: exclusive lock is necessary to ensure that none of the hook structs are modified during recordRelease
+	mut.Lock()
+	cfg.recordRelease(rl)
+	mut.Unlock()
+
+	// As long as the implementation of WatchUntilReady does not panic, HookPhaseFailed or HookPhaseSucceeded
+	// should always be set by this function. If we fail to do that for any reason, then HookPhaseUnknown is
+	// the most appropriate value to surface.
+	defer func() {
+		if panic := recover(); panic != nil {
+			updateHookPhase(h, mut, release.HookPhaseUnknown)
+			err = errors.Errorf("panicked while executing hook %s", h.Path)
 		}
+	}()
+
+	// Create hook resources
+	if _, err := cfg.KubeClient.Create(resources); err != nil {
+		updateHookPhase(h, mut, release.HookPhaseFailed)
+		return errors.Wrapf(err, "warning: hook %s failed", h.Path)
 	}
 
+	// Watch hook resources until they have completed
+	err = cfg.KubeClient.WatchUntilReady(resources, timeout)
+	// Mark hook as succeeded or failed
+	if err != nil {
+		updateHookPhase(h, mut, release.HookPhaseFailed)
+		// If a hook is failed, check the annotation of the hook to determine whether the hook should be deleted
+		// under failed condition. If so, then clear the corresponding resource object in the hook.
+		if err := cfg.deleteHookByPolicy(h, release.HookFailed); err != nil {
+			return err
+		}
+		return err
+	}
+	updateHookPhase(h, mut, release.HookPhaseSucceeded)
 	return nil
 }
 
-// hookByWeight is a sorter for hooks
-type hookByWeight []*release.Hook
-
-func (x hookByWeight) Len() int      { return len(x) }
-func (x hookByWeight) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
-func (x hookByWeight) Less(i, j int) bool {
-	if x[i].Weight == x[j].Weight {
-		return x[i].Name < x[j].Name
+// updateHookPhase updates the phase of a hook in a thread-safe manner.
+func updateHookPhase(h *release.Hook, mut *sync.RWMutex, phase release.HookPhase) {
+	// Thread safety: shared lock is sufficient because each execHook goroutine operates on a different hook
+	completedAtTime := helmtime.Now()
+	mut.RLock()
+	startedAtTime := helmtime.Now()
+	switch phase {
+	case release.HookPhaseRunning:
+		h.LastRun.StartedAt = startedAtTime
+	case release.HookPhaseSucceeded, release.HookPhaseFailed:
+		h.LastRun.CompletedAt = completedAtTime
 	}
-	return x[i].Weight < x[j].Weight
+	h.LastRun.Phase = phase
+	mut.RUnlock()
 }
 
-// deleteHookByPolicy deletes a hook if the hook policy instructs it to
+// deleteHookByPolicy deletes a hook if the hook policy instructs it to.
 func (cfg *Configuration) deleteHookByPolicy(h *release.Hook, policy release.HookDeletePolicy) error {
 	// Never delete CustomResourceDefinitions; this could cause lots of
 	// cascading garbage collection.
diff --git a/pkg/action/install.go b/pkg/action/install.go
index 4b4dd9214..3eaf18ed3 100644
--- a/pkg/action/install.go
+++ b/pkg/action/install.go
@@ -76,6 +76,7 @@ type Install struct {
 	CreateNamespace bool
 	DryRun          bool
 	DisableHooks    bool
+	HookParallelism int
 	Replace         bool
 	Wait            bool
 	Devel           bool
@@ -322,7 +323,7 @@ func (i *Install) Run(chrt *chart.Chart, vals map[string]interface{}) (*release.
 
 	// pre-install hooks
 	if !i.DisableHooks {
-		if err := i.cfg.execHook(rel, release.HookPreInstall, i.Timeout); err != nil {
+		if err := i.cfg.execHookEvent(rel, release.HookPreInstall, i.Timeout, i.HookParallelism); err != nil {
 			return i.failRelease(rel, fmt.Errorf("failed pre-install: %s", err))
 		}
 	}
@@ -348,7 +349,7 @@ func (i *Install) Run(chrt *chart.Chart, vals map[string]interface{}) (*release.
 	}
 
 	if !i.DisableHooks {
-		if err := i.cfg.execHook(rel, release.HookPostInstall, i.Timeout); err != nil {
+		if err := i.cfg.execHookEvent(rel, release.HookPostInstall, i.Timeout, i.HookParallelism); err != nil {
 			return i.failRelease(rel, fmt.Errorf("failed post-install: %s", err))
 		}
 	}
@@ -379,6 +380,7 @@ func (i *Install) failRelease(rel *release.Release, err error) (*release.Release
 		i.cfg.Log("Install failed and atomic is set, uninstalling release")
 		uninstall := NewUninstall(i.cfg)
 		uninstall.DisableHooks = i.DisableHooks
+		uninstall.HookParallelism = i.HookParallelism
 		uninstall.KeepHistory = false
 		uninstall.Timeout = i.Timeout
 		if _, uninstallErr := uninstall.Run(i.ReleaseName); uninstallErr != nil {
diff --git a/pkg/action/release_testing.go b/pkg/action/release_testing.go
index b7a1da757..4b024c3b1 100644
--- a/pkg/action/release_testing.go
+++ b/pkg/action/release_testing.go
@@ -31,8 +31,9 @@ import (
 //
 // It provides the implementation of 'helm test'.
 type ReleaseTesting struct {
-	cfg     *Configuration
-	Timeout time.Duration
+	cfg             *Configuration
+	Timeout         time.Duration
+	HookParallelism int
 	// Used for fetching logs from test pods
 	Namespace string
 }
@@ -60,7 +61,7 @@ func (r *ReleaseTesting) Run(name string) (*release.Release, error) {
 		return rel, err
 	}
 
-	if err := r.cfg.execHook(rel, release.HookTest, r.Timeout); err != nil {
+	if err := r.cfg.execHookEvent(rel, release.HookTest, r.Timeout, r.HookParallelism); err != nil {
 		r.cfg.Releases.Update(rel)
 		return rel, err
 	}
diff --git a/pkg/action/rollback.go b/pkg/action/rollback.go
index 942c9d8af..7ace309c2 100644
--- a/pkg/action/rollback.go
+++ b/pkg/action/rollback.go
@@ -34,14 +34,15 @@ import (
 type Rollback struct {
 	cfg *Configuration
 
-	Version       int
-	Timeout       time.Duration
-	Wait          bool
-	DisableHooks  bool
-	DryRun        bool
-	Recreate      bool // will (if true) recreate pods after a rollback.
-	Force         bool // will (if true) force resource upgrade through uninstall/recreate if needed
-	CleanupOnFail bool
+	Version         int
+	Timeout         time.Duration
+	Wait            bool
+	DisableHooks    bool
+	HookParallelism int
+	DryRun          bool
+	Recreate        bool // will (if true) recreate pods after a rollback.
+	Force           bool // will (if true) force resource upgrade through uninstall/recreate if needed
+	CleanupOnFail   bool
 }
 
 // NewRollback creates a new Rollback object with the given configuration.
@@ -152,7 +153,7 @@ func (r *Rollback) performRollback(currentRelease, targetRelease *release.Releas
 
 	// pre-rollback hooks
 	if !r.DisableHooks {
-		if err := r.cfg.execHook(targetRelease, release.HookPreRollback, r.Timeout); err != nil {
+		if err := r.cfg.execHookEvent(targetRelease, release.HookPreRollback, r.Timeout, r.HookParallelism); err != nil {
 			return targetRelease, err
 		}
 	} else {
@@ -205,7 +206,7 @@ func (r *Rollback) performRollback(currentRelease, targetRelease *release.Releas
 
 	// post-rollback hooks
 	if !r.DisableHooks {
-		if err := r.cfg.execHook(targetRelease, release.HookPostRollback, r.Timeout); err != nil {
+		if err := r.cfg.execHookEvent(targetRelease, release.HookPostRollback, r.Timeout, r.HookParallelism); err != nil {
 			return targetRelease, err
 		}
 	}
diff --git a/pkg/action/uninstall.go b/pkg/action/uninstall.go
index dfaa98472..0182c6e06 100644
--- a/pkg/action/uninstall.go
+++ b/pkg/action/uninstall.go
@@ -33,11 +33,12 @@ import (
 type Uninstall struct {
 	cfg *Configuration
 
-	DisableHooks bool
-	DryRun       bool
-	KeepHistory  bool
-	Timeout      time.Duration
-	Description  string
+	DisableHooks    bool
+	HookParallelism int
+	DryRun          bool
+	KeepHistory     bool
+	Timeout         time.Duration
+	Description     string
 }
 
 // NewUninstall creates a new Uninstall object with the given configuration.
@@ -96,7 +97,7 @@ func (u *Uninstall) Run(name string) (*release.UninstallReleaseResponse, error)
 	res := &release.UninstallReleaseResponse{Release: rel}
 
 	if !u.DisableHooks {
-		if err := u.cfg.execHook(rel, release.HookPreDelete, u.Timeout); err != nil {
+		if err := u.cfg.execHookEvent(rel, release.HookPreDelete, u.Timeout, u.HookParallelism); err != nil {
 			return res, err
 		}
 	} else {
@@ -113,7 +114,7 @@ func (u *Uninstall) Run(name string) (*release.UninstallReleaseResponse, error)
 	res.Info = kept
 
 	if !u.DisableHooks {
-		if err := u.cfg.execHook(rel, release.HookPostDelete, u.Timeout); err != nil {
+		if err := u.cfg.execHookEvent(rel, release.HookPostDelete, u.Timeout, u.HookParallelism); err != nil {
 			errs = append(errs, err)
 		}
 	}
diff --git a/pkg/action/upgrade.go b/pkg/action/upgrade.go
index c8e71c6d4..03ed5366c 100644
--- a/pkg/action/upgrade.go
+++ b/pkg/action/upgrade.go
@@ -46,14 +46,15 @@ type Upgrade struct {
 	Devel     bool
 	Namespace string
 	// SkipCRDs skip installing CRDs when install flag is enabled during upgrade
-	SkipCRDs     bool
-	Timeout      time.Duration
-	Wait         bool
-	DisableHooks bool
-	DryRun       bool
-	Force        bool
-	ResetValues  bool
-	ReuseValues  bool
+	SkipCRDs        bool
+	Timeout         time.Duration
+	Wait            bool
+	DisableHooks    bool
+	HookParallelism int
+	DryRun          bool
+	Force           bool
+	ResetValues     bool
+	ReuseValues     bool
 	// Recreate will (if true) recreate pods after a rollback.
 	Recreate bool
 	// MaxHistory limits the maximum number of revisions saved per release
@@ -258,7 +259,7 @@ func (u *Upgrade) performUpgrade(originalRelease, upgradedRelease *release.Relea
 
 	// pre-upgrade hooks
 	if !u.DisableHooks {
-		if err := u.cfg.execHook(upgradedRelease, release.HookPreUpgrade, u.Timeout); err != nil {
+		if err := u.cfg.execHookEvent(upgradedRelease, release.HookPreUpgrade, u.Timeout, u.HookParallelism); err != nil {
 			return u.failRelease(upgradedRelease, kube.ResourceList{}, fmt.Errorf("pre-upgrade hooks failed: %s", err))
 		}
 	} else {
@@ -290,7 +291,7 @@ func (u *Upgrade) performUpgrade(originalRelease, upgradedRelease *release.Relea
 
 	// post-upgrade hooks
 	if !u.DisableHooks {
-		if err := u.cfg.execHook(upgradedRelease, release.HookPostUpgrade, u.Timeout); err != nil {
+		if err := u.cfg.execHookEvent(upgradedRelease, release.HookPostUpgrade, u.Timeout, u.HookParallelism); err != nil {
 			return u.failRelease(upgradedRelease, results.Created, fmt.Errorf("post-upgrade hooks failed: %s", err))
 		}
 	}
@@ -354,6 +355,7 @@ func (u *Upgrade) failRelease(rel *release.Release, created kube.ResourceList, e
 		rollin.Version = filteredHistory[0].Version
 		rollin.Wait = true
 		rollin.DisableHooks = u.DisableHooks
+		rollin.HookParallelism = u.HookParallelism
 		rollin.Recreate = u.Recreate
 		rollin.Force = u.Force
 		rollin.Timeout = u.Timeout