diff --git a/cmd/helm/dependency_build.go b/cmd/helm/dependency_build.go index 2cf0c6c81..c89e4990c 100644 --- a/cmd/helm/dependency_build.go +++ b/cmd/helm/dependency_build.go @@ -26,8 +26,10 @@ import ( "helm.sh/helm/v3/cmd/helm/require" "helm.sh/helm/v3/pkg/action" + "helm.sh/helm/v3/pkg/cache" "helm.sh/helm/v3/pkg/downloader" "helm.sh/helm/v3/pkg/getter" + "helm.sh/helm/v3/pkg/repo" ) const dependencyBuildDesc = ` @@ -54,6 +56,7 @@ func newDependencyBuildCmd(cfg *action.Configuration, out io.Writer) *cobra.Comm if len(args) > 0 { chartpath = filepath.Clean(args[0]) } + var c cache.Cache[*repo.IndexFile] = cache.NewConcurrentMapCache[*repo.IndexFile]() man := &downloader.Manager{ Out: out, ChartPath: chartpath, @@ -64,6 +67,7 @@ func newDependencyBuildCmd(cfg *action.Configuration, out io.Writer) *cobra.Comm RepositoryConfig: settings.RepositoryConfig, RepositoryCache: settings.RepositoryCache, Debug: settings.Debug, + IndexFileCache: &c, } if client.Verify { man.Verify = downloader.VerifyIfPossible diff --git a/cmd/helm/dependency_update.go b/cmd/helm/dependency_update.go index cb6e9c0cc..9bf9fb313 100644 --- a/cmd/helm/dependency_update.go +++ b/cmd/helm/dependency_update.go @@ -23,8 +23,10 @@ import ( "helm.sh/helm/v3/cmd/helm/require" "helm.sh/helm/v3/pkg/action" + "helm.sh/helm/v3/pkg/cache" "helm.sh/helm/v3/pkg/downloader" "helm.sh/helm/v3/pkg/getter" + "helm.sh/helm/v3/pkg/repo" ) const dependencyUpDesc = ` @@ -57,6 +59,9 @@ func newDependencyUpdateCmd(cfg *action.Configuration, out io.Writer) *cobra.Com if len(args) > 0 { chartpath = filepath.Clean(args[0]) } + // the helm client cli is run as a short-lived process so it's ok to use a simple + // implementation for a strictly growing cache + var c cache.Cache[*repo.IndexFile] = cache.NewConcurrentMapCache[*repo.IndexFile]() man := &downloader.Manager{ Out: out, ChartPath: chartpath, @@ -67,6 +72,7 @@ func newDependencyUpdateCmd(cfg *action.Configuration, out io.Writer) *cobra.Com RepositoryConfig: settings.RepositoryConfig, RepositoryCache: settings.RepositoryCache, Debug: settings.Debug, + IndexFileCache: &c, } if client.Verify { man.Verify = downloader.VerifyAlways diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index c5fc63643..7a7edd74b 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -27,6 +27,7 @@ import ( "github.com/Masterminds/semver/v3" "github.com/pkg/errors" + "helm.sh/helm/v3/pkg/cache" "helm.sh/helm/v3/pkg/chart" "helm.sh/helm/v3/pkg/chart/loader" "helm.sh/helm/v3/pkg/helmpath" @@ -51,9 +52,8 @@ func New(chartpath, cachepath string, registryClient *registry.Client) *Resolver } } -// Resolve resolves dependencies and returns a lock file with the resolution. -func (r *Resolver) Resolve(reqs []*chart.Dependency, repoNames map[string]string) (*chart.Lock, error) { - +// Resolve resolves dependencies with a cache and returns a lock file with the resolution. +func (r *Resolver) Resolve(reqs []*chart.Dependency, repoNames map[string]string, c *cache.Cache[*repo.IndexFile]) (*chart.Lock, error) { // Now we clone the dependencies, locking as we go. locked := make([]*chart.Dependency, len(reqs)) missing := []string{} @@ -122,7 +122,7 @@ func (r *Resolver) Resolve(reqs []*chart.Dependency, repoNames map[string]string var ok bool found := true if !registry.IsOCI(d.Repository) { - repoIndex, err := repo.LoadIndexFile(filepath.Join(r.cachepath, helmpath.CacheIndexFile(repoName))) + repoIndex, err := repo.LoadIndexFileWithCache(filepath.Join(r.cachepath, helmpath.CacheIndexFile(repoName)), c) if err != nil { return nil, errors.Wrapf(err, "no cached repository for %s found. (try 'helm repo update')", repoName) } diff --git a/internal/resolver/resolver_test.go b/internal/resolver/resolver_test.go index a79852175..108a6a489 100644 --- a/internal/resolver/resolver_test.go +++ b/internal/resolver/resolver_test.go @@ -144,7 +144,7 @@ func TestResolve(t *testing.T) { r := New("testdata/chartpath", "testdata/repository", registryClient) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - l, err := r.Resolve(tt.req, repoNames) + l, err := r.Resolve(tt.req, repoNames, nil) if err != nil { if tt.err { return diff --git a/pkg/cache/cache.go b/pkg/cache/cache.go new file mode 100644 index 000000000..b66841ea1 --- /dev/null +++ b/pkg/cache/cache.go @@ -0,0 +1,57 @@ +package cache + +import ( + "fmt" + "sync" +) + +// Cache interface defines the methods for a cache +type Cache[V any] interface { + // Set adds an item to the cache + Set(key string, value V) + + // Get retrieves an item from the cache + // The boolean return value indicates whether the key was found + Get(key string) (V, bool) +} + +// NoOpCache implements Cache interface with no-op operations +type NoOpCache[V any] struct{} + +func NewNoOpCache[V any]() *NoOpCache[V] { + return &NoOpCache[V]{} +} + +func (c *NoOpCache[V]) Set(key string, value V) {} + +func (c *NoOpCache[V]) Get(key string) (V, bool) { + var zero V + return zero, false +} + +// ConcurrentMapCache implements Cache interface using a concurrent map +type ConcurrentMapCache[V any] struct { + items map[string]V + mu sync.RWMutex +} + +func NewConcurrentMapCache[V any]() *ConcurrentMapCache[V] { + return &ConcurrentMapCache[V]{ + items: make(map[string]V), + } +} + +func (c *ConcurrentMapCache[V]) Set(key string, value V) { + c.mu.Lock() + defer c.mu.Unlock() + c.items[key] = value + fmt.Println("set", key) +} + +func (c *ConcurrentMapCache[V]) Get(key string) (V, bool) { + c.mu.RLock() + defer c.mu.RUnlock() + value, exists := c.items[key] + fmt.Println("get", key, "exists", exists) + return value, exists +} diff --git a/pkg/cache/cache_test.go b/pkg/cache/cache_test.go new file mode 100644 index 000000000..3f7bb7580 --- /dev/null +++ b/pkg/cache/cache_test.go @@ -0,0 +1,52 @@ +package cache + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestNoOpCache(t *testing.T) { + cache := NewNoOpCache[string]() + + t.Run("Set", func(t *testing.T) { + cache.Set("key", "value") + // No assertion needed as it's a no-op + }) + + t.Run("Get", func(t *testing.T) { + value, exists := cache.Get("key") + assert.False(t, exists) + assert.Empty(t, value) + }) +} + +func TestConcurrentMapCache(t *testing.T) { + cache := NewConcurrentMapCache[int]() + + t.Run("Set and Get", func(t *testing.T) { + cache.Set("key1", 42) + cache.Set("key2", 84) + + value1, exists1 := cache.Get("key1") + assert.True(t, exists1) + assert.Equal(t, 42, value1) + + value2, exists2 := cache.Get("key2") + assert.True(t, exists2) + assert.Equal(t, 84, value2) + }) + + t.Run("Get non-existent key", func(t *testing.T) { + value, exists := cache.Get("non-existent") + assert.False(t, exists) + assert.Zero(t, value) + }) + + t.Run("Overwrite existing key", func(t *testing.T) { + cache.Set("key1", 100) + value, exists := cache.Get("key1") + assert.True(t, exists) + assert.Equal(t, 100, value) + }) +} diff --git a/pkg/downloader/manager.go b/pkg/downloader/manager.go index d5340575d..0dfdd0f32 100644 --- a/pkg/downloader/manager.go +++ b/pkg/downloader/manager.go @@ -36,6 +36,7 @@ import ( "helm.sh/helm/v3/internal/resolver" "helm.sh/helm/v3/internal/third_party/dep/fs" "helm.sh/helm/v3/internal/urlutil" + "helm.sh/helm/v3/pkg/cache" "helm.sh/helm/v3/pkg/chart" "helm.sh/helm/v3/pkg/chart/loader" "helm.sh/helm/v3/pkg/chartutil" @@ -75,6 +76,8 @@ type Manager struct { RegistryClient *registry.Client RepositoryConfig string RepositoryCache string + // IndexFileCache holds parsed IndexFiles as YAML parsing big files is a very slow operation + IndexFileCache *cache.Cache[*repo.IndexFile] } // Build rebuilds a local charts directory from a lockfile. @@ -232,7 +235,7 @@ func (m *Manager) loadChartDir() (*chart.Chart, error) { // This returns a lock file, which has all of the dependencies normalized to a specific version. func (m *Manager) resolve(req []*chart.Dependency, repoNames map[string]string) (*chart.Lock, error) { res := resolver.New(m.ChartPath, m.RepositoryCache, m.RegistryClient) - return res.Resolve(req, repoNames) + return res.Resolve(req, repoNames, m.IndexFileCache) } // downloadAll takes a list of dependencies and downloads them into charts/ @@ -742,7 +745,7 @@ func (m *Manager) findChartURL(name, version, repoURL string, repos map[string]* return } } - url, err = repo.FindChartInRepoURL(repoURL, name, version, certFile, keyFile, caFile, m.Getters) + url, err = repo.FindChartInRepoURLWithCache(repoURL, name, version, certFile, keyFile, caFile, m.Getters, m.IndexFileCache) if err == nil { return url, username, password, false, false, "", "", "", err } diff --git a/pkg/repo/chartrepo.go b/pkg/repo/chartrepo.go index 970e96da2..d6225471d 100644 --- a/pkg/repo/chartrepo.go +++ b/pkg/repo/chartrepo.go @@ -31,6 +31,7 @@ import ( "github.com/pkg/errors" "sigs.k8s.io/yaml" + "helm.sh/helm/v3/pkg/cache" "helm.sh/helm/v3/pkg/chart/loader" "helm.sh/helm/v3/pkg/getter" "helm.sh/helm/v3/pkg/helmpath" @@ -199,14 +200,27 @@ func (r *ChartRepository) generateIndex() error { // FindChartInRepoURL finds chart in chart repository pointed by repoURL // without adding repo to repositories func FindChartInRepoURL(repoURL, chartName, chartVersion, certFile, keyFile, caFile string, getters getter.Providers) (string, error) { - return FindChartInAuthRepoURL(repoURL, "", "", chartName, chartVersion, certFile, keyFile, caFile, getters) + return FindChartInRepoURLWithCache(repoURL, chartName, chartVersion, certFile, keyFile, caFile, getters, nil) +} + +// FindChartInRepoURLWithCache finds chart in chart repository pointed by repoURL +// without adding repo to repositories. Also uses a cache for IndexFiles +func FindChartInRepoURLWithCache(repoURL, chartName, chartVersion, certFile, keyFile, caFile string, getters getter.Providers, c *cache.Cache[*IndexFile]) (string, error) { + return FindChartInAuthRepoURLWithCache(repoURL, "", "", chartName, chartVersion, certFile, keyFile, caFile, getters, c) } // FindChartInAuthRepoURL finds chart in chart repository pointed by repoURL // without adding repo to repositories, like FindChartInRepoURL, // but it also receives credentials for the chart repository. func FindChartInAuthRepoURL(repoURL, username, password, chartName, chartVersion, certFile, keyFile, caFile string, getters getter.Providers) (string, error) { - return FindChartInAuthAndTLSRepoURL(repoURL, username, password, chartName, chartVersion, certFile, keyFile, caFile, false, getters) + return FindChartInAuthRepoURLWithCache(repoURL, username, password, chartName, chartVersion, certFile, keyFile, caFile, getters, nil) +} + +// FindChartInAuthRepoURLWithCache finds chart in chart repository pointed by repoURL +// without adding repo to repositories, like FindChartInRepoURL, +// but it also receives credentials for the chart repository. Also uses a cache for IndexFiles +func FindChartInAuthRepoURLWithCache(repoURL, username, password, chartName, chartVersion, certFile, keyFile, caFile string, getters getter.Providers, c *cache.Cache[*IndexFile]) (string, error) { + return FindChartInAuthAndTLSRepoURLWithCache(repoURL, username, password, chartName, chartVersion, certFile, keyFile, caFile, false, getters, c) } // FindChartInAuthAndTLSRepoURL finds chart in chart repository pointed by repoURL @@ -214,7 +228,15 @@ func FindChartInAuthRepoURL(repoURL, username, password, chartName, chartVersion // but it also receives credentials and TLS verify flag for the chart repository. // TODO Helm 4, FindChartInAuthAndTLSRepoURL should be integrated into FindChartInAuthRepoURL. func FindChartInAuthAndTLSRepoURL(repoURL, username, password, chartName, chartVersion, certFile, keyFile, caFile string, insecureSkipTLSverify bool, getters getter.Providers) (string, error) { - return FindChartInAuthAndTLSAndPassRepoURL(repoURL, username, password, chartName, chartVersion, certFile, keyFile, caFile, insecureSkipTLSverify, false, getters) + return FindChartInAuthAndTLSRepoURLWithCache(repoURL, username, password, chartName, chartVersion, certFile, keyFile, caFile, insecureSkipTLSverify, getters, nil) +} + +// FindChartInAuthAndTLSRepoURLWithCache finds chart in chart repository pointed by repoURL +// without adding repo to repositories, like FindChartInRepoURL, +// but it also receives credentials and TLS verify flag for the chart repository. Also uses a cache for IndexFiles +// TODO Helm 4, FindChartInAuthAndTLSRepoURLWithCache should be integrated into FindChartInAuthRepoURL. +func FindChartInAuthAndTLSRepoURLWithCache(repoURL, username, password, chartName, chartVersion, certFile, keyFile, caFile string, insecureSkipTLSverify bool, getters getter.Providers, c *cache.Cache[*IndexFile]) (string, error) { + return FindChartInAuthAndTLSAndPassRepoURLWithCache(repoURL, username, password, chartName, chartVersion, certFile, keyFile, caFile, insecureSkipTLSverify, false, getters, c) } // FindChartInAuthAndTLSAndPassRepoURL finds chart in chart repository pointed by repoURL @@ -223,6 +245,15 @@ func FindChartInAuthAndTLSRepoURL(repoURL, username, password, chartName, chartV // be passed on to other domains. // TODO Helm 4, FindChartInAuthAndTLSAndPassRepoURL should be integrated into FindChartInAuthRepoURL. func FindChartInAuthAndTLSAndPassRepoURL(repoURL, username, password, chartName, chartVersion, certFile, keyFile, caFile string, insecureSkipTLSverify, passCredentialsAll bool, getters getter.Providers) (string, error) { + return FindChartInAuthAndTLSAndPassRepoURLWithCache(repoURL, username, password, chartName, chartVersion, certFile, keyFile, caFile, insecureSkipTLSverify, passCredentialsAll, getters, nil) +} + +// FindChartInAuthAndTLSAndPassRepoURLWithCache finds chart in chart repository pointed by repoURL +// without adding repo to repositories, like FindChartInRepoURL, +// but it also receives credentials, TLS verify flag, and if credentials should +// be passed on to other domains. Also uses a cache for IndexFiles +// TODO Helm 4, FindChartInAuthAndTLSAndPassRepoURL should be integrated into FindChartInAuthRepoURL. +func FindChartInAuthAndTLSAndPassRepoURLWithCache(repoURL, username, password, chartName, chartVersion, certFile, keyFile, caFile string, insecureSkipTLSverify, passCredentialsAll bool, getters getter.Providers, indexFileCache *cache.Cache[*IndexFile]) (string, error) { // Download and write the index file to a temporary location buf := make([]byte, 20) @@ -254,7 +285,7 @@ func FindChartInAuthAndTLSAndPassRepoURL(repoURL, username, password, chartName, }() // Read the index file for the repository to get chart information and return chart URL - repoIndex, err := LoadIndexFile(idx) + repoIndex, err := LoadIndexFileWithCache(idx, indexFileCache) if err != nil { return "", err } diff --git a/pkg/repo/index.go b/pkg/repo/index.go index 40b11c5cf..8e78faba6 100644 --- a/pkg/repo/index.go +++ b/pkg/repo/index.go @@ -18,6 +18,8 @@ package repo import ( "bytes" + "crypto/sha512" + "encoding/hex" "encoding/json" "log" "os" @@ -33,6 +35,7 @@ import ( "helm.sh/helm/v3/internal/fileutil" "helm.sh/helm/v3/internal/urlutil" + "helm.sh/helm/v3/pkg/cache" "helm.sh/helm/v3/pkg/chart" "helm.sh/helm/v3/pkg/chart/loader" "helm.sh/helm/v3/pkg/provenance" @@ -104,14 +107,36 @@ func NewIndexFile() *IndexFile { // LoadIndexFile takes a file at the given path and returns an IndexFile object func LoadIndexFile(path string) (*IndexFile, error) { + return LoadIndexFileWithCache(path, nil) +} + +// LoadIndexFile takes a file at the given path and a Cache, and returns an IndexFile object +func LoadIndexFileWithCache(path string, c *cache.Cache[*IndexFile]) (*IndexFile, error) { + if c == nil { + var noOpCache cache.Cache[*IndexFile] = cache.NewNoOpCache[*IndexFile]() + c = &noOpCache + } b, err := os.ReadFile(path) if err != nil { return nil, err } + + // Calculate the cache key by the file content + hasher := sha512.New() + hasher.Write(b) + cacheKey := hex.EncodeToString(hasher.Sum(nil)) + + if cached, ok := (*c).Get(cacheKey); ok { + return cached, nil + } + i, err := loadIndex(b, path) if err != nil { return nil, errors.Wrapf(err, "error loading %s", path) } + + (*c).Set(cacheKey, i) + return i, nil }