From 1ce79398388c778480963bf952495c11da2f94af Mon Sep 17 00:00:00 2001 From: JvD_Ericsson Date: Fri, 13 Jan 2023 11:56:12 +0000 Subject: [PATCH] Improve helm dependency update performance What this PR does / why we need it: This PR was created to improve performance of the dependency update command by skipping unnecessary downloading and loading of index files that have already been downloaded and loaded. I believe this would close refs #9865. Signed-off-by: Jeff van Dam --- internal/resolver/resolver.go | 44 +++++++++++++++++++++--------- internal/resolver/resolver_test.go | 2 +- pkg/downloader/chart_downloader.go | 25 ++++++----------- pkg/downloader/manager.go | 31 +++++++++++++++------ pkg/downloader/manager_test.go | 6 ++-- 5 files changed, 67 insertions(+), 41 deletions(-) diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 5e8921f96..4c70f8c43 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -52,21 +52,23 @@ func New(chartpath, cachepath string, registryClient *registry.Client) *Resolver } // Resolve resolves dependencies and returns a lock file with the resolution. -func (r *Resolver) Resolve(reqs []*chart.Dependency, repoNames map[string]string) (*chart.Lock, error) { +func (r *Resolver) Resolve(reqs []*chart.Dependency, repoNames map[string]string) (*chart.Lock, map[string]string, error) { // Now we clone the dependencies, locking as we go. 
locked := make([]*chart.Dependency, len(reqs)) missing := []string{} + loadedIndexFiles := make(map[string]*repo.IndexFile) + urls := make(map[string]string) for i, d := range reqs { constraint, err := semver.NewConstraint(d.Version) if err != nil { - return nil, errors.Wrapf(err, "dependency %q has an invalid version/constraint format", d.Name) + return nil, nil, errors.Wrapf(err, "dependency %q has an invalid version/constraint format", d.Name) } if d.Repository == "" { // Local chart subfolder if _, err := GetLocalPath(filepath.Join("charts", d.Name), r.chartpath); err != nil { - return nil, err + return nil, nil, err } locked[i] = &chart.Dependency{ @@ -80,12 +82,12 @@ func (r *Resolver) Resolve(reqs []*chart.Dependency, repoNames map[string]string chartpath, err := GetLocalPath(d.Repository, r.chartpath) if err != nil { - return nil, err + return nil, nil, err } ch, err := loader.LoadDir(chartpath) if err != nil { - return nil, err + return nil, nil, err } v, err := semver.NewVersion(ch.Metadata.Version) @@ -123,14 +125,26 @@ func (r *Resolver) Resolve(reqs []*chart.Dependency, repoNames map[string]string var ok bool found := true if !registry.IsOCI(d.Repository) { - repoIndex, err := repo.LoadIndexFile(filepath.Join(r.cachepath, helmpath.CacheIndexFile(repoName))) - if err != nil { - return nil, errors.Wrapf(err, "no cached repository for %s found. (try 'helm repo update')", repoName) + filepath := filepath.Join(r.cachepath, helmpath.CacheIndexFile(repoName)) + var repoIndex *repo.IndexFile + + // Store previously loaded index files in a map. If repositories share the + // same index file there is no need to reload the same file again. This + // improves performance. + if indexFile, loaded := loadedIndexFiles[filepath]; !loaded { + var err error + repoIndex, err = repo.LoadIndexFile(filepath) + loadedIndexFiles[filepath] = repoIndex + if err != nil { + return nil, nil, errors.Wrapf(err, "no cached repository for %s found. 
(try 'helm repo update')", repoName) + } + } else { + repoIndex = indexFile } vs, ok = repoIndex.Entries[d.Name] if !ok { - return nil, errors.Errorf("%s chart not found in repo %s", d.Name, d.Repository) + return nil, nil, errors.Errorf("%s chart not found in repo %s", d.Name, d.Repository) } found = false } else { @@ -152,7 +166,7 @@ func (r *Resolver) Resolve(reqs []*chart.Dependency, repoNames map[string]string ref := fmt.Sprintf("%s/%s", strings.TrimPrefix(d.Repository, fmt.Sprintf("%s://", registry.OCIScheme)), d.Name) tags, err := r.registryClient.Tags(ref) if err != nil { - return nil, errors.Wrapf(err, "could not retrieve list of tags for repository %s", d.Repository) + return nil, nil, errors.Wrapf(err, "could not retrieve list of tags for repository %s", d.Repository) } vs = make(repo.ChartVersions, len(tags)) @@ -173,6 +187,7 @@ func (r *Resolver) Resolve(reqs []*chart.Dependency, repoNames map[string]string Repository: d.Repository, Version: version, } + // The version are already sorted and hence the first one to satisfy the constraint is used for _, ver := range vs { v, err := semver.NewVersion(ver.Version) @@ -183,6 +198,9 @@ func (r *Resolver) Resolve(reqs []*chart.Dependency, repoNames map[string]string } if constraint.Check(v) { found = true + if len(ver.URLs) > 0 { + urls[ver.Name] = ver.URLs[0] + } locked[i].Version = v.Original() break } @@ -193,19 +211,19 @@ func (r *Resolver) Resolve(reqs []*chart.Dependency, repoNames map[string]string } } if len(missing) > 0 { - return nil, errors.Errorf("can't get a valid version for repositories %s. Try changing the version constraint in Chart.yaml", strings.Join(missing, ", ")) + return nil, nil, errors.Errorf("can't get a valid version for repositories %s. 
Try changing the version constraint in Chart.yaml", strings.Join(missing, ", ")) } digest, err := HashReq(reqs, locked) if err != nil { - return nil, err + return nil, nil, err } return &chart.Lock{ Generated: time.Now(), Digest: digest, Dependencies: locked, - }, nil + }, urls, nil } // HashReq generates a hash of the dependencies. diff --git a/internal/resolver/resolver_test.go b/internal/resolver/resolver_test.go index a79852175..a4c4ef351 100644 --- a/internal/resolver/resolver_test.go +++ b/internal/resolver/resolver_test.go @@ -144,7 +144,7 @@ func TestResolve(t *testing.T) { r := New("testdata/chartpath", "testdata/repository", registryClient) for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - l, err := r.Resolve(tt.req, repoNames) + l, _, err := r.Resolve(tt.req, repoNames) if err != nil { if tt.err { return diff --git a/pkg/downloader/chart_downloader.go b/pkg/downloader/chart_downloader.go index 29a9d64c2..8be6d4962 100644 --- a/pkg/downloader/chart_downloader.go +++ b/pkg/downloader/chart_downloader.go @@ -27,7 +27,6 @@ import ( "github.com/pkg/errors" "helm.sh/helm/v3/internal/fileutil" - "helm.sh/helm/v3/internal/urlutil" "helm.sh/helm/v3/pkg/getter" "helm.sh/helm/v3/pkg/helmpath" "helm.sh/helm/v3/pkg/provenance" @@ -184,11 +183,11 @@ func (c *ChartDownloader) getOciURI(ref, version string, u *url.URL) (*url.URL, // // A version is a SemVer string (1.2.3-beta.1+f334a6789). 
// -// - For fully qualified URLs, the version will be ignored (since URLs aren't versioned) -// - For a chart reference -// * If version is non-empty, this will return the URL for that version -// * If version is empty, this will return the URL for the latest version -// * If no version can be found, an error is returned +// - For fully qualified URLs, the version will be ignored (since URLs aren't versioned) +// - For a chart reference +// - If version is non-empty, this will return the URL for that version +// - If version is empty, this will return the URL for the latest version +// - If no version can be found, an error is returned func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, error) { u, err := url.Parse(ref) if err != nil { @@ -378,19 +377,13 @@ func (c *ChartDownloader) scanReposForURL(u string, rf *repo.File) (*repo.Entry, } idxFile := filepath.Join(c.RepositoryCache, helmpath.CacheIndexFile(r.Config.Name)) - i, err := repo.LoadIndexFile(idxFile) + yamlFile, err := os.ReadFile(idxFile) if err != nil { return nil, errors.Wrap(err, "no cached repo found. (try 'helm repo update')") } - - for _, entry := range i.Entries { - for _, ver := range entry { - for _, dl := range ver.URLs { - if urlutil.Equal(u, dl) { - return rc, nil - } - } - } + file := string(yamlFile[:]) + if strings.Contains(file, u) { + return rc, nil } } // This means that there is no repo file for the given URL. diff --git a/pkg/downloader/manager.go b/pkg/downloader/manager.go index 18b28dde1..131cc9759 100644 --- a/pkg/downloader/manager.go +++ b/pkg/downloader/manager.go @@ -142,7 +142,7 @@ func (m *Manager) Build() error { } // Now we need to fetch every package here into charts/ - return m.downloadAll(lock.Dependencies) + return m.downloadAll(lock.Dependencies, nil) } // Update updates a local charts directory. 
@@ -192,13 +192,13 @@ func (m *Manager) Update() error { // Now we need to find out which version of a chart best satisfies the // dependencies in the Chart.yaml - lock, err := m.resolve(req, repoNames) + lock, urls, err := m.resolve(req, repoNames) if err != nil { return err } // Now we need to fetch every package here into charts/ - if err := m.downloadAll(lock.Dependencies); err != nil { + if err := m.downloadAll(lock.Dependencies, urls); err != nil { return err } @@ -231,7 +231,7 @@ func (m *Manager) loadChartDir() (*chart.Chart, error) { // resolve takes a list of dependencies and translates them into an exact version to download. // // This returns a lock file, which has all of the dependencies normalized to a specific version. -func (m *Manager) resolve(req []*chart.Dependency, repoNames map[string]string) (*chart.Lock, error) { +func (m *Manager) resolve(req []*chart.Dependency, repoNames map[string]string) (*chart.Lock, map[string]string, error) { res := resolver.New(m.ChartPath, m.RepositoryCache, m.RegistryClient) return res.Resolve(req, repoNames) } @@ -240,7 +240,7 @@ func (m *Manager) resolve(req []*chart.Dependency, repoNames map[string]string) // // It will delete versions of the chart that exist on disk and might cause // a conflict. 
-func (m *Manager) downloadAll(deps []*chart.Dependency) error { +func (m *Manager) downloadAll(deps []*chart.Dependency, urls map[string]string) error { repos, err := m.loadChartRepositories() if err != nil { return err @@ -313,7 +313,7 @@ func (m *Manager) downloadAll(deps []*chart.Dependency) error { // Any failure to resolve/download a chart should fail: // https://github.com/helm/helm/issues/1439 - churl, username, password, insecureskiptlsverify, passcredentialsall, caFile, certFile, keyFile, err := m.findChartURL(dep.Name, dep.Version, dep.Repository, repos) + churl, username, password, insecureskiptlsverify, passcredentialsall, caFile, certFile, keyFile, err := m.findChartURL(dep.Name, dep.Version, dep.Repository, repos, urls) if err != nil { saveError = errors.Wrapf(err, "could not find %s", churl) break @@ -502,6 +502,7 @@ func (m *Manager) ensureMissingRepos(repoNames map[string]string, deps []*chart. var ru []*repo.Entry +Outer: for _, dd := range deps { // If the chart is in the local charts directory no repository needs @@ -529,6 +530,14 @@ func (m *Manager) ensureMissingRepos(repoNames map[string]string, deps []*chart. repoNames[dd.Name] = rn + // If repository is already present don't add to array. This will skip + // unnecessary index file downloading improving performance. + for _, item := range ru { + if item.URL == dd.Repository { + continue Outer + } + } + // Assuming the repository is generally available. For Helm managed // access controls the repository needs to be added through the user // managed system. This path will work for public charts, like those @@ -703,7 +712,7 @@ func (m *Manager) parallelRepoUpdate(repos []*repo.Entry) error { // repoURL is the repository to search // // If it finds a URL that is "relative", it will prepend the repoURL. 
-func (m *Manager) findChartURL(name, version, repoURL string, repos map[string]*repo.ChartRepository) (url, username, password string, insecureskiptlsverify, passcredentialsall bool, caFile, certFile, keyFile string, err error) { +func (m *Manager) findChartURL(name, version, repoURL string, repos map[string]*repo.ChartRepository, urls map[string]string) (url, username, password string, insecureskiptlsverify, passcredentialsall bool, caFile, certFile, keyFile string, err error) { if registry.IsOCI(repoURL) { return fmt.Sprintf("%s/%s:%s", repoURL, name, version), "", "", false, false, "", "", "", nil } @@ -735,7 +744,13 @@ func (m *Manager) findChartURL(name, version, repoURL string, repos map[string]* return } } - url, err = repo.FindChartInRepoURL(repoURL, name, version, certFile, keyFile, caFile, m.Getters) + + if _, ok := urls[name]; ok { + url = urls[name] + } else { + url, err = repo.FindChartInRepoURL(repoURL, name, version, certFile, keyFile, caFile, m.Getters) + } + if err == nil { return url, username, password, false, false, "", "", "", err } diff --git a/pkg/downloader/manager_test.go b/pkg/downloader/manager_test.go index f7ab1a568..8e7ce5fed 100644 --- a/pkg/downloader/manager_test.go +++ b/pkg/downloader/manager_test.go @@ -84,7 +84,7 @@ func TestFindChartURL(t *testing.T) { version := "0.1.0" repoURL := "http://example.com/charts" - churl, username, password, insecureSkipTLSVerify, passcredentialsall, _, _, _, err := m.findChartURL(name, version, repoURL, repos) + churl, username, password, insecureSkipTLSVerify, passcredentialsall, _, _, _, err := m.findChartURL(name, version, repoURL, repos, make(map[string]string)) if err != nil { t.Fatal(err) } @@ -109,7 +109,7 @@ func TestFindChartURL(t *testing.T) { version = "1.2.3" repoURL = "https://example-https-insecureskiptlsverify.com" - churl, username, password, insecureSkipTLSVerify, passcredentialsall, _, _, _, err = m.findChartURL(name, version, repoURL, repos) + churl, username, password, 
insecureSkipTLSVerify, passcredentialsall, _, _, _, err = m.findChartURL(name, version, repoURL, repos, make(map[string]string)) if err != nil { t.Fatal(err) } @@ -255,7 +255,7 @@ func TestDownloadAll(t *testing.T) { if err := os.MkdirAll(filepath.Join(chartPath, "tmpcharts"), 0755); err != nil { t.Fatal(err) } - if err := m.downloadAll([]*chart.Dependency{signDep, localDep}); err != nil { + if err := m.downloadAll([]*chart.Dependency{signDep, localDep}, make(map[string]string)); err != nil { t.Error(err) }