From 656fbaa9bd59e4972f49d49566c8e5013cd085b0 Mon Sep 17 00:00:00 2001 From: Holger Metschulat Date: Sat, 26 Jun 2021 00:03:30 +0200 Subject: [PATCH 1/3] first prototype Signed-off-by: Holger Metschulat --- internal/resolver/resolver.go | 2 +- pkg/downloader/chart_downloader.go | 2 +- pkg/downloader/manager.go | 2 +- pkg/repo/index.go | 25 +++++++++++++++++++++++++ 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go index 70ce6a55b..018f14e36 100644 --- a/internal/resolver/resolver.go +++ b/internal/resolver/resolver.go @@ -123,7 +123,7 @@ func (r *Resolver) Resolve(reqs []*chart.Dependency, repoNames map[string]string var ok bool found := true if !registry.IsOCI(d.Repository) { - repoIndex, err := repo.LoadIndexFile(filepath.Join(r.cachepath, helmpath.CacheIndexFile(repoName))) + repoIndex, err := repo.LoadIndexFileWithCaching(filepath.Join(r.cachepath, helmpath.CacheIndexFile(repoName))) if err != nil { return nil, errors.Wrapf(err, "no cached repository for %s found. (try 'helm repo update')", repoName) } diff --git a/pkg/downloader/chart_downloader.go b/pkg/downloader/chart_downloader.go index 93afb1461..e3cb8e572 100644 --- a/pkg/downloader/chart_downloader.go +++ b/pkg/downloader/chart_downloader.go @@ -356,7 +356,7 @@ func (c *ChartDownloader) scanReposForURL(u string, rf *repo.File) (*repo.Entry, } idxFile := filepath.Join(c.RepositoryCache, helmpath.CacheIndexFile(r.Config.Name)) - i, err := repo.LoadIndexFile(idxFile) + i, err := repo.LoadIndexFileWithCaching(idxFile) if err != nil { return nil, errors.Wrap(err, "no cached repo found. (try 'helm repo update')") } diff --git a/pkg/downloader/manager.go b/pkg/downloader/manager.go index 52f1a1312..5c78a84a2 100644 --- a/pkg/downloader/manager.go +++ b/pkg/downloader/manager.go @@ -824,7 +824,7 @@ func (m *Manager) loadChartRepositories() (map[string]*repo.ChartRepository, err for _, re := range rf.Repositories { lname := re.Name idxFile := filepath.Join(m.RepositoryCache, helmpath.CacheIndexFile(lname)) - index, err := repo.LoadIndexFile(idxFile) + index, err := repo.LoadIndexFileWithCaching(idxFile) if err != nil { return indices, err } diff --git a/pkg/repo/index.go b/pkg/repo/index.go index 1b65ac497..34609bb2b 100644 --- a/pkg/repo/index.go +++ b/pkg/repo/index.go @@ -25,6 +25,7 @@ import ( "path/filepath" "sort" "strings" + "sync" "time" "github.com/Masterminds/semver/v3" @@ -115,6 +116,30 @@ func LoadIndexFile(path string) (*IndexFile, error) { return i, nil } +var cache = make(map[string]*IndexFile) +var cacheLock sync.RWMutex + +func LoadIndexFileWithCaching(path string) (*IndexFile, error) { + // if already in cache, return cached entry + cacheLock.RLock() + if idx, ok := cache[path]; ok { + cacheLock.RUnlock() + // safe to return a pointer to the cached entry here since once in cache + // entry will never be overwritten + return idx, nil + } + cacheLock.RUnlock() + // not in cache, need to load from disk + idx, err := LoadIndexFile(path) + if err == nil { + // we fetched the index successfully. Store it in cache. + cacheLock.Lock() + cache[path] = idx + cacheLock.Unlock() + } + return idx, err +} + // MustAdd adds a file to the index // This can leave the index in an unsorted state func (i IndexFile) MustAdd(md *chart.Metadata, filename, baseURL, digest string) error { From 6fe61a0377c2daca47fba9c3313f0cc78d8cf709 Mon Sep 17 00:00:00 2001 From: Dirk Moermans Date: Fri, 26 Nov 2021 16:06:26 +0100 Subject: [PATCH 2/3] tune implementation / add test-case Signed-off-by: Dirk Moermans --- pkg/downloader/chart_downloader.go | 2 +- pkg/repo/chartrepo.go | 4 +-- pkg/repo/chartrepo_test.go | 4 +-- pkg/repo/index.go | 48 ++++++++++++++++++------------ pkg/repo/index_test.go | 40 +++++++++++++++++++++++++ 5 files changed, 74 insertions(+), 24 deletions(-) diff --git a/pkg/downloader/chart_downloader.go b/pkg/downloader/chart_downloader.go index e3cb8e572..7d2111769 100644 --- a/pkg/downloader/chart_downloader.go +++ b/pkg/downloader/chart_downloader.go @@ -239,7 +239,7 @@ func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, er // Next, we need to load the index, and actually look up the chart. idxFile := filepath.Join(c.RepositoryCache, helmpath.CacheIndexFile(r.Config.Name)) - i, err := repo.LoadIndexFile(idxFile) + i, err := repo.LoadIndexFileWithCaching(idxFile) if err != nil { return u, errors.Wrap(err, "no cached repo found. (try 'helm repo update')") } diff --git a/pkg/repo/chartrepo.go b/pkg/repo/chartrepo.go index 956997cc9..4485625f2 100644 --- a/pkg/repo/chartrepo.go +++ b/pkg/repo/chartrepo.go @@ -100,7 +100,7 @@ func (r *ChartRepository) Load() error { filepath.Walk(r.Config.Name, func(path string, f os.FileInfo, err error) error { if !f.IsDir() { if strings.Contains(f.Name(), "-index.yaml") { - i, err := LoadIndexFile(path) + i, err := LoadIndexFileWithCaching(path) if err != nil { return err } @@ -255,7 +255,7 @@ func FindChartInAuthAndTLSAndPassRepoURL(repoURL, username, password, chartName, } // Read the index file for the repository to get chart information and return chart URL - repoIndex, err := LoadIndexFile(idx) + repoIndex, err := LoadIndexFileWithCaching(idx) if err != nil { return "", err } diff --git a/pkg/repo/chartrepo_test.go b/pkg/repo/chartrepo_test.go index 0f317b2fd..cc97996d6 100644 --- a/pkg/repo/chartrepo_test.go +++ b/pkg/repo/chartrepo_test.go @@ -93,7 +93,7 @@ func TestIndex(t *testing.T) { } tempIndexPath := filepath.Join(testRepository, indexPath) - actual, err := LoadIndexFile(tempIndexPath) + actual, err := LoadIndexFileWithCaching(tempIndexPath) defer os.Remove(tempIndexPath) // clean up if err != nil { t.Errorf("Error loading index file %v", err) @@ -105,7 +105,7 @@ func TestIndex(t *testing.T) { if err != nil { t.Errorf("Error performing re-index: %s\n", err) } - second, err := LoadIndexFile(tempIndexPath) + second, err := LoadIndexFileWithCaching(tempIndexPath) if err != nil { t.Errorf("Error re-loading index file %v", err) } diff --git a/pkg/repo/index.go b/pkg/repo/index.go index 34609bb2b..e1c641502 100644 --- a/pkg/repo/index.go +++ b/pkg/repo/index.go @@ -116,28 +116,38 @@ func LoadIndexFile(path string) (*IndexFile, error) { return i, nil } -var cache = make(map[string]*IndexFile) -var cacheLock sync.RWMutex +type cacheEntry struct { + indexFile *IndexFile + err error + lock sync.RWMutex +} + +var cache sync.Map +// LoadIndexFileWithCaching is a wrapper around LoadIndexFile +// it adds an internal global cache to avoid reading the same file multiple times func LoadIndexFileWithCaching(path string) (*IndexFile, error) { - // if already in cache, return cached entry - cacheLock.RLock() - if idx, ok := cache[path]; ok { - cacheLock.RUnlock() - // safe to return a pointer to the cached entry here since once in cache - // entry will never be overwritten - return idx, nil - } - cacheLock.RUnlock() - // not in cache, need to load from disk - idx, err := LoadIndexFile(path) - if err == nil { - // we fetched the index successfully. Store it in cache. - cacheLock.Lock() - cache[path] = idx - cacheLock.Unlock() + // assume the entry is not cached, prepare a newEntry + newEntry := &cacheEntry{} + // lock to avoid threading issues in case of multiple loads in parallel + newEntry.lock.Lock() + + // check if index already in cache, add newEntry atomically if not-cached + if v, loaded := cache.LoadOrStore(path, newEntry); loaded { + // We hit the cache. newEntry is not needed anymore + newEntry.lock.Unlock() + entry := v.(*cacheEntry) + // wait for RLock on cached entry + // LoadIndexFile might not have completed yet in another go-routine + entry.lock.RLock() + defer entry.lock.RUnlock() + return entry.indexFile, entry.err } - return idx, err + // no entry found in cache + // LoadIndexFile while holding the write-lock + newEntry.indexFile, newEntry.err = LoadIndexFile(path) + defer newEntry.lock.Unlock() + return newEntry.indexFile, newEntry.err } // MustAdd adds a file to the index diff --git a/pkg/repo/index_test.go b/pkg/repo/index_test.go index 292856527..92b9f658d 100644 --- a/pkg/repo/index_test.go +++ b/pkg/repo/index_test.go @@ -27,6 +27,7 @@ import ( "strings" "testing" + "golang.org/x/sync/errgroup" "helm.sh/helm/v3/pkg/chart" "helm.sh/helm/v3/pkg/cli" "helm.sh/helm/v3/pkg/getter" @@ -145,6 +146,45 @@ func TestLoadIndex(t *testing.T) { } } +func TestLoadIndexWithCaching(t *testing.T) { + var eg errgroup.Group + + const nrParallelTests = 10 + + var indexFiles [nrParallelTests]*IndexFile + + // Load same index multiple times in parallel + for i := 0; i < nrParallelTests; i++ { + indexNr := i + eg.Go(func() (err error) { + indexFiles[indexNr], err = LoadIndexFileWithCaching(chartmuseumtestfile) + return err + }) + } + + if err := eg.Wait(); err != nil { + t.Fatal(err) + } + + // we check if the same cache entry is used by comparing pointers to indexFiles + // if the pointers are the same, we assume the same cache entry is used. + for i := 1; i < nrParallelTests; i++ { + if indexFiles[i] != indexFiles[0] { + t.Fatal("not all indices retrieved from same cache entry") + } + } + + // load another index, and check if new_entry is used + otherFile, err := LoadIndexFileWithCaching(testfile) + if err != nil { + t.Fatal(err) + } + + if otherFile == indexFiles[0] { + t.Fatal("same index used for different files") + } +} + // TestLoadIndex_Duplicates is a regression to make sure that we don't non-deterministically allow duplicate packages. func TestLoadIndex_Duplicates(t *testing.T) { if _, err := loadIndex([]byte(indexWithDuplicates), "indexWithDuplicates"); err == nil { From 7374b5110b749c3f361f25d835e03d622fa48771 Mon Sep 17 00:00:00 2001 From: Dirk Moermans Date: Mon, 29 Nov 2021 10:31:27 +0100 Subject: [PATCH 3/3] use correct formatter Signed-off-by: Dirk Moermans --- pkg/repo/index_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/repo/index_test.go b/pkg/repo/index_test.go index 92b9f658d..4a630fe74 100644 --- a/pkg/repo/index_test.go +++ b/pkg/repo/index_test.go @@ -28,6 +28,7 @@ import ( "testing" "golang.org/x/sync/errgroup" + "helm.sh/helm/v3/pkg/chart" "helm.sh/helm/v3/pkg/cli" "helm.sh/helm/v3/pkg/getter"