Initial addition of content based cache

The previous cache was based on chart name and version. If 2 charts
with different content had the same name and version they would collide.
Helm did not trust the cache because of this and always downloaded
content. It was a short lived cache.

This commit introduces a content based cache which is based on the
content rather than file name. Charts with the same name but different
content are no longer an issue.

While the system assumes a file based interface, the cache system
is pluggable. In the future, it should return bytes for the content
instead of paths to it. That would require a larger change for Helm 5
or later.

Signed-off-by: Matt Farina <matt.farina@suse.com>
pull/31165/head
Matt Farina 2 weeks ago
parent ba53075a9d
commit 6ac2c34689
No known key found for this signature in database
GPG Key ID: 92C44A3D421FF7F9

@ -792,7 +792,7 @@ func (c *ChartPathOptions) LocateChart(name string, settings *cli.EnvSettings) (
return abs, err return abs, err
} }
if c.Verify { if c.Verify {
if _, err := downloader.VerifyChart(abs, c.Keyring); err != nil { if _, err := downloader.VerifyChart(abs, abs+".prov", c.Keyring); err != nil {
return "", err return "", err
} }
} }
@ -868,7 +868,7 @@ func (c *ChartPathOptions) LocateChart(name string, settings *cli.EnvSettings) (
return "", err return "", err
} }
filename, _, err := dl.DownloadTo(name, version, settings.RepositoryCache) filename, _, err := dl.DownloadToCache(name, version)
if err != nil { if err != nil {
return "", err return "", err
} }

@ -39,7 +39,7 @@ func NewVerify() *Verify {
// Run executes 'helm verify'. // Run executes 'helm verify'.
func (v *Verify) Run(chartfile string) error { func (v *Verify) Run(chartfile string) error {
var out strings.Builder var out strings.Builder
p, err := downloader.VerifyChart(chartfile, v.Keyring) p, err := downloader.VerifyChart(chartfile, chartfile+".prov", v.Keyring)
if err != nil { if err != nil {
return err return err
} }

@ -0,0 +1,86 @@
/*
Copyright The Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package downloader
import (
"crypto/sha256"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"helm.sh/helm/v4/internal/fileutil"
)
// Cache describes a cache that can get and put chart data.
// The cache key is the sha256 hash of the content. sha256 is used in Helm for
// digests in index files providing a common key for checking content.
type Cache interface {
// Get looks up the entry stored for the given key and returns a string
// locating it (DiskCache returns a file path). prov selects the provenance
// entry for the key rather than the chart archive.
Get(key [sha256.Size]byte, prov bool) (string, error)
// Put stores the content read from data under the given key and returns a
// string locating the stored entry (DiskCache returns a file path). prov
// marks the content as the provenance entry for the key rather than the
// chart archive.
Put(key [sha256.Size]byte, data io.Reader, prov bool) (string, error)
}
// TODO: The cache assumes files because much of Helm assumes files. Convert
// Helm to pass content around instead of file locations.
// DiskCache is a cache that stores data on disk.
type DiskCache struct {
// Root is the directory under which the cached files are stored.
Root string
}
// Get looks up the cached file for the given key and returns its path on disk.
//
// prov selects the provenance file stored for the key instead of the chart
// archive. The returned error is:
//   - the os.Stat error (together with an empty path) when the file cannot be
//     stat'ed, which includes the file not existing;
//   - os.ErrInvalid when the path is a directory;
//   - os.ErrNotExist when the file exists but is empty.
func (c *DiskCache) Get(key [sha256.Size]byte, prov bool) (string, error) {
	p := c.fileName(key, prov)

	fi, err := os.Stat(p)
	if err != nil {
		return "", err
	}

	// Directories should never happen unless something outside helm is
	// operating on this content. This is checked before the size because the
	// size reported for a directory is system-dependent and may be zero, which
	// would otherwise misreport the directory as a missing entry.
	if fi.IsDir() {
		return p, os.ErrInvalid
	}

	// Empty files are treated as not existing because there is no content.
	if fi.Size() == 0 {
		return p, os.ErrNotExist
	}

	return p, nil
}
// Put writes the content read from data into the cache under the given key
// and returns the path of the cache file. prov selects the provenance file
// name for the key rather than the chart archive name. The parent directory
// is created when it does not exist yet.
func (c *DiskCache) Put(key [sha256.Size]byte, data io.Reader, prov bool) (string, error) {
	// TODO: verify the key and digest of the key are the same.
	target := c.fileName(key, prov)
	parent := filepath.Dir(target)

	mkErr := os.MkdirAll(parent, 0755)
	if mkErr != nil {
		slog.Error("failed to create cache directory")
		return target, mkErr
	}

	writeErr := fileutil.AtomicWriteFile(target, data, 0644)
	return target, writeErr
}
// fileName builds the on-disk location of a cache entry. Entries live in a
// sub-directory of Root named after the first byte of the key (two hex
// digits); the file itself is named with the full hex encoded key followed by
// ".prov" for provenance data or ".tgz" otherwise.
func (c *DiskCache) fileName(id [sha256.Size]byte, prov bool) string {
	var ext string
	switch {
	case prov:
		ext = ".prov"
	default:
		ext = ".tgz"
	}

	// Each byte renders as exactly two hex digits, so the first two characters
	// of the encoded key are the first byte of the key.
	encoded := fmt.Sprintf("%x", id)
	return filepath.Join(c.Root, encoded[:2], encoded+ext)
}

@ -16,6 +16,9 @@ limitations under the License.
package downloader package downloader
import ( import (
"bytes"
"crypto/sha256"
"encoding/hex"
"errors" "errors"
"fmt" "fmt"
"io" "io"
@ -72,6 +75,9 @@ type ChartDownloader struct {
RegistryClient *registry.Client RegistryClient *registry.Client
RepositoryConfig string RepositoryConfig string
RepositoryCache string RepositoryCache string
// Cache specifies the cache implementation to use.
Cache Cache
} }
// DownloadTo retrieves a chart. Depending on the settings, it may also download a provenance file. // DownloadTo retrieves a chart. Depending on the settings, it may also download a provenance file.
@ -86,7 +92,10 @@ type ChartDownloader struct {
// Returns a string path to the location where the file was downloaded and a verification // Returns a string path to the location where the file was downloaded and a verification
// (if provenance was verified), or an error if something bad happened. // (if provenance was verified), or an error if something bad happened.
func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *provenance.Verification, error) { func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *provenance.Verification, error) {
u, err := c.ResolveChartVersion(ref, version) if c.Cache == nil {
c.Cache = &DiskCache{Root: c.RepositoryCache}
}
hash, u, err := c.ResolveChartVersion(ref, version)
if err != nil { if err != nil {
return "", nil, err return "", nil, err
} }
@ -96,11 +105,36 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven
return "", nil, err return "", nil, err
} }
c.Options = append(c.Options, getter.WithAcceptHeader("application/gzip,application/octet-stream")) // Check the cache for the content. Otherwise download it.
// Note, this process will pull from the cache but does not automatically populate
// the cache with the file it downloads.
var data *bytes.Buffer
var found bool
var digest []byte
var digest32 [32]byte
if hash != "" {
// if there is a hash, populate the other formats
digest, err = hex.DecodeString(hash)
if err != nil {
return "", nil, err
}
copy(digest32[:], digest)
if pth, err := c.Cache.Get(digest32, false); err == nil {
fdata, err := os.ReadFile(pth)
if err == nil {
found = true
data = bytes.NewBuffer(fdata)
}
}
}
data, err := g.Get(u.String(), c.Options...) if !found {
if err != nil { c.Options = append(c.Options, getter.WithAcceptHeader("application/gzip,application/octet-stream"))
return "", nil, err
data, err = g.Get(u.String(), c.Options...)
if err != nil {
return "", nil, err
}
} }
name := filepath.Base(u.Path) name := filepath.Base(u.Path)
@ -117,13 +151,26 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven
// If provenance is requested, verify it. // If provenance is requested, verify it.
ver := &provenance.Verification{} ver := &provenance.Verification{}
if c.Verify > VerifyNever { if c.Verify > VerifyNever {
body, err := g.Get(u.String() + ".prov") found = false
if err != nil { var body *bytes.Buffer
if c.Verify == VerifyAlways { if hash != "" {
return destfile, ver, fmt.Errorf("failed to fetch provenance %q", u.String()+".prov") if pth, err := c.Cache.Get(digest32, true); err == nil {
fdata, err := os.ReadFile(pth)
if err == nil {
found = true
body = bytes.NewBuffer(fdata)
}
}
}
if !found {
body, err = g.Get(u.String() + ".prov")
if err != nil {
if c.Verify == VerifyAlways {
return destfile, ver, fmt.Errorf("failed to fetch provenance %q", u.String()+".prov")
}
fmt.Fprintf(c.Out, "WARNING: Verification not found for %s: %s\n", ref, err)
return destfile, ver, nil
} }
fmt.Fprintf(c.Out, "WARNING: Verification not found for %s: %s\n", ref, err)
return destfile, ver, nil
} }
provfile := destfile + ".prov" provfile := destfile + ".prov"
if err := fileutil.AtomicWriteFile(provfile, body, 0644); err != nil { if err := fileutil.AtomicWriteFile(provfile, body, 0644); err != nil {
@ -131,7 +178,7 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven
} }
if c.Verify != VerifyLater { if c.Verify != VerifyLater {
ver, err = VerifyChart(destfile, c.Keyring) ver, err = VerifyChart(destfile, destfile+".prov", c.Keyring)
if err != nil { if err != nil {
// Fail always in this case, since it means the verification step // Fail always in this case, since it means the verification step
// failed. // failed.
@ -142,10 +189,105 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven
return destfile, ver, nil return destfile, ver, nil
} }
// DownloadToCache retrieves a chart, and depending on the Verify setting its
// provenance file, while using a content based cache.
//
// The reference is resolved with ResolveChartVersion. When the resolver
// supplies a digest the cache is consulted first and the chart is only
// downloaded on a cache miss. When no digest is available the chart is always
// downloaded and the sha256 of the downloaded content is used as the cache
// key. Downloaded content is stored in the cache.
//
// If c.Cache is nil a DiskCache rooted at c.RepositoryCache is installed.
//
// It returns the path of the cached chart, a verification (populated only if
// the provenance was verified), and an error if something went wrong.
func (c *ChartDownloader) DownloadToCache(ref, version string) (string, *provenance.Verification, error) {
	if c.Cache == nil {
		c.Cache = &DiskCache{Root: c.RepositoryCache}
	}

	digestString, u, err := c.ResolveChartVersion(ref, version)
	if err != nil {
		return "", nil, err
	}

	g, err := c.Getters.ByScheme(u.Scheme)
	if err != nil {
		return "", nil, err
	}

	c.Options = append(c.Options, getter.WithAcceptHeader("application/gzip,application/octet-stream"))

	// Decode the hex digest (if any) into the fixed size cache key. An empty
	// digest string decodes to an empty slice and leaves the key zeroed until
	// it is computed from the downloaded content below.
	digest, err := hex.DecodeString(digestString)
	if err != nil {
		return "", nil, fmt.Errorf("unable to decode digest: %w", err)
	}
	var digest32 [sha256.Size]byte
	copy(digest32[:], digest)

	var pth string
	// Only fetch from the cache if we have a digest.
	if len(digest) > 0 {
		pth, err = c.Cache.Get(digest32, false)
	}
	if len(digest) == 0 || err != nil {
		// A missing entry is the expected cache miss; anything else is fatal.
		// errors.Is is used so wrapped errors from other Cache implementations
		// are recognized as well.
		if err != nil && !errors.Is(err, os.ErrNotExist) {
			return "", nil, err
		}

		// Get the file that is not in the cache.
		data, gerr := g.Get(u.String(), c.Options...)
		if gerr != nil {
			return "", nil, gerr
		}

		// Without a digest from the resolver, key the entry by the sha256 of
		// the downloaded content. Bytes does not consume the buffer, so the
		// content is still available for Put.
		if len(digest) == 0 {
			digest32 = sha256.Sum256(data.Bytes())
		}

		pth, err = c.Cache.Put(digest32, data, false)
		if err != nil {
			return "", nil, err
		}
	}

	// If provenance is requested, verify it.
	ver := &provenance.Verification{}
	if c.Verify > VerifyNever {
		ppth, err := c.Cache.Get(digest32, true)
		if err != nil {
			if !errors.Is(err, os.ErrNotExist) {
				return pth, ver, err
			}

			// The provenance file is not cached yet; fetch it next to the chart.
			body, err := g.Get(u.String() + ".prov")
			if err != nil {
				if c.Verify == VerifyAlways {
					return pth, ver, fmt.Errorf("failed to fetch provenance %q", u.String()+".prov")
				}
				fmt.Fprintf(c.Out, "WARNING: Verification not found for %s: %s\n", ref, err)
				return pth, ver, nil
			}

			ppth, err = c.Cache.Put(digest32, body, true)
			if err != nil {
				return "", nil, err
			}
		}

		if c.Verify != VerifyLater {
			ver, err = VerifyChart(pth, ppth, c.Keyring)
			if err != nil {
				// Fail always in this case, since it means the verification step
				// failed.
				return pth, ver, err
			}
		}
	}
	return pth, ver, nil
}
// ResolveChartVersion resolves a chart reference to a URL. // ResolveChartVersion resolves a chart reference to a URL.
// //
// It returns the URL and sets the ChartDownloader's Options that can fetch // It returns:
// the URL using the appropriate Getter. // - A hash of the content if available
// - The URL and sets the ChartDownloader's Options that can fetch the URL using the appropriate Getter.
// - An error if there is one
// //
// A reference may be an HTTP URL, an oci reference URL, a 'reponame/chartname' // A reference may be an HTTP URL, an oci reference URL, a 'reponame/chartname'
// reference, or a local path. // reference, or a local path.
@ -157,23 +299,26 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven
// - If version is non-empty, this will return the URL for that version // - If version is non-empty, this will return the URL for that version
// - If version is empty, this will return the URL for the latest version // - If version is empty, this will return the URL for the latest version
// - If no version can be found, an error is returned // - If no version can be found, an error is returned
func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, error) { //
// TODO: support OCI hash
func (c *ChartDownloader) ResolveChartVersion(ref, version string) (string, *url.URL, error) {
u, err := url.Parse(ref) u, err := url.Parse(ref)
if err != nil { if err != nil {
return nil, fmt.Errorf("invalid chart URL format: %s", ref) return "", nil, fmt.Errorf("invalid chart URL format: %s", ref)
} }
if registry.IsOCI(u.String()) { if registry.IsOCI(u.String()) {
if c.RegistryClient == nil { if c.RegistryClient == nil {
return nil, fmt.Errorf("unable to lookup ref %s at version '%s', missing registry client", ref, version) return "", nil, fmt.Errorf("unable to lookup ref %s at version '%s', missing registry client", ref, version)
} }
return c.RegistryClient.ValidateReference(ref, version, u) digest, OCIref, err := c.RegistryClient.ValidateReference(ref, version, u)
return digest, OCIref, err
} }
rf, err := loadRepoConfig(c.RepositoryConfig) rf, err := loadRepoConfig(c.RepositoryConfig)
if err != nil { if err != nil {
return u, err return "", u, err
} }
if u.IsAbs() && len(u.Host) > 0 && len(u.Path) > 0 { if u.IsAbs() && len(u.Host) > 0 && len(u.Path) > 0 {
@ -190,9 +335,9 @@ func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, er
if err == ErrNoOwnerRepo { if err == ErrNoOwnerRepo {
// Make sure to add the ref URL as the URL for the getter // Make sure to add the ref URL as the URL for the getter
c.Options = append(c.Options, getter.WithURL(ref)) c.Options = append(c.Options, getter.WithURL(ref))
return u, nil return "", u, nil
} }
return u, err return "", u, err
} }
// If we get here, we don't need to go through the next phase of looking // If we get here, we don't need to go through the next phase of looking
@ -211,20 +356,20 @@ func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, er
getter.WithPassCredentialsAll(rc.PassCredentialsAll), getter.WithPassCredentialsAll(rc.PassCredentialsAll),
) )
} }
return u, nil return "", u, nil
} }
// See if it's of the form: repo/path_to_chart // See if it's of the form: repo/path_to_chart
p := strings.SplitN(u.Path, "/", 2) p := strings.SplitN(u.Path, "/", 2)
if len(p) < 2 { if len(p) < 2 {
return u, fmt.Errorf("non-absolute URLs should be in form of repo_name/path_to_chart, got: %s", u) return "", u, fmt.Errorf("non-absolute URLs should be in form of repo_name/path_to_chart, got: %s", u)
} }
repoName := p[0] repoName := p[0]
chartName := p[1] chartName := p[1]
rc, err := pickChartRepositoryConfigByName(repoName, rf.Repositories) rc, err := pickChartRepositoryConfigByName(repoName, rf.Repositories)
if err != nil { if err != nil {
return u, err return "", u, err
} }
// Now that we have the chart repository information we can use that URL // Now that we have the chart repository information we can use that URL
@ -233,7 +378,7 @@ func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, er
r, err := repo.NewChartRepository(rc, c.Getters) r, err := repo.NewChartRepository(rc, c.Getters)
if err != nil { if err != nil {
return u, err return "", u, err
} }
if r != nil && r.Config != nil { if r != nil && r.Config != nil {
@ -252,32 +397,33 @@ func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, er
idxFile := filepath.Join(c.RepositoryCache, helmpath.CacheIndexFile(r.Config.Name)) idxFile := filepath.Join(c.RepositoryCache, helmpath.CacheIndexFile(r.Config.Name))
i, err := repo.LoadIndexFile(idxFile) i, err := repo.LoadIndexFile(idxFile)
if err != nil { if err != nil {
return u, fmt.Errorf("no cached repo found. (try 'helm repo update'): %w", err) return "", u, fmt.Errorf("no cached repo found. (try 'helm repo update'): %w", err)
} }
cv, err := i.Get(chartName, version) cv, err := i.Get(chartName, version)
if err != nil { if err != nil {
return u, fmt.Errorf("chart %q matching %s not found in %s index. (try 'helm repo update'): %w", chartName, version, r.Config.Name, err) return "", u, fmt.Errorf("chart %q matching %s not found in %s index. (try 'helm repo update'): %w", chartName, version, r.Config.Name, err)
} }
if len(cv.URLs) == 0 { if len(cv.URLs) == 0 {
return u, fmt.Errorf("chart %q has no downloadable URLs", ref) return "", u, fmt.Errorf("chart %q has no downloadable URLs", ref)
} }
// TODO: Seems that picking first URL is not fully correct // TODO: Seems that picking first URL is not fully correct
resolvedURL, err := repo.ResolveReferenceURL(rc.URL, cv.URLs[0]) resolvedURL, err := repo.ResolveReferenceURL(rc.URL, cv.URLs[0])
if err != nil { if err != nil {
return u, fmt.Errorf("invalid chart URL format: %s", ref) return cv.Digest, u, fmt.Errorf("invalid chart URL format: %s", ref)
} }
return url.Parse(resolvedURL) loc, err := url.Parse(resolvedURL)
return cv.Digest, loc, err
} }
// VerifyChart takes a path to a chart archive and a keyring, and verifies the chart. // VerifyChart takes a path to a chart archive and a keyring, and verifies the chart.
// //
// It assumes that a chart archive file is accompanied by a provenance file whose // It assumes that a chart archive file is accompanied by a provenance file whose
// name is the archive file name plus the ".prov" extension. // name is the archive file name plus the ".prov" extension.
func VerifyChart(path, keyring string) (*provenance.Verification, error) { func VerifyChart(path, provfile, keyring string) (*provenance.Verification, error) {
// For now, error out if it's not a tar file. // For now, error out if it's not a tar file.
switch fi, err := os.Stat(path); { switch fi, err := os.Stat(path); {
case err != nil: case err != nil:
@ -288,7 +434,6 @@ func VerifyChart(path, keyring string) (*provenance.Verification, error) {
return nil, errors.New("chart must be a tgz file") return nil, errors.New("chart must be a tgz file")
} }
provfile := path + ".prov"
if _, err := os.Stat(provfile); err != nil { if _, err := os.Stat(provfile); err != nil {
return nil, fmt.Errorf("could not load provenance file %s: %w", provfile, err) return nil, fmt.Errorf("could not load provenance file %s: %w", provfile, err)
} }

@ -79,7 +79,7 @@ func TestResolveChartRef(t *testing.T) {
} }
for _, tt := range tests { for _, tt := range tests {
u, err := c.ResolveChartVersion(tt.ref, tt.version) _, u, err := c.ResolveChartVersion(tt.ref, tt.version)
if err != nil { if err != nil {
if tt.fail { if tt.fail {
continue continue
@ -131,7 +131,7 @@ func TestResolveChartOpts(t *testing.T) {
continue continue
} }
u, err := c.ResolveChartVersion(tt.ref, tt.version) _, u, err := c.ResolveChartVersion(tt.ref, tt.version)
if err != nil { if err != nil {
t.Errorf("%s: failed with error %s", tt.name, err) t.Errorf("%s: failed with error %s", tt.name, err)
continue continue
@ -155,7 +155,7 @@ func TestResolveChartOpts(t *testing.T) {
} }
func TestVerifyChart(t *testing.T) { func TestVerifyChart(t *testing.T) {
v, err := VerifyChart("testdata/signtest-0.1.0.tgz", "testdata/helm-test-key.pub") v, err := VerifyChart("testdata/signtest-0.1.0.tgz", "testdata/signtest-0.1.0.tgz.prov", "testdata/helm-test-key.pub")
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }

@ -823,12 +823,12 @@ func (c *Client) Resolve(ref string) (desc ocispec.Descriptor, err error) {
} }
// ValidateReference for path and version // ValidateReference for path and version
func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, error) { func (c *Client) ValidateReference(ref, version string, u *url.URL) (string, *url.URL, error) {
var tag string var tag string
registryReference, err := newReference(u.Host + u.Path) registryReference, err := newReference(u.Host + u.Path)
if err != nil { if err != nil {
return nil, err return "", nil, err
} }
if version == "" { if version == "" {
@ -836,14 +836,14 @@ func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, e
version = registryReference.Tag version = registryReference.Tag
} else { } else {
if registryReference.Tag != "" && registryReference.Tag != version { if registryReference.Tag != "" && registryReference.Tag != version {
return nil, fmt.Errorf("chart reference and version mismatch: %s is not %s", version, registryReference.Tag) return "", nil, fmt.Errorf("chart reference and version mismatch: %s is not %s", version, registryReference.Tag)
} }
} }
if registryReference.Digest != "" { if registryReference.Digest != "" {
if version == "" { if version == "" {
// Install by digest only // Install by digest only
return u, nil return "", u, nil
} }
u.Path = fmt.Sprintf("%s@%s", registryReference.Repository, registryReference.Digest) u.Path = fmt.Sprintf("%s@%s", registryReference.Repository, registryReference.Digest)
@ -852,12 +852,12 @@ func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, e
desc, err := c.Resolve(path) desc, err := c.Resolve(path)
if err != nil { if err != nil {
// The resource does not have to be tagged when digest is specified // The resource does not have to be tagged when digest is specified
return u, nil return "", u, nil
} }
if desc.Digest.String() != registryReference.Digest { if desc.Digest.String() != registryReference.Digest {
return nil, fmt.Errorf("chart reference digest mismatch: %s is not %s", desc.Digest.String(), registryReference.Digest) return "", nil, fmt.Errorf("chart reference digest mismatch: %s is not %s", desc.Digest.String(), registryReference.Digest)
} }
return u, nil return registryReference.Digest, u, nil
} }
// Evaluate whether an explicit version has been provided. Otherwise, determine version to use // Evaluate whether an explicit version has been provided. Otherwise, determine version to use
@ -868,10 +868,10 @@ func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, e
// Retrieve list of repository tags // Retrieve list of repository tags
tags, err := c.Tags(strings.TrimPrefix(ref, fmt.Sprintf("%s://", OCIScheme))) tags, err := c.Tags(strings.TrimPrefix(ref, fmt.Sprintf("%s://", OCIScheme)))
if err != nil { if err != nil {
return nil, err return "", nil, err
} }
if len(tags) == 0 { if len(tags) == 0 {
return nil, fmt.Errorf("unable to locate any tags in provided repository: %s", ref) return "", nil, fmt.Errorf("unable to locate any tags in provided repository: %s", ref)
} }
// Determine if version provided // Determine if version provided
@ -880,13 +880,14 @@ func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, e
// If semver constraint string, try to find a match // If semver constraint string, try to find a match
tag, err = GetTagMatchingVersionOrConstraint(tags, version) tag, err = GetTagMatchingVersionOrConstraint(tags, version)
if err != nil { if err != nil {
return nil, err return "", nil, err
} }
} }
u.Path = fmt.Sprintf("%s:%s", registryReference.Repository, tag) u.Path = fmt.Sprintf("%s:%s", registryReference.Repository, tag)
// desc, err := c.Resolve(u.Path)
return u, err return "", u, err
} }
// tagManifest prepares and tags a manifest in memory storage // tagManifest prepares and tags a manifest in memory storage

Loading…
Cancel
Save