Initial addition of content based cache

The previous cache was based on chart name and version. If 2 charts
with different content had the same name and version they would collide.
Helm did not trust the cache because of this and always downloaded
content. It was a short lived cache.

This commit introduces a content based cache which is based on the
content rather than file name. Charts with the same name but different
content are no longer an issue.

While the system assumes a file based interface, the cache system
is pluggable. In the future, it should return bytes for the content
instead of paths to it. That would require a larger change for Helm 5
or later.

Signed-off-by: Matt Farina <matt.farina@suse.com>
pull/31165/head
Matt Farina 2 weeks ago
parent ba53075a9d
commit 6ac2c34689
No known key found for this signature in database
GPG Key ID: 92C44A3D421FF7F9

@ -792,7 +792,7 @@ func (c *ChartPathOptions) LocateChart(name string, settings *cli.EnvSettings) (
return abs, err
}
if c.Verify {
if _, err := downloader.VerifyChart(abs, c.Keyring); err != nil {
if _, err := downloader.VerifyChart(abs, abs+".prov", c.Keyring); err != nil {
return "", err
}
}
@ -868,7 +868,7 @@ func (c *ChartPathOptions) LocateChart(name string, settings *cli.EnvSettings) (
return "", err
}
filename, _, err := dl.DownloadTo(name, version, settings.RepositoryCache)
filename, _, err := dl.DownloadToCache(name, version)
if err != nil {
return "", err
}

@ -39,7 +39,7 @@ func NewVerify() *Verify {
// Run executes 'helm verify'.
func (v *Verify) Run(chartfile string) error {
var out strings.Builder
p, err := downloader.VerifyChart(chartfile, v.Keyring)
p, err := downloader.VerifyChart(chartfile, chartfile+".prov", v.Keyring)
if err != nil {
return err
}

@ -0,0 +1,86 @@
/*
Copyright The Helm Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package downloader
import (
"crypto/sha256"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
"helm.sh/helm/v4/internal/fileutil"
)
// Cache describes a cache that can get and put chart data.
// The cache key is the sha256 hash of the content. sha256 is used in Helm for
// digests in index files providing a common key for checking content.
type Cache interface {
// Get looks up the entry stored under key and returns a string locating it
// (DiskCache returns a file path). When prov is true the provenance entry
// for the key is looked up instead of the chart archive.
Get(key [sha256.Size]byte, prov bool) (string, error)
// Put stores the content read from data under key and returns a string
// locating the stored entry (DiskCache returns a file path). When prov is
// true the content is stored as the provenance entry for the key instead
// of the chart archive.
Put(key [sha256.Size]byte, data io.Reader, prov bool) (string, error)
}
// TODO: The cache assumes files because much of Helm assumes files. Convert
// Helm to pass content around instead of file locations.

// DiskCache is a Cache implementation that stores each entry as a file on disk.
type DiskCache struct {
// Root is the directory under which all cache files are placed.
Root string
}
// Get returns the path of the cached file stored under key.
//
// When prov is true the provenance (".prov") file for the key is looked up
// instead of the chart archive (".tgz").
//
// Errors:
//   - the error from os.Stat, unchanged, when the file cannot be inspected
//     (callers can test it with os.IsNotExist); the returned path is empty.
//   - os.ErrInvalid when the path is a directory.
//   - os.ErrNotExist when the file exists but is empty.
func (c *DiskCache) Get(key [sha256.Size]byte, prov bool) (string, error) {
	p := c.fileName(key, prov)

	fi, err := os.Stat(p)
	if err != nil {
		return "", err
	}

	// Directories should never happen unless something outside helm is
	// operating on this content. This is checked before the size because
	// FileInfo.Size is system-dependent for anything but regular files, so a
	// directory may report a size of zero and must not be mistaken for a
	// missing entry.
	if fi.IsDir() {
		return p, os.ErrInvalid
	}

	// Empty files are treated as not existing because there is no content.
	if fi.Size() == 0 {
		return p, os.ErrNotExist
	}

	return p, nil
}
// Put writes the content read from data to the cache file for key, creating
// the containing directory when needed.
//
// When prov is true the content is stored as the provenance (".prov") file
// for the key instead of the chart archive (".tgz"). The path of the cache
// file is returned together with any error from creating the directory or
// writing the file.
func (c *DiskCache) Put(key [sha256.Size]byte, data io.Reader, prov bool) (string, error) {
	// TODO: verify the key and digest of the key are the same.

	p := c.fileName(key, prov)
	dir := filepath.Dir(p)
	if err := os.MkdirAll(dir, 0755); err != nil {
		// Include the directory and the cause so the log entry is actionable.
		slog.Error("failed to create cache directory", slog.String("path", dir), slog.Any("error", err))
		return p, err
	}
	return p, fileutil.AtomicWriteFile(p, data, 0644)
}
// fileName builds the on-disk location of a cache entry. Entries are grouped
// into sub-directories of Root named after the first byte of the hash (two hex
// characters). The file itself is named with the full hex encoded hash
// followed by ".prov" for provenance data or ".tgz" otherwise.
func (c *DiskCache) fileName(id [sha256.Size]byte, prov bool) string {
	hexID := fmt.Sprintf("%x", id)

	ext := ".tgz"
	if prov {
		ext = ".prov"
	}

	// The leading two hex characters are the encoding of id[0].
	return filepath.Join(c.Root, hexID[:2], hexID+ext)
}

@ -16,6 +16,9 @@ limitations under the License.
package downloader
import (
"bytes"
"crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"io"
@ -72,6 +75,9 @@ type ChartDownloader struct {
RegistryClient *registry.Client
RepositoryConfig string
RepositoryCache string
// Cache specifies the cache implementation to use.
Cache Cache
}
// DownloadTo retrieves a chart. Depending on the settings, it may also download a provenance file.
@ -86,7 +92,10 @@ type ChartDownloader struct {
// Returns a string path to the location where the file was downloaded and a verification
// (if provenance was verified), or an error if something bad happened.
func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *provenance.Verification, error) {
u, err := c.ResolveChartVersion(ref, version)
if c.Cache == nil {
c.Cache = &DiskCache{Root: c.RepositoryCache}
}
hash, u, err := c.ResolveChartVersion(ref, version)
if err != nil {
return "", nil, err
}
@ -96,11 +105,36 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven
return "", nil, err
}
c.Options = append(c.Options, getter.WithAcceptHeader("application/gzip,application/octet-stream"))
// Check the cache for the content. Otherwise download it.
// Note, this process will pull from the cache but does not automatically populate
// the cache with the file it downloads.
var data *bytes.Buffer
var found bool
var digest []byte
var digest32 [32]byte
if hash != "" {
// if there is a hash, populate the other formats
digest, err = hex.DecodeString(hash)
if err != nil {
return "", nil, err
}
copy(digest32[:], digest)
if pth, err := c.Cache.Get(digest32, false); err == nil {
fdata, err := os.ReadFile(pth)
if err == nil {
found = true
data = bytes.NewBuffer(fdata)
}
}
}
data, err := g.Get(u.String(), c.Options...)
if err != nil {
return "", nil, err
if !found {
c.Options = append(c.Options, getter.WithAcceptHeader("application/gzip,application/octet-stream"))
data, err = g.Get(u.String(), c.Options...)
if err != nil {
return "", nil, err
}
}
name := filepath.Base(u.Path)
@ -117,13 +151,26 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven
// If provenance is requested, verify it.
ver := &provenance.Verification{}
if c.Verify > VerifyNever {
body, err := g.Get(u.String() + ".prov")
if err != nil {
if c.Verify == VerifyAlways {
return destfile, ver, fmt.Errorf("failed to fetch provenance %q", u.String()+".prov")
found = false
var body *bytes.Buffer
if hash != "" {
if pth, err := c.Cache.Get(digest32, true); err == nil {
fdata, err := os.ReadFile(pth)
if err == nil {
found = true
body = bytes.NewBuffer(fdata)
}
}
}
if !found {
body, err = g.Get(u.String() + ".prov")
if err != nil {
if c.Verify == VerifyAlways {
return destfile, ver, fmt.Errorf("failed to fetch provenance %q", u.String()+".prov")
}
fmt.Fprintf(c.Out, "WARNING: Verification not found for %s: %s\n", ref, err)
return destfile, ver, nil
}
fmt.Fprintf(c.Out, "WARNING: Verification not found for %s: %s\n", ref, err)
return destfile, ver, nil
}
provfile := destfile + ".prov"
if err := fileutil.AtomicWriteFile(provfile, body, 0644); err != nil {
@ -131,7 +178,7 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven
}
if c.Verify != VerifyLater {
ver, err = VerifyChart(destfile, c.Keyring)
ver, err = VerifyChart(destfile, destfile+".prov", c.Keyring)
if err != nil {
// Fail always in this case, since it means the verification step
// failed.
@ -142,10 +189,105 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven
return destfile, ver, nil
}
// DownloadToCache retrieves a chart, and depending on c.Verify its provenance
// file, using the content based cache.
//
// The reference is resolved with ResolveChartVersion. When that yields a
// sha256 digest the cache is consulted first and the chart is only downloaded
// on a cache miss. When no usable digest is available the chart is always
// downloaded and the cache key is computed from the downloaded content.
//
// It returns the path of the chart in the cache, a verification (populated
// only if the provenance was verified), or an error if something bad happened.
// If c.Cache is nil it is set to a DiskCache rooted at c.RepositoryCache.
func (c *ChartDownloader) DownloadToCache(ref, version string) (string, *provenance.Verification, error) {
	if c.Cache == nil {
		c.Cache = &DiskCache{Root: c.RepositoryCache}
	}

	digestString, u, err := c.ResolveChartVersion(ref, version)
	if err != nil {
		return "", nil, err
	}

	g, err := c.Getters.ByScheme(u.Scheme)
	if err != nil {
		return "", nil, err
	}

	c.Options = append(c.Options, getter.WithAcceptHeader("application/gzip,application/octet-stream"))

	// Digests coming from OCI references carry an algorithm prefix
	// ("sha256:<hex>") while index files hold the bare hex string. Strip the
	// prefix so both forms decode.
	digest, err := hex.DecodeString(strings.TrimPrefix(digestString, "sha256:"))
	if err != nil {
		return "", nil, fmt.Errorf("unable to decode digest: %w", err)
	}

	// Only a digest of exactly sha256.Size bytes can be used as a cache key.
	// Anything else (including no digest at all) is treated as unknown and the
	// key is derived from the downloaded content instead.
	var digest32 [sha256.Size]byte
	haveDigest := len(digest) == sha256.Size
	if haveDigest {
		copy(digest32[:], digest)
	}

	var pth string
	// only fetch from the cache if we have a digest
	if haveDigest {
		pth, err = c.Cache.Get(digest32, false)
	}

	if !haveDigest || err != nil {
		// A missing cache entry is expected; any other cache error is fatal.
		if err != nil && !os.IsNotExist(err) {
			return "", nil, err
		}

		// Get file not in the cache
		data, gerr := g.Get(u.String(), c.Options...)
		if gerr != nil {
			return "", nil, gerr
		}

		// Generate the digest from the content. Bytes does not consume the
		// buffer, so data can still be handed to the cache afterwards.
		if !haveDigest {
			digest32 = sha256.Sum256(data.Bytes())
		}

		pth, err = c.Cache.Put(digest32, data, false)
		if err != nil {
			return "", nil, err
		}
	}

	// If provenance is requested, verify it.
	ver := &provenance.Verification{}
	if c.Verify > VerifyNever {
		ppth, err := c.Cache.Get(digest32, true)
		if err != nil {
			if !os.IsNotExist(err) {
				return pth, ver, err
			}

			// Provenance file is not cached yet, fetch it next to the chart.
			body, err := g.Get(u.String() + ".prov")
			if err != nil {
				if c.Verify == VerifyAlways {
					return pth, ver, fmt.Errorf("failed to fetch provenance %q", u.String()+".prov")
				}
				fmt.Fprintf(c.Out, "WARNING: Verification not found for %s: %s\n", ref, err)
				return pth, ver, nil
			}

			ppth, err = c.Cache.Put(digest32, body, true)
			if err != nil {
				return "", nil, err
			}
		}

		if c.Verify != VerifyLater {
			ver, err = VerifyChart(pth, ppth, c.Keyring)
			if err != nil {
				// Fail always in this case, since it means the verification step
				// failed.
				return pth, ver, err
			}
		}
	}

	return pth, ver, nil
}
// ResolveChartVersion resolves a chart reference to a URL.
//
// It returns the URL and sets the ChartDownloader's Options that can fetch
// the URL using the appropriate Getter.
// It returns:
// - A hash of the content if available
// - The URL and sets the ChartDownloader's Options that can fetch the URL using the appropriate Getter.
// - An error if there is one
//
// A reference may be an HTTP URL, an oci reference URL, a 'reponame/chartname'
// reference, or a local path.
@ -157,23 +299,26 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven
// - If version is non-empty, this will return the URL for that version
// - If version is empty, this will return the URL for the latest version
// - If no version can be found, an error is returned
func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, error) {
//
// TODO: support OCI hash
func (c *ChartDownloader) ResolveChartVersion(ref, version string) (string, *url.URL, error) {
u, err := url.Parse(ref)
if err != nil {
return nil, fmt.Errorf("invalid chart URL format: %s", ref)
return "", nil, fmt.Errorf("invalid chart URL format: %s", ref)
}
if registry.IsOCI(u.String()) {
if c.RegistryClient == nil {
return nil, fmt.Errorf("unable to lookup ref %s at version '%s', missing registry client", ref, version)
return "", nil, fmt.Errorf("unable to lookup ref %s at version '%s', missing registry client", ref, version)
}
return c.RegistryClient.ValidateReference(ref, version, u)
digest, OCIref, err := c.RegistryClient.ValidateReference(ref, version, u)
return digest, OCIref, err
}
rf, err := loadRepoConfig(c.RepositoryConfig)
if err != nil {
return u, err
return "", u, err
}
if u.IsAbs() && len(u.Host) > 0 && len(u.Path) > 0 {
@ -190,9 +335,9 @@ func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, er
if err == ErrNoOwnerRepo {
// Make sure to add the ref URL as the URL for the getter
c.Options = append(c.Options, getter.WithURL(ref))
return u, nil
return "", u, nil
}
return u, err
return "", u, err
}
// If we get here, we don't need to go through the next phase of looking
@ -211,20 +356,20 @@ func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, er
getter.WithPassCredentialsAll(rc.PassCredentialsAll),
)
}
return u, nil
return "", u, nil
}
// See if it's of the form: repo/path_to_chart
p := strings.SplitN(u.Path, "/", 2)
if len(p) < 2 {
return u, fmt.Errorf("non-absolute URLs should be in form of repo_name/path_to_chart, got: %s", u)
return "", u, fmt.Errorf("non-absolute URLs should be in form of repo_name/path_to_chart, got: %s", u)
}
repoName := p[0]
chartName := p[1]
rc, err := pickChartRepositoryConfigByName(repoName, rf.Repositories)
if err != nil {
return u, err
return "", u, err
}
// Now that we have the chart repository information we can use that URL
@ -233,7 +378,7 @@ func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, er
r, err := repo.NewChartRepository(rc, c.Getters)
if err != nil {
return u, err
return "", u, err
}
if r != nil && r.Config != nil {
@ -252,32 +397,33 @@ func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, er
idxFile := filepath.Join(c.RepositoryCache, helmpath.CacheIndexFile(r.Config.Name))
i, err := repo.LoadIndexFile(idxFile)
if err != nil {
return u, fmt.Errorf("no cached repo found. (try 'helm repo update'): %w", err)
return "", u, fmt.Errorf("no cached repo found. (try 'helm repo update'): %w", err)
}
cv, err := i.Get(chartName, version)
if err != nil {
return u, fmt.Errorf("chart %q matching %s not found in %s index. (try 'helm repo update'): %w", chartName, version, r.Config.Name, err)
return "", u, fmt.Errorf("chart %q matching %s not found in %s index. (try 'helm repo update'): %w", chartName, version, r.Config.Name, err)
}
if len(cv.URLs) == 0 {
return u, fmt.Errorf("chart %q has no downloadable URLs", ref)
return "", u, fmt.Errorf("chart %q has no downloadable URLs", ref)
}
// TODO: Seems that picking first URL is not fully correct
resolvedURL, err := repo.ResolveReferenceURL(rc.URL, cv.URLs[0])
if err != nil {
return u, fmt.Errorf("invalid chart URL format: %s", ref)
return cv.Digest, u, fmt.Errorf("invalid chart URL format: %s", ref)
}
return url.Parse(resolvedURL)
loc, err := url.Parse(resolvedURL)
return cv.Digest, loc, err
}
// VerifyChart takes a path to a chart archive and a keyring, and verifies the chart.
//
// It assumes that a chart archive file is accompanied by a provenance file whose
// name is the archive file name plus the ".prov" extension.
func VerifyChart(path, keyring string) (*provenance.Verification, error) {
func VerifyChart(path, provfile, keyring string) (*provenance.Verification, error) {
// For now, error out if it's not a tar file.
switch fi, err := os.Stat(path); {
case err != nil:
@ -288,7 +434,6 @@ func VerifyChart(path, keyring string) (*provenance.Verification, error) {
return nil, errors.New("chart must be a tgz file")
}
provfile := path + ".prov"
if _, err := os.Stat(provfile); err != nil {
return nil, fmt.Errorf("could not load provenance file %s: %w", provfile, err)
}

@ -79,7 +79,7 @@ func TestResolveChartRef(t *testing.T) {
}
for _, tt := range tests {
u, err := c.ResolveChartVersion(tt.ref, tt.version)
_, u, err := c.ResolveChartVersion(tt.ref, tt.version)
if err != nil {
if tt.fail {
continue
@ -131,7 +131,7 @@ func TestResolveChartOpts(t *testing.T) {
continue
}
u, err := c.ResolveChartVersion(tt.ref, tt.version)
_, u, err := c.ResolveChartVersion(tt.ref, tt.version)
if err != nil {
t.Errorf("%s: failed with error %s", tt.name, err)
continue
@ -155,7 +155,7 @@ func TestResolveChartOpts(t *testing.T) {
}
func TestVerifyChart(t *testing.T) {
v, err := VerifyChart("testdata/signtest-0.1.0.tgz", "testdata/helm-test-key.pub")
v, err := VerifyChart("testdata/signtest-0.1.0.tgz", "testdata/signtest-0.1.0.tgz.prov", "testdata/helm-test-key.pub")
if err != nil {
t.Fatal(err)
}

@ -823,12 +823,12 @@ func (c *Client) Resolve(ref string) (desc ocispec.Descriptor, err error) {
}
// ValidateReference for path and version
func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, error) {
func (c *Client) ValidateReference(ref, version string, u *url.URL) (string, *url.URL, error) {
var tag string
registryReference, err := newReference(u.Host + u.Path)
if err != nil {
return nil, err
return "", nil, err
}
if version == "" {
@ -836,14 +836,14 @@ func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, e
version = registryReference.Tag
} else {
if registryReference.Tag != "" && registryReference.Tag != version {
return nil, fmt.Errorf("chart reference and version mismatch: %s is not %s", version, registryReference.Tag)
return "", nil, fmt.Errorf("chart reference and version mismatch: %s is not %s", version, registryReference.Tag)
}
}
if registryReference.Digest != "" {
if version == "" {
// Install by digest only
return u, nil
return "", u, nil
}
u.Path = fmt.Sprintf("%s@%s", registryReference.Repository, registryReference.Digest)
@ -852,12 +852,12 @@ func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, e
desc, err := c.Resolve(path)
if err != nil {
// The resource does not have to be tagged when digest is specified
return u, nil
return "", u, nil
}
if desc.Digest.String() != registryReference.Digest {
return nil, fmt.Errorf("chart reference digest mismatch: %s is not %s", desc.Digest.String(), registryReference.Digest)
return "", nil, fmt.Errorf("chart reference digest mismatch: %s is not %s", desc.Digest.String(), registryReference.Digest)
}
return u, nil
return registryReference.Digest, u, nil
}
// Evaluate whether an explicit version has been provided. Otherwise, determine version to use
@ -868,10 +868,10 @@ func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, e
// Retrieve list of repository tags
tags, err := c.Tags(strings.TrimPrefix(ref, fmt.Sprintf("%s://", OCIScheme)))
if err != nil {
return nil, err
return "", nil, err
}
if len(tags) == 0 {
return nil, fmt.Errorf("unable to locate any tags in provided repository: %s", ref)
return "", nil, fmt.Errorf("unable to locate any tags in provided repository: %s", ref)
}
// Determine if version provided
@ -880,13 +880,14 @@ func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, e
// If semver constraint string, try to find a match
tag, err = GetTagMatchingVersionOrConstraint(tags, version)
if err != nil {
return nil, err
return "", nil, err
}
}
u.Path = fmt.Sprintf("%s:%s", registryReference.Repository, tag)
// desc, err := c.Resolve(u.Path)
return u, err
return "", u, err
}
// tagManifest prepares and tags a manifest in memory storage

Loading…
Cancel
Save