diff --git a/pkg/action/install.go b/pkg/action/install.go index d8efa5d5d..b13bbfb8b 100644 --- a/pkg/action/install.go +++ b/pkg/action/install.go @@ -792,7 +792,7 @@ func (c *ChartPathOptions) LocateChart(name string, settings *cli.EnvSettings) ( return abs, err } if c.Verify { - if _, err := downloader.VerifyChart(abs, c.Keyring); err != nil { + if _, err := downloader.VerifyChart(abs, abs+".prov", c.Keyring); err != nil { return "", err } } @@ -868,7 +868,7 @@ func (c *ChartPathOptions) LocateChart(name string, settings *cli.EnvSettings) ( return "", err } - filename, _, err := dl.DownloadTo(name, version, settings.RepositoryCache) + filename, _, err := dl.DownloadToCache(name, version) if err != nil { return "", err } diff --git a/pkg/action/verify.go b/pkg/action/verify.go index 68a5e2d88..ca2f4fa63 100644 --- a/pkg/action/verify.go +++ b/pkg/action/verify.go @@ -39,7 +39,7 @@ func NewVerify() *Verify { // Run executes 'helm verify'. func (v *Verify) Run(chartfile string) error { var out strings.Builder - p, err := downloader.VerifyChart(chartfile, v.Keyring) + p, err := downloader.VerifyChart(chartfile, chartfile+".prov", v.Keyring) if err != nil { return err } diff --git a/pkg/downloader/cache.go b/pkg/downloader/cache.go new file mode 100644 index 000000000..d9b925756 --- /dev/null +++ b/pkg/downloader/cache.go @@ -0,0 +1,86 @@ +/* +Copyright The Helm Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package downloader + +import ( + "crypto/sha256" + "fmt" + "io" + "log/slog" + "os" + "path/filepath" + + "helm.sh/helm/v4/internal/fileutil" +) + +// Cache describes a cache that can get and put chart data. +// The cache key is the sha256 has of the content. sha256 is used in Helm for +// digests in index files providing a common key for checking content. +type Cache interface { + // Get returns a reader for the given key. + Get(key [sha256.Size]byte, prov bool) (string, error) + // Put stores the given reader for the given key. + Put(key [sha256.Size]byte, data io.Reader, prov bool) (string, error) +} + +// TODO: The cache assumes files because much of Helm assumes files. Convert +// Helm to pass content around instead of file locations. + +// DiskCache is a cache that stores data on disk. +type DiskCache struct { + Root string +} + +// Get returns a reader for the given key. +func (c *DiskCache) Get(key [sha256.Size]byte, prov bool) (string, error) { + p := c.fileName(key, prov) + fi, err := os.Stat(p) + if err != nil { + return "", err + } + // Empty files treated as not exist because there is no content. + if fi.Size() == 0 { + return p, os.ErrNotExist + } + // directories should never happen unless something outside helm is operating + // on this content. + if fi.IsDir() { + return p, os.ErrInvalid + } + return p, nil +} + +// Put stores the given reader for the given key. +// It returns the path to the stored file. +func (c *DiskCache) Put(key [sha256.Size]byte, data io.Reader, prov bool) (string, error) { + // TODO: verify the key and digest of the key are the same. + p := c.fileName(key, prov) + if err := os.MkdirAll(filepath.Dir(p), 0755); err != nil { + slog.Error("failed to create cache directory") + return p, err + } + return p, fileutil.AtomicWriteFile(p, data, 0644) +} + +// fileName generates the filename in a structured manner where the first part is the +// directory and the full hash is the filename. +func (c *DiskCache) fileName(id [sha256.Size]byte, prov bool) string { + suffix := ".tgz" + if prov { + suffix = ".prov" + } + return filepath.Join(c.Root, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+suffix) +} diff --git a/pkg/downloader/chart_downloader.go b/pkg/downloader/chart_downloader.go index 529fd788e..bdf65011c 100644 --- a/pkg/downloader/chart_downloader.go +++ b/pkg/downloader/chart_downloader.go @@ -16,6 +16,9 @@ limitations under the License. package downloader import ( + "bytes" + "crypto/sha256" + "encoding/hex" "errors" "fmt" "io" @@ -72,6 +75,9 @@ type ChartDownloader struct { RegistryClient *registry.Client RepositoryConfig string RepositoryCache string + + // Cache specifies the cache implementation to use. + Cache Cache } // DownloadTo retrieves a chart. Depending on the settings, it may also download a provenance file. @@ -86,7 +92,10 @@ type ChartDownloader struct { // Returns a string path to the location where the file was downloaded and a verification // (if provenance was verified), or an error if something bad happened. func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *provenance.Verification, error) { - u, err := c.ResolveChartVersion(ref, version) + if c.Cache == nil { + c.Cache = &DiskCache{Root: c.RepositoryCache} + } + hash, u, err := c.ResolveChartVersion(ref, version) if err != nil { return "", nil, err } @@ -96,11 +105,36 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven return "", nil, err } - c.Options = append(c.Options, getter.WithAcceptHeader("application/gzip,application/octet-stream")) + // Check the cache for the content. Otherwise download it. + // Note, this process will pull from the cache but does not automatically populate + // the cache with the file it downloads. + var data *bytes.Buffer + var found bool + var digest []byte + var digest32 [32]byte + if hash != "" { + // if there is a hash, populate the other formats + digest, err = hex.DecodeString(hash) + if err != nil { + return "", nil, err + } + copy(digest32[:], digest) + if pth, err := c.Cache.Get(digest32, false); err == nil { + fdata, err := os.ReadFile(pth) + if err == nil { + found = true + data = bytes.NewBuffer(fdata) + } + } + } - data, err := g.Get(u.String(), c.Options...) - if err != nil { - return "", nil, err + if !found { + c.Options = append(c.Options, getter.WithAcceptHeader("application/gzip,application/octet-stream")) + + data, err = g.Get(u.String(), c.Options...) + if err != nil { + return "", nil, err + } } name := filepath.Base(u.Path) @@ -117,13 +151,26 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven // If provenance is requested, verify it. ver := &provenance.Verification{} if c.Verify > VerifyNever { - body, err := g.Get(u.String() + ".prov") - if err != nil { - if c.Verify == VerifyAlways { - return destfile, ver, fmt.Errorf("failed to fetch provenance %q", u.String()+".prov") + found = false + var body *bytes.Buffer + if hash != "" { + if pth, err := c.Cache.Get(digest32, true); err == nil { + fdata, err := os.ReadFile(pth) + if err == nil { + found = true + body = bytes.NewBuffer(fdata) + } + } + } + if !found { + body, err = g.Get(u.String() + ".prov") + if err != nil { + if c.Verify == VerifyAlways { + return destfile, ver, fmt.Errorf("failed to fetch provenance %q", u.String()+".prov") + } + fmt.Fprintf(c.Out, "WARNING: Verification not found for %s: %s\n", ref, err) + return destfile, ver, nil } - fmt.Fprintf(c.Out, "WARNING: Verification not found for %s: %s\n", ref, err) - return destfile, ver, nil } provfile := destfile + ".prov" if err := fileutil.AtomicWriteFile(provfile, body, 0644); err != nil { @@ -131,7 +178,7 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven } if c.Verify != VerifyLater { - ver, err = VerifyChart(destfile, c.Keyring) + ver, err = VerifyChart(destfile, destfile+".prov", c.Keyring) if err != nil { // Fail always in this case, since it means the verification step // failed. @@ -142,10 +189,105 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven return destfile, ver, nil } +// DownloadToCache retrieves resources while using a content based cache. +func (c *ChartDownloader) DownloadToCache(ref, version string) (string, *provenance.Verification, error) { + if c.Cache == nil { + c.Cache = &DiskCache{Root: c.RepositoryCache} + } + + digestString, u, err := c.ResolveChartVersion(ref, version) + if err != nil { + return "", nil, err + } + + g, err := c.Getters.ByScheme(u.Scheme) + if err != nil { + return "", nil, err + } + + c.Options = append(c.Options, getter.WithAcceptHeader("application/gzip,application/octet-stream")) + + // Check the cache for the file + digest, err := hex.DecodeString(digestString) + if err != nil { + return "", nil, err + } + var digest32 [32]byte + copy(digest32[:], digest) + if err != nil { + return "", nil, fmt.Errorf("unable to decode digest: %w", err) + } + + var pth string + // only fetch from the cache if we have a digest + if len(digest) > 0 { + pth, err = c.Cache.Get(digest32, false) + } + if len(digest) == 0 || err != nil { + if err != nil && !os.IsNotExist(err) { + return "", nil, err + } + + // Get file not in the cache + data, gerr := g.Get(u.String(), c.Options...) + if gerr != nil { + return "", nil, gerr + } + + // Generate the digest + if len(digest) == 0 { + h := sha256.New() + digest32 = [sha256.Size]byte(h.Sum(data.Bytes())) + } + + pth, err = c.Cache.Put(digest32, data, false) + if err != nil { + return "", nil, err + } + } + + // If provenance is requested, verify it. + ver := &provenance.Verification{} + if c.Verify > VerifyNever { + ppth, err := c.Cache.Get(digest32, true) + if err != nil { + if !os.IsNotExist(err) { + return pth, ver, err + } + + body, err := g.Get(u.String() + ".prov") + if err != nil { + if c.Verify == VerifyAlways { + return pth, ver, fmt.Errorf("failed to fetch provenance %q", u.String()+".prov") + } + fmt.Fprintf(c.Out, "WARNING: Verification not found for %s: %s\n", ref, err) + return pth, ver, nil + } + + ppth, err = c.Cache.Put(digest32, body, true) + if err != nil { + return "", nil, err + } + } + + if c.Verify != VerifyLater { + ver, err = VerifyChart(pth, ppth, c.Keyring) + if err != nil { + // Fail always in this case, since it means the verification step + // failed. + return pth, ver, err + } + } + } + return pth, ver, nil +} + // ResolveChartVersion resolves a chart reference to a URL. // -// It returns the URL and sets the ChartDownloader's Options that can fetch -// the URL using the appropriate Getter. +// It returns: +// - A hash of the content if available +// - The URL and sets the ChartDownloader's Options that can fetch the URL using the appropriate Getter. +// - An error if there is one // // A reference may be an HTTP URL, an oci reference URL, a 'reponame/chartname' // reference, or a local path. @@ -157,23 +299,26 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven // - If version is non-empty, this will return the URL for that version // - If version is empty, this will return the URL for the latest version // - If no version can be found, an error is returned -func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, error) { +// +// TODO: support OCI hash +func (c *ChartDownloader) ResolveChartVersion(ref, version string) (string, *url.URL, error) { u, err := url.Parse(ref) if err != nil { - return nil, fmt.Errorf("invalid chart URL format: %s", ref) + return "", nil, fmt.Errorf("invalid chart URL format: %s", ref) } if registry.IsOCI(u.String()) { if c.RegistryClient == nil { - return nil, fmt.Errorf("unable to lookup ref %s at version '%s', missing registry client", ref, version) + return "", nil, fmt.Errorf("unable to lookup ref %s at version '%s', missing registry client", ref, version) } - return c.RegistryClient.ValidateReference(ref, version, u) + digest, OCIref, err := c.RegistryClient.ValidateReference(ref, version, u) + return digest, OCIref, err } rf, err := loadRepoConfig(c.RepositoryConfig) if err != nil { - return u, err + return "", u, err } if u.IsAbs() && len(u.Host) > 0 && len(u.Path) > 0 { @@ -190,9 +335,9 @@ func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, er if err == ErrNoOwnerRepo { // Make sure to add the ref URL as the URL for the getter c.Options = append(c.Options, getter.WithURL(ref)) - return u, nil + return "", u, nil } - return u, err + return "", u, err } // If we get here, we don't need to go through the next phase of looking @@ -211,20 +356,20 @@ func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, er getter.WithPassCredentialsAll(rc.PassCredentialsAll), ) } - return u, nil + return "", u, nil } // See if it's of the form: repo/path_to_chart p := strings.SplitN(u.Path, "/", 2) if len(p) < 2 { - return u, fmt.Errorf("non-absolute URLs should be in form of repo_name/path_to_chart, got: %s", u) + return "", u, fmt.Errorf("non-absolute URLs should be in form of repo_name/path_to_chart, got: %s", u) } repoName := p[0] chartName := p[1] rc, err := pickChartRepositoryConfigByName(repoName, rf.Repositories) if err != nil { - return u, err + return "", u, err } // Now that we have the chart repository information we can use that URL @@ -233,7 +378,7 @@ func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, er r, err := repo.NewChartRepository(rc, c.Getters) if err != nil { - return u, err + return "", u, err } if r != nil && r.Config != nil { @@ -252,32 +397,33 @@ func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, er idxFile := filepath.Join(c.RepositoryCache, helmpath.CacheIndexFile(r.Config.Name)) i, err := repo.LoadIndexFile(idxFile) if err != nil { - return u, fmt.Errorf("no cached repo found. (try 'helm repo update'): %w", err) + return "", u, fmt.Errorf("no cached repo found. (try 'helm repo update'): %w", err) } cv, err := i.Get(chartName, version) if err != nil { - return u, fmt.Errorf("chart %q matching %s not found in %s index. (try 'helm repo update'): %w", chartName, version, r.Config.Name, err) + return "", u, fmt.Errorf("chart %q matching %s not found in %s index. (try 'helm repo update'): %w", chartName, version, r.Config.Name, err) } if len(cv.URLs) == 0 { - return u, fmt.Errorf("chart %q has no downloadable URLs", ref) + return "", u, fmt.Errorf("chart %q has no downloadable URLs", ref) } // TODO: Seems that picking first URL is not fully correct resolvedURL, err := repo.ResolveReferenceURL(rc.URL, cv.URLs[0]) if err != nil { - return u, fmt.Errorf("invalid chart URL format: %s", ref) + return cv.Digest, u, fmt.Errorf("invalid chart URL format: %s", ref) } - return url.Parse(resolvedURL) + loc, err := url.Parse(resolvedURL) + return cv.Digest, loc, err } // VerifyChart takes a path to a chart archive and a keyring, and verifies the chart. // // It assumes that a chart archive file is accompanied by a provenance file whose // name is the archive file name plus the ".prov" extension. -func VerifyChart(path, keyring string) (*provenance.Verification, error) { +func VerifyChart(path, provfile, keyring string) (*provenance.Verification, error) { // For now, error out if it's not a tar file. switch fi, err := os.Stat(path); { case err != nil: @@ -288,7 +434,6 @@ func VerifyChart(path, keyring string) (*provenance.Verification, error) { return nil, errors.New("chart must be a tgz file") } - provfile := path + ".prov" if _, err := os.Stat(provfile); err != nil { return nil, fmt.Errorf("could not load provenance file %s: %w", provfile, err) } diff --git a/pkg/downloader/chart_downloader_test.go b/pkg/downloader/chart_downloader_test.go index a2e09eae5..5b5f96751 100644 --- a/pkg/downloader/chart_downloader_test.go +++ b/pkg/downloader/chart_downloader_test.go @@ -79,7 +79,7 @@ func TestResolveChartRef(t *testing.T) { } for _, tt := range tests { - u, err := c.ResolveChartVersion(tt.ref, tt.version) + _, u, err := c.ResolveChartVersion(tt.ref, tt.version) if err != nil { if tt.fail { continue @@ -131,7 +131,7 @@ func TestResolveChartOpts(t *testing.T) { continue } - u, err := c.ResolveChartVersion(tt.ref, tt.version) + _, u, err := c.ResolveChartVersion(tt.ref, tt.version) if err != nil { t.Errorf("%s: failed with error %s", tt.name, err) continue @@ -155,7 +155,7 @@ func TestResolveChartOpts(t *testing.T) { } func TestVerifyChart(t *testing.T) { - v, err := VerifyChart("testdata/signtest-0.1.0.tgz", "testdata/helm-test-key.pub") + v, err := VerifyChart("testdata/signtest-0.1.0.tgz", "testdata/signtest-0.1.0.tgz.prov", "testdata/helm-test-key.pub") if err != nil { t.Fatal(err) } diff --git a/pkg/registry/client.go b/pkg/registry/client.go index 0c9f256d3..673c6ea87 100644 --- a/pkg/registry/client.go +++ b/pkg/registry/client.go @@ -823,12 +823,12 @@ func (c *Client) Resolve(ref string) (desc ocispec.Descriptor, err error) { } // ValidateReference for path and version -func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, error) { +func (c *Client) ValidateReference(ref, version string, u *url.URL) (string, *url.URL, error) { var tag string registryReference, err := newReference(u.Host + u.Path) if err != nil { - return nil, err + return "", nil, err } if version == "" { @@ -836,14 +836,14 @@ func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, e version = registryReference.Tag } else { if registryReference.Tag != "" && registryReference.Tag != version { - return nil, fmt.Errorf("chart reference and version mismatch: %s is not %s", version, registryReference.Tag) + return "", nil, fmt.Errorf("chart reference and version mismatch: %s is not %s", version, registryReference.Tag) } } if registryReference.Digest != "" { if version == "" { // Install by digest only - return u, nil + return "", u, nil } u.Path = fmt.Sprintf("%s@%s", registryReference.Repository, registryReference.Digest) @@ -852,12 +852,12 @@ func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, e desc, err := c.Resolve(path) if err != nil { // The resource does not have to be tagged when digest is specified - return u, nil + return "", u, nil } if desc.Digest.String() != registryReference.Digest { - return nil, fmt.Errorf("chart reference digest mismatch: %s is not %s", desc.Digest.String(), registryReference.Digest) + return "", nil, fmt.Errorf("chart reference digest mismatch: %s is not %s", desc.Digest.String(), registryReference.Digest) } - return u, nil + return registryReference.Digest, u, nil } // Evaluate whether an explicit version has been provided. Otherwise, determine version to use @@ -868,10 +868,10 @@ func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, e // Retrieve list of repository tags tags, err := c.Tags(strings.TrimPrefix(ref, fmt.Sprintf("%s://", OCIScheme))) if err != nil { - return nil, err + return "", nil, err } if len(tags) == 0 { - return nil, fmt.Errorf("unable to locate any tags in provided repository: %s", ref) + return "", nil, fmt.Errorf("unable to locate any tags in provided repository: %s", ref) } // Determine if version provided @@ -880,13 +880,14 @@ func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, e // If semver constraint string, try to find a match tag, err = GetTagMatchingVersionOrConstraint(tags, version) if err != nil { - return nil, err + return "", nil, err } } u.Path = fmt.Sprintf("%s:%s", registryReference.Repository, tag) + // desc, err := c.Resolve(u.Path) - return u, err + return "", u, err } // tagManifest prepares and tags a manifest in memory storage