diff --git a/internal/third_party/dep/fs/fs.go b/internal/third_party/dep/fs/fs.go index 717eff04d..6e2720f3b 100644 --- a/internal/third_party/dep/fs/fs.go +++ b/internal/third_party/dep/fs/fs.go @@ -73,7 +73,7 @@ func renameByCopy(src, dst string) error { cerr = fmt.Errorf("copying directory failed: %w", cerr) } } else { - cerr = copyFile(src, dst) + cerr = CopyFile(src, dst) if cerr != nil { cerr = fmt.Errorf("copying file failed: %w", cerr) } @@ -139,7 +139,7 @@ func CopyDir(src, dst string) error { } else { // This will include symlinks, which is what we want when // copying things. - if err = copyFile(srcPath, dstPath); err != nil { + if err = CopyFile(srcPath, dstPath); err != nil { return fmt.Errorf("copying file failed: %w", err) } } @@ -148,11 +148,11 @@ func CopyDir(src, dst string) error { return nil } -// copyFile copies the contents of the file named src to the file named +// CopyFile copies the contents of the file named src to the file named // by dst. The file will be created if it does not already exist. If the // destination file exists, all its contents will be replaced by the contents // of the source file. The file mode will be copied from the source. -func copyFile(src, dst string) (err error) { +func CopyFile(src, dst string) (err error) { if sym, err := IsSymlink(src); err != nil { return fmt.Errorf("symlink check failed: %w", err) } else if sym { diff --git a/internal/third_party/dep/fs/fs_test.go b/internal/third_party/dep/fs/fs_test.go index 4c59d17fe..610771bc3 100644 --- a/internal/third_party/dep/fs/fs_test.go +++ b/internal/third_party/dep/fs/fs_test.go @@ -326,7 +326,7 @@ func TestCopyFile(t *testing.T) { srcf.Close() destf := filepath.Join(dir, "destf") - if err := copyFile(srcf.Name(), destf); err != nil { + if err := CopyFile(srcf.Name(), destf); err != nil { t.Fatal(err) } @@ -366,7 +366,7 @@ func TestCopyFileSymlink(t *testing.T) { for symlink, dst := range testcases { t.Run(symlink, func(t *testing.T) { var err error - if err = copyFile(symlink, dst); err != nil { + if err = CopyFile(symlink, dst); err != nil { t.Fatalf("failed to copy symlink: %s", err) } @@ -438,7 +438,7 @@ func TestCopyFileFail(t *testing.T) { defer cleanup() fn := filepath.Join(dstdir, "file") - if err := copyFile(srcf.Name(), fn); err == nil { + if err := CopyFile(srcf.Name(), fn); err == nil { t.Fatalf("expected error for %s, got none", fn) } } diff --git a/pkg/action/install.go b/pkg/action/install.go index a7fccb1c8..78c86cdc0 100644 --- a/pkg/action/install.go +++ b/pkg/action/install.go @@ -801,7 +801,7 @@ func (c *ChartPathOptions) LocateChart(name string, settings *cli.EnvSettings) ( return abs, err } if c.Verify { - if _, err := downloader.VerifyChart(abs, c.Keyring); err != nil { + if _, err := downloader.VerifyChart(abs, abs+".prov", c.Keyring); err != nil { return "", err } } @@ -824,6 +824,7 @@ func (c *ChartPathOptions) LocateChart(name string, settings *cli.EnvSettings) ( }, RepositoryConfig: settings.RepositoryConfig, RepositoryCache: settings.RepositoryCache, + ContentCache: settings.ContentCache, RegistryClient: c.registryClient, } @@ -877,7 +878,7 @@ func (c *ChartPathOptions) LocateChart(name string, settings *cli.EnvSettings) ( return "", err } - filename, _, err := dl.DownloadTo(name, version, settings.RepositoryCache) + filename, _, err := dl.DownloadToCache(name, version) if err != nil { return "", err } diff --git a/pkg/action/pull.go b/pkg/action/pull.go index b4779f8d2..c1f77e44c 100644 --- a/pkg/action/pull.go +++ b/pkg/action/pull.go @@ -88,6 +88,7 @@ func (p *Pull) Run(chartRef string) (string, error) { RegistryClient: p.cfg.RegistryClient, RepositoryConfig: p.Settings.RepositoryConfig, RepositoryCache: p.Settings.RepositoryCache, + ContentCache: p.Settings.ContentCache, } if registry.IsOCI(chartRef) { diff --git a/pkg/action/verify.go b/pkg/action/verify.go index 68a5e2d88..ca2f4fa63 100644 --- a/pkg/action/verify.go +++ b/pkg/action/verify.go @@ -39,7 +39,7 @@ func NewVerify() *Verify { // Run executes 'helm verify'. func (v *Verify) Run(chartfile string) error { var out strings.Builder - p, err := downloader.VerifyChart(chartfile, v.Keyring) + p, err := downloader.VerifyChart(chartfile, chartfile+".prov", v.Keyring) if err != nil { return err } diff --git a/pkg/cli/environment.go b/pkg/cli/environment.go index c5f87cf24..19563cba3 100644 --- a/pkg/cli/environment.go +++ b/pkg/cli/environment.go @@ -91,6 +91,8 @@ type EnvSettings struct { QPS float32 // ColorMode controls colorized output (never, auto, always) ColorMode string + // ContentCache is the location where cached charts are stored + ContentCache string } func New() *EnvSettings { @@ -109,6 +111,7 @@ func New() *EnvSettings { RegistryConfig: envOr("HELM_REGISTRY_CONFIG", helmpath.ConfigPath("registry/config.json")), RepositoryConfig: envOr("HELM_REPOSITORY_CONFIG", helmpath.ConfigPath("repositories.yaml")), RepositoryCache: envOr("HELM_REPOSITORY_CACHE", helmpath.CachePath("repository")), + ContentCache: envOr("HELM_CONTENT_CACHE", helmpath.CachePath("content")), BurstLimit: envIntOr("HELM_BURST_LIMIT", defaultBurstLimit), QPS: envFloat32Or("HELM_QPS", defaultQPS), ColorMode: envColorMode(), @@ -161,6 +164,7 @@ func (s *EnvSettings) AddFlags(fs *pflag.FlagSet) { fs.StringVar(&s.RegistryConfig, "registry-config", s.RegistryConfig, "path to the registry config file") fs.StringVar(&s.RepositoryConfig, "repository-config", s.RepositoryConfig, "path to the file containing repository names and URLs") fs.StringVar(&s.RepositoryCache, "repository-cache", s.RepositoryCache, "path to the directory containing cached repository indexes") + fs.StringVar(&s.ContentCache, "content-cache", s.ContentCache, "path to the directory containing cached content (e.g. charts)") fs.IntVar(&s.BurstLimit, "burst-limit", s.BurstLimit, "client-side default throttling limit") fs.Float32Var(&s.QPS, "qps", s.QPS, "queries per second used when communicating with the Kubernetes API, not including bursting") fs.StringVar(&s.ColorMode, "color", s.ColorMode, "use colored output (never, auto, always)") diff --git a/pkg/cmd/dependency_build.go b/pkg/cmd/dependency_build.go index 16907facf..320fe12ae 100644 --- a/pkg/cmd/dependency_build.go +++ b/pkg/cmd/dependency_build.go @@ -69,6 +69,7 @@ func newDependencyBuildCmd(out io.Writer) *cobra.Command { RegistryClient: registryClient, RepositoryConfig: settings.RepositoryConfig, RepositoryCache: settings.RepositoryCache, + ContentCache: settings.ContentCache, Debug: settings.Debug, } if client.Verify { diff --git a/pkg/cmd/dependency_update.go b/pkg/cmd/dependency_update.go index 921e5ef49..b534fb48a 100644 --- a/pkg/cmd/dependency_update.go +++ b/pkg/cmd/dependency_update.go @@ -73,6 +73,7 @@ func newDependencyUpdateCmd(_ *action.Configuration, out io.Writer) *cobra.Comma RegistryClient: registryClient, RepositoryConfig: settings.RepositoryConfig, RepositoryCache: settings.RepositoryCache, + ContentCache: settings.ContentCache, Debug: settings.Debug, } if client.Verify { diff --git a/pkg/cmd/dependency_update_test.go b/pkg/cmd/dependency_update_test.go index 9646c6816..f1b39c4b7 100644 --- a/pkg/cmd/dependency_update_test.go +++ b/pkg/cmd/dependency_update_test.go @@ -45,6 +45,7 @@ func TestDependencyUpdateCmd(t *testing.T) { if err != nil { t.Fatal(err) } + contentCache := t.TempDir() ociChartName := "oci-depending-chart" c := createTestingMetadataForOCI(ociChartName, ociSrv.RegistryURL) @@ -69,7 +70,7 @@ func TestDependencyUpdateCmd(t *testing.T) { } _, out, err := executeActionCommand( - fmt.Sprintf("dependency update '%s' --repository-config %s --repository-cache %s --plain-http", dir(chartname), dir("repositories.yaml"), dir()), + fmt.Sprintf("dependency update '%s' --repository-config %s --repository-cache %s --content-cache %s --plain-http", dir(chartname), dir("repositories.yaml"), dir(), contentCache), ) if err != nil { t.Logf("Output: %s", out) @@ -112,7 +113,7 @@ func TestDependencyUpdateCmd(t *testing.T) { t.Fatal(err) } - _, out, err = executeActionCommand(fmt.Sprintf("dependency update '%s' --repository-config %s --repository-cache %s --plain-http", dir(chartname), dir("repositories.yaml"), dir())) + _, out, err = executeActionCommand(fmt.Sprintf("dependency update '%s' --repository-config %s --repository-cache %s --content-cache %s --plain-http", dir(chartname), dir("repositories.yaml"), dir(), contentCache)) if err != nil { t.Logf("Output: %s", out) t.Fatal(err) @@ -133,11 +134,12 @@ func TestDependencyUpdateCmd(t *testing.T) { if err := chartutil.SaveDir(c, dir()); err != nil { t.Fatal(err) } - cmd := fmt.Sprintf("dependency update '%s' --repository-config %s --repository-cache %s --registry-config %s/config.json --plain-http", + cmd := fmt.Sprintf("dependency update '%s' --repository-config %s --repository-cache %s --registry-config %s/config.json --content-cache %s --plain-http", dir(ociChartName), dir("repositories.yaml"), dir(), - dir()) + dir(), + contentCache) _, out, err = executeActionCommand(cmd) if err != nil { t.Logf("Output: %s", out) @@ -179,8 +181,9 @@ func TestDependencyUpdateCmd_DoNotDeleteOldChartsOnError(t *testing.T) { // Chart repo is down srv.Stop() + contentCache := t.TempDir() - _, output, err = executeActionCommand(fmt.Sprintf("dependency update %s --repository-config %s --repository-cache %s --plain-http", dir(chartname), dir("repositories.yaml"), dir())) + _, output, err = executeActionCommand(fmt.Sprintf("dependency update %s --repository-config %s --repository-cache %s --content-cache %s --plain-http", dir(chartname), dir("repositories.yaml"), dir(), contentCache)) if err == nil { t.Logf("Output: %s", output) t.Fatal("Expected error, got nil") @@ -232,9 +235,11 @@ func TestDependencyUpdateCmd_WithRepoThatWasNotAdded(t *testing.T) { t.Fatal(err) } + contentCache := t.TempDir() + _, out, err := executeActionCommand( - fmt.Sprintf("dependency update '%s' --repository-config %s --repository-cache %s", dir(chartname), - dir("repositories.yaml"), dir()), + fmt.Sprintf("dependency update '%s' --repository-config %s --repository-cache %s --content-cache %s", dir(chartname), + dir("repositories.yaml"), dir(), contentCache), ) if err != nil { diff --git a/pkg/cmd/install.go b/pkg/cmd/install.go index f09d8c78b..869163a2a 100644 --- a/pkg/cmd/install.go +++ b/pkg/cmd/install.go @@ -288,6 +288,7 @@ func runInstall(args []string, client *action.Install, valueOpts *values.Options Getters: p, RepositoryConfig: settings.RepositoryConfig, RepositoryCache: settings.RepositoryCache, + ContentCache: settings.ContentCache, Debug: settings.Debug, RegistryClient: client.GetRegistryClient(), } diff --git a/pkg/cmd/package.go b/pkg/cmd/package.go index 40c503222..fc56e936a 100644 --- a/pkg/cmd/package.go +++ b/pkg/cmd/package.go @@ -100,6 +100,7 @@ func newPackageCmd(out io.Writer) *cobra.Command { RegistryClient: registryClient, RepositoryConfig: settings.RepositoryConfig, RepositoryCache: settings.RepositoryCache, + ContentCache: settings.ContentCache, } if err := downloadManager.Update(); err != nil { diff --git a/pkg/cmd/pull_test.go b/pkg/cmd/pull_test.go index 6506e0b35..c3156c394 100644 --- a/pkg/cmd/pull_test.go +++ b/pkg/cmd/pull_test.go @@ -212,15 +212,18 @@ func TestPullCmd(t *testing.T) { }, } + contentCache := t.TempDir() + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { outdir := srv.Root() - cmd := fmt.Sprintf("fetch %s -d '%s' --repository-config %s --repository-cache %s --registry-config %s --plain-http", + cmd := fmt.Sprintf("fetch %s -d '%s' --repository-config %s --repository-cache %s --registry-config %s --content-cache %s --plain-http", tt.args, outdir, filepath.Join(outdir, "repositories.yaml"), outdir, filepath.Join(outdir, "config.json"), + contentCache, ) // Create file or Dir before helm pull --untar, see: https://github.com/helm/helm/issues/7182 if tt.existFile != "" { diff --git a/pkg/cmd/show_test.go b/pkg/cmd/show_test.go index ab8cafc37..5ccb4bcad 100644 --- a/pkg/cmd/show_test.go +++ b/pkg/cmd/show_test.go @@ -64,14 +64,17 @@ func TestShowPreReleaseChart(t *testing.T) { }, } + contentTmp := t.TempDir() + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { outdir := srv.Root() - cmd := fmt.Sprintf("show all '%s' %s --repository-config %s --repository-cache %s", + cmd := fmt.Sprintf("show all '%s' %s --repository-config %s --repository-cache %s --content-cache %s", tt.args, tt.flags, filepath.Join(outdir, "repositories.yaml"), outdir, + contentTmp, ) //_, out, err := executeActionCommand(cmd) _, _, err := executeActionCommand(cmd) diff --git a/pkg/cmd/upgrade.go b/pkg/cmd/upgrade.go index 54d9989b8..4ca889fc2 100644 --- a/pkg/cmd/upgrade.go +++ b/pkg/cmd/upgrade.go @@ -210,6 +210,7 @@ func newUpgradeCmd(cfg *action.Configuration, out io.Writer) *cobra.Command { Getters: p, RepositoryConfig: settings.RepositoryConfig, RepositoryCache: settings.RepositoryCache, + ContentCache: settings.ContentCache, Debug: settings.Debug, } if err := man.Update(); err != nil { diff --git a/pkg/downloader/cache.go b/pkg/downloader/cache.go new file mode 100644 index 000000000..cecfc8bd7 --- /dev/null +++ b/pkg/downloader/cache.go @@ -0,0 +1,89 @@ +/* +Copyright The Helm Authors. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package downloader + +import ( + "crypto/sha256" + "errors" + "fmt" + "io" + "log/slog" + "os" + "path/filepath" + + "helm.sh/helm/v4/internal/fileutil" +) + +// Cache describes a cache that can get and put chart data. +// The cache key is the sha256 has of the content. sha256 is used in Helm for +// digests in index files providing a common key for checking content. +type Cache interface { + // Get returns a reader for the given key. + Get(key [sha256.Size]byte, cacheType string) (string, error) + // Put stores the given reader for the given key. + Put(key [sha256.Size]byte, data io.Reader, cacheType string) (string, error) +} + +// CacheChart specifies the content is a chart +var CacheChart = ".chart" + +// CacheProv specifies the content is a provenance file +var CacheProv = ".prov" + +// TODO: The cache assumes files because much of Helm assumes files. Convert +// Helm to pass content around instead of file locations. + +// DiskCache is a cache that stores data on disk. +type DiskCache struct { + Root string +} + +// Get returns a reader for the given key. +func (c *DiskCache) Get(key [sha256.Size]byte, cacheType string) (string, error) { + p := c.fileName(key, cacheType) + fi, err := os.Stat(p) + if err != nil { + return "", err + } + // Empty files treated as not exist because there is no content. + if fi.Size() == 0 { + return p, os.ErrNotExist + } + // directories should never happen unless something outside helm is operating + // on this content. + if fi.IsDir() { + return p, errors.New("is a directory") + } + return p, nil +} + +// Put stores the given reader for the given key. +// It returns the path to the stored file. +func (c *DiskCache) Put(key [sha256.Size]byte, data io.Reader, cacheType string) (string, error) { + // TODO: verify the key and digest of the key are the same. + p := c.fileName(key, cacheType) + if err := os.MkdirAll(filepath.Dir(p), 0755); err != nil { + slog.Error("failed to create cache directory") + return p, err + } + return p, fileutil.AtomicWriteFile(p, data, 0644) +} + +// fileName generates the filename in a structured manner where the first part is the +// directory and the full hash is the filename. +func (c *DiskCache) fileName(id [sha256.Size]byte, cacheType string) string { + return filepath.Join(c.Root, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+cacheType) +} diff --git a/pkg/downloader/cache_test.go b/pkg/downloader/cache_test.go new file mode 100644 index 000000000..340c77aba --- /dev/null +++ b/pkg/downloader/cache_test.go @@ -0,0 +1,122 @@ +/* +Copyright The Helm Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package downloader + +import ( + "bytes" + "crypto/sha256" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// compiler check to ensure DiskCache implements the Cache interface. +var _ Cache = (*DiskCache)(nil) + +func TestDiskCache_PutAndGet(t *testing.T) { + // Setup a temporary directory for the cache + tmpDir := t.TempDir() + cache := &DiskCache{Root: tmpDir} + + // Test data + content := []byte("hello world") + key := sha256.Sum256(content) + + // --- Test case 1: Put and Get a regular file (prov=false) --- + t.Run("PutAndGetTgz", func(t *testing.T) { + // Put the data into the cache + path, err := cache.Put(key, bytes.NewReader(content), CacheChart) + require.NoError(t, err, "Put should not return an error") + + // Verify the file exists at the returned path + _, err = os.Stat(path) + require.NoError(t, err, "File should exist after Put") + + // Get the file from the cache + retrievedPath, err := cache.Get(key, CacheChart) + require.NoError(t, err, "Get should not return an error for existing file") + assert.Equal(t, path, retrievedPath, "Get should return the same path as Put") + + // Verify content + data, err := os.ReadFile(retrievedPath) + require.NoError(t, err) + assert.Equal(t, content, data, "Content of retrieved file should match original content") + }) + + // --- Test case 2: Put and Get a provenance file (prov=true) --- + t.Run("PutAndGetProv", func(t *testing.T) { + provContent := []byte("provenance data") + provKey := sha256.Sum256(provContent) + + path, err := cache.Put(provKey, bytes.NewReader(provContent), CacheProv) + require.NoError(t, err) + + retrievedPath, err := cache.Get(provKey, CacheProv) + require.NoError(t, err) + assert.Equal(t, path, retrievedPath) + + data, err := os.ReadFile(retrievedPath) + require.NoError(t, err) + assert.Equal(t, provContent, data) + }) + + // --- Test case 3: Get a non-existent file --- + t.Run("GetNonExistent", func(t *testing.T) { + nonExistentKey := sha256.Sum256([]byte("does not exist")) + _, err := cache.Get(nonExistentKey, CacheChart) + assert.ErrorIs(t, err, os.ErrNotExist, "Get for a non-existent key should return os.ErrNotExist") + }) + + // --- Test case 4: Put an empty file --- + t.Run("PutEmptyFile", func(t *testing.T) { + emptyContent := []byte{} + emptyKey := sha256.Sum256(emptyContent) + + path, err := cache.Put(emptyKey, bytes.NewReader(emptyContent), CacheChart) + require.NoError(t, err) + + // Get should return ErrNotExist for empty files + _, err = cache.Get(emptyKey, CacheChart) + assert.ErrorIs(t, err, os.ErrNotExist, "Get for an empty file should return os.ErrNotExist") + + // But the file should exist + _, err = os.Stat(path) + require.NoError(t, err, "Empty file should still exist on disk") + }) + + // --- Test case 5: Get a directory --- + t.Run("GetDirectory", func(t *testing.T) { + dirKey := sha256.Sum256([]byte("i am a directory")) + dirPath := cache.fileName(dirKey, CacheChart) + err := os.MkdirAll(dirPath, 0755) + require.NoError(t, err) + + _, err = cache.Get(dirKey, CacheChart) + assert.EqualError(t, err, "is a directory") + }) +} + +func TestDiskCache_fileName(t *testing.T) { + cache := &DiskCache{Root: "/tmp/cache"} + key := sha256.Sum256([]byte("some data")) + + assert.Equal(t, filepath.Join("/tmp/cache", "13", "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee.chart"), cache.fileName(key, CacheChart)) + assert.Equal(t, filepath.Join("/tmp/cache", "13", "1307990e6ba5ca145eb35e99182a9bec46531bc54ddf656a602c780fa0240dee.prov"), cache.fileName(key, CacheProv)) +} diff --git a/pkg/downloader/chart_downloader.go b/pkg/downloader/chart_downloader.go index 529fd788e..693e6b009 100644 --- a/pkg/downloader/chart_downloader.go +++ b/pkg/downloader/chart_downloader.go @@ -16,16 +16,21 @@ limitations under the License. package downloader import ( + "bytes" + "crypto/sha256" + "encoding/hex" "errors" "fmt" "io" "io/fs" + "log/slog" "net/url" "os" "path/filepath" "strings" "helm.sh/helm/v4/internal/fileutil" + ifs "helm.sh/helm/v4/internal/third_party/dep/fs" "helm.sh/helm/v4/internal/urlutil" "helm.sh/helm/v4/pkg/getter" "helm.sh/helm/v4/pkg/helmpath" @@ -72,6 +77,14 @@ type ChartDownloader struct { RegistryClient *registry.Client RepositoryConfig string RepositoryCache string + + // ContentCache is the location where Cache stores its files by default + // In previous versions of Helm the charts were put in the RepositoryCache. The + // repositories and charts are stored in 2 difference caches. + ContentCache string + + // Cache specifies the cache implementation to use. + Cache Cache } // DownloadTo retrieves a chart. Depending on the settings, it may also download a provenance file. @@ -86,7 +99,14 @@ type ChartDownloader struct { // Returns a string path to the location where the file was downloaded and a verification // (if provenance was verified), or an error if something bad happened. func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *provenance.Verification, error) { - u, err := c.ResolveChartVersion(ref, version) + if c.Cache == nil { + if c.ContentCache == "" { + return "", nil, errors.New("content cache must be set") + } + c.Cache = &DiskCache{Root: c.ContentCache} + slog.Debug("setup up default downloader cache") + } + hash, u, err := c.ResolveChartVersion(ref, version) if err != nil { return "", nil, err } @@ -96,11 +116,37 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven return "", nil, err } - c.Options = append(c.Options, getter.WithAcceptHeader("application/gzip,application/octet-stream")) + // Check the cache for the content. Otherwise download it. + // Note, this process will pull from the cache but does not automatically populate + // the cache with the file it downloads. + var data *bytes.Buffer + var found bool + var digest []byte + var digest32 [32]byte + if hash != "" { + // if there is a hash, populate the other formats + digest, err = hex.DecodeString(hash) + if err != nil { + return "", nil, err + } + copy(digest32[:], digest) + if pth, err := c.Cache.Get(digest32, CacheChart); err == nil { + fdata, err := os.ReadFile(pth) + if err == nil { + found = true + data = bytes.NewBuffer(fdata) + slog.Debug("found chart in cache", "id", hash) + } + } + } - data, err := g.Get(u.String(), c.Options...) - if err != nil { - return "", nil, err + if !found { + c.Options = append(c.Options, getter.WithAcceptHeader("application/gzip,application/octet-stream")) + + data, err = g.Get(u.String(), c.Options...) + if err != nil { + return "", nil, err + } } name := filepath.Base(u.Path) @@ -117,13 +163,27 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven // If provenance is requested, verify it. ver := &provenance.Verification{} if c.Verify > VerifyNever { - body, err := g.Get(u.String() + ".prov") - if err != nil { - if c.Verify == VerifyAlways { - return destfile, ver, fmt.Errorf("failed to fetch provenance %q", u.String()+".prov") + found = false + var body *bytes.Buffer + if hash != "" { + if pth, err := c.Cache.Get(digest32, CacheProv); err == nil { + fdata, err := os.ReadFile(pth) + if err == nil { + found = true + body = bytes.NewBuffer(fdata) + slog.Debug("found provenance in cache", "id", hash) + } + } + } + if !found { + body, err = g.Get(u.String() + ".prov") + if err != nil { + if c.Verify == VerifyAlways { + return destfile, ver, fmt.Errorf("failed to fetch provenance %q", u.String()+".prov") + } + fmt.Fprintf(c.Out, "WARNING: Verification not found for %s: %s\n", ref, err) + return destfile, ver, nil } - fmt.Fprintf(c.Out, "WARNING: Verification not found for %s: %s\n", ref, err) - return destfile, ver, nil } provfile := destfile + ".prov" if err := fileutil.AtomicWriteFile(provfile, body, 0644); err != nil { @@ -131,7 +191,7 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven } if c.Verify != VerifyLater { - ver, err = VerifyChart(destfile, c.Keyring) + ver, err = VerifyChart(destfile, destfile+".prov", c.Keyring) if err != nil { // Fail always in this case, since it means the verification step // failed. @@ -142,10 +202,143 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven return destfile, ver, nil } +// DownloadToCache retrieves resources while using a content based cache. +func (c *ChartDownloader) DownloadToCache(ref, version string) (string, *provenance.Verification, error) { + if c.Cache == nil { + if c.ContentCache == "" { + return "", nil, errors.New("content cache must be set") + } + c.Cache = &DiskCache{Root: c.ContentCache} + slog.Debug("setup up default downloader cache") + } + + digestString, u, err := c.ResolveChartVersion(ref, version) + if err != nil { + return "", nil, err + } + + g, err := c.Getters.ByScheme(u.Scheme) + if err != nil { + return "", nil, err + } + + c.Options = append(c.Options, getter.WithAcceptHeader("application/gzip,application/octet-stream")) + + // Check the cache for the file + digest, err := hex.DecodeString(digestString) + if err != nil { + return "", nil, err + } + var digest32 [32]byte + copy(digest32[:], digest) + if err != nil { + return "", nil, fmt.Errorf("unable to decode digest: %w", err) + } + + var pth string + // only fetch from the cache if we have a digest + if len(digest) > 0 { + pth, err = c.Cache.Get(digest32, CacheChart) + if err == nil { + slog.Debug("found chart in cache", "id", digestString) + } + } + if len(digest) == 0 || err != nil { + slog.Debug("attempting to download chart", "ref", ref, "version", version) + if err != nil && !os.IsNotExist(err) { + return "", nil, err + } + + // Get file not in the cache + data, gerr := g.Get(u.String(), c.Options...) + if gerr != nil { + return "", nil, gerr + } + + // Generate the digest + if len(digest) == 0 { + digest32 = sha256.Sum256(data.Bytes()) + } + + pth, err = c.Cache.Put(digest32, data, CacheChart) + if err != nil { + return "", nil, err + } + slog.Debug("put downloaded chart in cache", "id", hex.EncodeToString(digest32[:])) + } + + // If provenance is requested, verify it. + ver := &provenance.Verification{} + if c.Verify > VerifyNever { + + ppth, err := c.Cache.Get(digest32, CacheProv) + if err == nil { + slog.Debug("found provenance in cache", "id", digestString) + } else { + if !os.IsNotExist(err) { + return pth, ver, err + } + + body, err := g.Get(u.String() + ".prov") + if err != nil { + if c.Verify == VerifyAlways { + return pth, ver, fmt.Errorf("failed to fetch provenance %q", u.String()+".prov") + } + fmt.Fprintf(c.Out, "WARNING: Verification not found for %s: %s\n", ref, err) + return pth, ver, nil + } + + ppth, err = c.Cache.Put(digest32, body, CacheProv) + if err != nil { + return "", nil, err + } + slog.Debug("put downloaded provenance file in cache", "id", hex.EncodeToString(digest32[:])) + } + + if c.Verify != VerifyLater { + + // provenance files pin to a specific name so this needs to be accounted for + // when verifying. + // Note, this does make an assumption that the name/version is unique to a + // hash when a provenance file is used. If this isn't true, this section of code + // will need to be reworked. + name := filepath.Base(u.Path) + if u.Scheme == registry.OCIScheme { + idx := strings.LastIndexByte(name, ':') + name = fmt.Sprintf("%s-%s.tgz", name[:idx], name[idx+1:]) + } + + // Copy chart to a known location with the right name for verification and then + // clean it up. + tmpdir := filepath.Dir(filepath.Join(c.ContentCache, "tmp")) + if err := os.MkdirAll(tmpdir, 0755); err != nil { + return pth, ver, err + } + tmpfile := filepath.Join(tmpdir, name) + err = ifs.CopyFile(pth, tmpfile) + if err != nil { + return pth, ver, err + } + // Not removing the tmp dir itself because a concurrent process may be using it + defer os.RemoveAll(tmpfile) + + ver, err = VerifyChart(tmpfile, ppth, c.Keyring) + if err != nil { + // Fail always in this case, since it means the verification step + // failed. + return pth, ver, err + } + } + } + return pth, ver, nil +} + // ResolveChartVersion resolves a chart reference to a URL. // -// It returns the URL and sets the ChartDownloader's Options that can fetch -// the URL using the appropriate Getter. +// It returns: +// - A hash of the content if available +// - The URL and sets the ChartDownloader's Options that can fetch the URL using the appropriate Getter. +// - An error if there is one // // A reference may be an HTTP URL, an oci reference URL, a 'reponame/chartname' // reference, or a local path. @@ -157,23 +350,26 @@ func (c *ChartDownloader) DownloadTo(ref, version, dest string) (string, *proven // - If version is non-empty, this will return the URL for that version // - If version is empty, this will return the URL for the latest version // - If no version can be found, an error is returned -func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, error) { +// +// TODO: support OCI hash +func (c *ChartDownloader) ResolveChartVersion(ref, version string) (string, *url.URL, error) { u, err := url.Parse(ref) if err != nil { - return nil, fmt.Errorf("invalid chart URL format: %s", ref) + return "", nil, fmt.Errorf("invalid chart URL format: %s", ref) } if registry.IsOCI(u.String()) { if c.RegistryClient == nil { - return nil, fmt.Errorf("unable to lookup ref %s at version '%s', missing registry client", ref, version) + return "", nil, fmt.Errorf("unable to lookup ref %s at version '%s', missing registry client", ref, version) } - return c.RegistryClient.ValidateReference(ref, version, u) + digest, OCIref, err := c.RegistryClient.ValidateReference(ref, version, u) + return digest, OCIref, err } rf, err := loadRepoConfig(c.RepositoryConfig) if err != nil { - return u, err + return "", u, err } if u.IsAbs() && len(u.Host) > 0 && len(u.Path) > 0 { @@ -190,9 +386,9 @@ func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, er if err == ErrNoOwnerRepo { // Make sure to add the ref URL as the URL for the getter c.Options = append(c.Options, getter.WithURL(ref)) - return u, nil + return "", u, nil } - return u, err + return "", u, err } // If we get here, we don't need to go through the next phase of looking @@ -211,20 +407,20 @@ func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, er getter.WithPassCredentialsAll(rc.PassCredentialsAll), ) } - return u, nil + return "", u, nil } // See if it's of the form: repo/path_to_chart p := strings.SplitN(u.Path, "/", 2) if len(p) < 2 { - return u, fmt.Errorf("non-absolute URLs should be in form of repo_name/path_to_chart, got: %s", u) + return "", u, fmt.Errorf("non-absolute URLs should be in form of repo_name/path_to_chart, got: %s", u) } repoName := p[0] chartName := p[1] rc, err := pickChartRepositoryConfigByName(repoName, rf.Repositories) if err != nil { - return u, err + return "", u, err } // Now that we have the chart repository information we can use that URL @@ -233,7 +429,7 @@ func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, er r, err := repo.NewChartRepository(rc, c.Getters) if err != nil { - return u, err + return "", u, err } if r != nil && r.Config != nil { @@ -252,32 +448,33 @@ func (c *ChartDownloader) ResolveChartVersion(ref, version string) (*url.URL, er idxFile := filepath.Join(c.RepositoryCache, helmpath.CacheIndexFile(r.Config.Name)) i, err := repo.LoadIndexFile(idxFile) if err != nil { - return u, fmt.Errorf("no cached repo found. (try 'helm repo update'): %w", err) + return "", u, fmt.Errorf("no cached repo found. (try 'helm repo update'): %w", err) } cv, err := i.Get(chartName, version) if err != nil { - return u, fmt.Errorf("chart %q matching %s not found in %s index. (try 'helm repo update'): %w", chartName, version, r.Config.Name, err) + return "", u, fmt.Errorf("chart %q matching %s not found in %s index. (try 'helm repo update'): %w", chartName, version, r.Config.Name, err) } if len(cv.URLs) == 0 { - return u, fmt.Errorf("chart %q has no downloadable URLs", ref) + return "", u, fmt.Errorf("chart %q has no downloadable URLs", ref) } // TODO: Seems that picking first URL is not fully correct resolvedURL, err := repo.ResolveReferenceURL(rc.URL, cv.URLs[0]) if err != nil { - return u, fmt.Errorf("invalid chart URL format: %s", ref) + return cv.Digest, u, fmt.Errorf("invalid chart URL format: %s", ref) } - return url.Parse(resolvedURL) + loc, err := url.Parse(resolvedURL) + return cv.Digest, loc, err } // VerifyChart takes a path to a chart archive and a keyring, and verifies the chart. // // It assumes that a chart archive file is accompanied by a provenance file whose // name is the archive file name plus the ".prov" extension. -func VerifyChart(path, keyring string) (*provenance.Verification, error) { +func VerifyChart(path, provfile, keyring string) (*provenance.Verification, error) { // For now, error out if it's not a tar file. switch fi, err := os.Stat(path); { case err != nil: @@ -288,7 +485,6 @@ func VerifyChart(path, keyring string) (*provenance.Verification, error) { return nil, errors.New("chart must be a tgz file") } - provfile := path + ".prov" if _, err := os.Stat(provfile); err != nil { return nil, fmt.Errorf("could not load provenance file %s: %w", provfile, err) } diff --git a/pkg/downloader/chart_downloader_test.go b/pkg/downloader/chart_downloader_test.go index a2e09eae5..649448fef 100644 --- a/pkg/downloader/chart_downloader_test.go +++ b/pkg/downloader/chart_downloader_test.go @@ -16,10 +16,14 @@ limitations under the License. package downloader import ( + "crypto/sha256" + "encoding/hex" "os" "path/filepath" "testing" + "github.com/stretchr/testify/require" + "helm.sh/helm/v4/internal/test/ensure" "helm.sh/helm/v4/pkg/cli" "helm.sh/helm/v4/pkg/getter" @@ -79,7 +83,7 @@ func TestResolveChartRef(t *testing.T) { } for _, tt := range tests { - u, err := c.ResolveChartVersion(tt.ref, tt.version) + _, u, err := c.ResolveChartVersion(tt.ref, tt.version) if err != nil { if tt.fail { continue @@ -131,7 +135,7 @@ func TestResolveChartOpts(t *testing.T) { continue } - u, err := c.ResolveChartVersion(tt.ref, tt.version) + _, u, err := c.ResolveChartVersion(tt.ref, tt.version) if err != nil { t.Errorf("%s: failed with error %s", tt.name, err) continue @@ -155,7 +159,7 @@ func TestResolveChartOpts(t *testing.T) { } func TestVerifyChart(t *testing.T) { - v, err := VerifyChart("testdata/signtest-0.1.0.tgz", "testdata/helm-test-key.pub") + v, err := VerifyChart("testdata/signtest-0.1.0.tgz", "testdata/signtest-0.1.0.tgz.prov", "testdata/helm-test-key.pub") if err != nil { t.Fatal(err) } @@ -198,15 +202,19 @@ func TestDownloadTo(t *testing.T) { t.Fatal(err) } + contentCache := t.TempDir() + c := ChartDownloader{ Out: os.Stderr, Verify: VerifyAlways, Keyring: "testdata/helm-test-key.pub", RepositoryConfig: repoConfig, RepositoryCache: repoCache, + ContentCache: contentCache, Getters: getter.All(&cli.EnvSettings{ RepositoryConfig: repoConfig, RepositoryCache: repoCache, + ContentCache: contentCache, }), Options: []getter.Option{ getter.WithBasicAuth("username", "password"), @@ -250,6 +258,7 @@ func TestDownloadTo_TLS(t *testing.T) { repoConfig := filepath.Join(srv.Root(), "repositories.yaml") repoCache := srv.Root() + contentCache := t.TempDir() c := ChartDownloader{ Out: os.Stderr, @@ -257,9 +266,11 @@ func TestDownloadTo_TLS(t *testing.T) { Keyring: "testdata/helm-test-key.pub", RepositoryConfig: repoConfig, RepositoryCache: repoCache, + ContentCache: contentCache, Getters: getter.All(&cli.EnvSettings{ RepositoryConfig: repoConfig, RepositoryCache: repoCache, + ContentCache: contentCache, }), Options: []getter.Option{ getter.WithTLSClientConfig( @@ -304,15 +315,18 @@ func TestDownloadTo_VerifyLater(t *testing.T) { if err := srv.LinkIndices(); err != nil { t.Fatal(err) } + contentCache := t.TempDir() c := ChartDownloader{ Out: os.Stderr, Verify: VerifyLater, RepositoryConfig: repoConfig, RepositoryCache: repoCache, + ContentCache: contentCache, Getters: getter.All(&cli.EnvSettings{ RepositoryConfig: repoConfig, RepositoryCache: repoCache, + ContentCache: contentCache, }), } cname := "/signtest-0.1.0.tgz" @@ -366,3 +380,108 @@ func TestScanReposForURL(t *testing.T) { t.Fatalf("expected ErrNoOwnerRepo, got %v", err) } } + +func TestDownloadToCache(t *testing.T) { + srv := repotest.NewTempServer(t, + repotest.WithChartSourceGlob("testdata/*.tgz*"), + ) + defer srv.Stop() + if err := srv.CreateIndex(); err != nil { + t.Fatal(err) + } + if err := srv.LinkIndices(); err != nil { + t.Fatal(err) + } + + // The repo file needs to point to our server. + repoFile := filepath.Join(srv.Root(), "repositories.yaml") + repoCache := srv.Root() + contentCache := t.TempDir() + + c := ChartDownloader{ + Out: os.Stderr, + Verify: VerifyNever, + RepositoryConfig: repoFile, + RepositoryCache: repoCache, + Getters: getter.All(&cli.EnvSettings{ + RepositoryConfig: repoFile, + RepositoryCache: repoCache, + ContentCache: contentCache, + }), + Cache: &DiskCache{Root: contentCache}, + } + + // Case 1: Chart not in cache, download it. + t.Run("download and cache chart", func(t *testing.T) { + // Clear cache for this test + os.RemoveAll(contentCache) + os.MkdirAll(contentCache, 0755) + c.Cache = &DiskCache{Root: contentCache} + + pth, v, err := c.DownloadToCache("test/signtest", "0.1.0") + require.NoError(t, err) + require.NotNil(t, v) + + // Check that the file exists at the returned path + _, err = os.Stat(pth) + require.NoError(t, err, "chart should exist at returned path") + + // Check that it's in the cache + digest, _, err := c.ResolveChartVersion("test/signtest", "0.1.0") + require.NoError(t, err) + digestBytes, err := hex.DecodeString(digest) + require.NoError(t, err) + var digestArray [sha256.Size]byte + copy(digestArray[:], digestBytes) + + cachePath, err := c.Cache.Get(digestArray, CacheChart) + require.NoError(t, err, "chart should now be in cache") + require.Equal(t, pth, cachePath) + }) + + // Case 2: Chart is in cache, get from cache. + t.Run("get chart from cache", func(t *testing.T) { + // The cache should be populated from the previous test. + // To prove it's coming from cache, we can stop the server. + // But repotest doesn't support restarting. + // Let's just call it again and assume it works if it's fast and doesn't error. + pth, v, err := c.DownloadToCache("test/signtest", "0.1.0") + require.NoError(t, err) + require.NotNil(t, v) + + _, err = os.Stat(pth) + require.NoError(t, err, "chart should exist at returned path") + }) + + // Case 3: Download with verification + t.Run("download and verify", func(t *testing.T) { + // Clear cache + os.RemoveAll(contentCache) + os.MkdirAll(contentCache, 0755) + c.Cache = &DiskCache{Root: contentCache} + c.Verify = VerifyAlways + c.Keyring = "testdata/helm-test-key.pub" + + _, v, err := c.DownloadToCache("test/signtest", "0.1.0") + require.NoError(t, err) + require.NotNil(t, v) + require.NotEmpty(t, v.FileHash, "verification should have a file hash") + + // Check that both chart and prov are in cache + digest, _, err := c.ResolveChartVersion("test/signtest", "0.1.0") + require.NoError(t, err) + digestBytes, err := hex.DecodeString(digest) + require.NoError(t, err) + var digestArray [sha256.Size]byte + copy(digestArray[:], digestBytes) + + _, err = c.Cache.Get(digestArray, CacheChart) + require.NoError(t, err, "chart should be in cache") + _, err = c.Cache.Get(digestArray, CacheProv) + require.NoError(t, err, "provenance file should be in cache") + + // Reset for other tests + c.Verify = VerifyNever + c.Keyring = "" + }) +} diff --git a/pkg/downloader/manager.go b/pkg/downloader/manager.go index b43165975..8b77a77c0 100644 --- a/pkg/downloader/manager.go +++ b/pkg/downloader/manager.go @@ -75,6 +75,9 @@ type Manager struct { RegistryClient *registry.Client RepositoryConfig string RepositoryCache string + + // ContentCache is a location where a cache of charts can be stored + ContentCache string } // Build rebuilds a local charts directory from a lockfile. @@ -331,6 +334,7 @@ func (m *Manager) downloadAll(deps []*chart.Dependency) error { Keyring: m.Keyring, RepositoryConfig: m.RepositoryConfig, RepositoryCache: m.RepositoryCache, + ContentCache: m.ContentCache, RegistryClient: m.RegistryClient, Getters: m.Getters, Options: []getter.Option{ diff --git a/pkg/downloader/manager_test.go b/pkg/downloader/manager_test.go index f01a5d7ad..b7121a4ce 100644 --- a/pkg/downloader/manager_test.go +++ b/pkg/downloader/manager_test.go @@ -488,12 +488,14 @@ func checkBuildWithOptionalFields(t *testing.T, chartName string, dep chart.Depe Schemes: []string{"http", "https"}, New: getter.NewHTTPGetter, }} + contentCache := t.TempDir() m := &Manager{ ChartPath: dir(chartName), Out: b, Getters: g, RepositoryConfig: dir("repositories.yaml"), RepositoryCache: dir(), + ContentCache: contentCache, } // First build will update dependencies and create Chart.lock file. diff --git a/pkg/registry/client.go b/pkg/registry/client.go index 8d6af9697..169900750 100644 --- a/pkg/registry/client.go +++ b/pkg/registry/client.go @@ -831,12 +831,12 @@ func (c *Client) Resolve(ref string) (desc ocispec.Descriptor, err error) { } // ValidateReference for path and version -func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, error) { +func (c *Client) ValidateReference(ref, version string, u *url.URL) (string, *url.URL, error) { var tag string registryReference, err := newReference(u.Host + u.Path) if err != nil { - return nil, err + return "", nil, err } if version == "" { @@ -844,14 +844,14 @@ func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, e version = registryReference.Tag } else { if registryReference.Tag != "" && registryReference.Tag != version { - return nil, fmt.Errorf("chart reference and version mismatch: %s is not %s", version, registryReference.Tag) + return "", nil, fmt.Errorf("chart reference and version mismatch: %s is not %s", version, registryReference.Tag) } } if registryReference.Digest != "" { if version == "" { // Install by digest only - return u, nil + return "", u, nil } u.Path = fmt.Sprintf("%s@%s", registryReference.Repository, registryReference.Digest) @@ -860,12 +860,12 @@ func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, e desc, err := c.Resolve(path) if err != nil { // The resource does not have to be tagged when digest is specified - return u, nil + return "", u, nil } if desc.Digest.String() != registryReference.Digest { - return nil, fmt.Errorf("chart reference digest mismatch: %s is not %s", desc.Digest.String(), registryReference.Digest) + return "", nil, fmt.Errorf("chart reference digest mismatch: %s is not %s", desc.Digest.String(), registryReference.Digest) } - return u, nil + return registryReference.Digest, u, nil } // Evaluate whether an explicit version has been provided. Otherwise, determine version to use @@ -876,10 +876,10 @@ func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, e // Retrieve list of repository tags tags, err := c.Tags(strings.TrimPrefix(ref, fmt.Sprintf("%s://", OCIScheme))) if err != nil { - return nil, err + return "", nil, err } if len(tags) == 0 { - return nil, fmt.Errorf("unable to locate any tags in provided repository: %s", ref) + return "", nil, fmt.Errorf("unable to locate any tags in provided repository: %s", ref) } // Determine if version provided @@ -888,13 +888,14 @@ func (c *Client) ValidateReference(ref, version string, u *url.URL) (*url.URL, e // If semver constraint string, try to find a match tag, err = GetTagMatchingVersionOrConstraint(tags, version) if err != nil { - return nil, err + return "", nil, err } } u.Path = fmt.Sprintf("%s:%s", registryReference.Repository, tag) + // desc, err := c.Resolve(u.Path) - return u, err + return "", u, err } // tagManifest prepares and tags a manifest in memory storage