From e21c9cf7e243ca30868c2dfdd232168d7c4f744e Mon Sep 17 00:00:00 2001 From: Hidde Beydals Date: Thu, 20 Jul 2023 23:23:35 +0200 Subject: [PATCH 1/2] repo: detect JSON and unmarshal efficiently When an index is in a JSON format, the `sigs.k8s.io/yaml` package uses an inefficient approach to unmarshaling the data, as it does an unnecessary roundtrip on the data to transform the YAML to valid JSON. To prevent this from happening, detect if the bytes which we attempt to load contain valid JSON, and unmarshal them directly using `json.Unmarshal` instead. Signed-off-by: Hidde Beydals --- pkg/repo/index.go | 17 +++++++++- pkg/repo/index_test.go | 5 +++ pkg/repo/testdata/local-index.json | 53 ++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 pkg/repo/testdata/local-index.json diff --git a/pkg/repo/index.go b/pkg/repo/index.go index ba2e365c8..0618fe70a 100644 --- a/pkg/repo/index.go +++ b/pkg/repo/index.go @@ -18,6 +18,7 @@ package repo import ( "bytes" + "encoding/json" "log" "os" "path" @@ -336,7 +337,7 @@ func loadIndex(data []byte, source string) (*IndexFile, error) { return i, ErrEmptyIndexYaml } - if err := yaml.UnmarshalStrict(data, i); err != nil { + if err := jsonOrYamlUnmarshal(data, i); err != nil { return i, err } @@ -361,3 +362,17 @@ func loadIndex(data []byte, source string) (*IndexFile, error) { } return i, nil } + +// jsonOrYamlUnmarshal unmarshals the given byte slice containing JSON or YAML +// into the provided interface. +// +// It automatically detects whether the data is in JSON or YAML format by +// checking its validity as JSON. If the data is valid JSON, it will use the +// `encoding/json` package to unmarshal it. Otherwise, it will use the +// `sigs.k8s.io/yaml` package to unmarshal the YAML data. 
+func jsonOrYamlUnmarshal(b []byte, i interface{}) error { + if json.Valid(b) { + return json.Unmarshal(b, i) + } + return yaml.UnmarshalStrict(b, i) +} diff --git a/pkg/repo/index_test.go b/pkg/repo/index_test.go index bbc48c97e..5528bd943 100644 --- a/pkg/repo/index_test.go +++ b/pkg/repo/index_test.go @@ -37,6 +37,7 @@ const ( annotationstestfile = "testdata/local-index-annotations.yaml" chartmuseumtestfile = "testdata/chartmuseum-index.yaml" unorderedTestfile = "testdata/local-index-unordered.yaml" + jsonTestfile = "testdata/local-index.json" testRepo = "test-repo" indexWithDuplicates = ` apiVersion: v1 @@ -145,6 +146,10 @@ func TestLoadIndex(t *testing.T) { Name: "chartmuseum index file", Filename: chartmuseumtestfile, }, + { + Name: "JSON index file", + Filename: jsonTestfile, + }, } for _, tc := range tests { diff --git a/pkg/repo/testdata/local-index.json b/pkg/repo/testdata/local-index.json new file mode 100644 index 000000000..25296d5ca --- /dev/null +++ b/pkg/repo/testdata/local-index.json @@ -0,0 +1,53 @@ +{ + "apiVersion": "v1", + "entries": { + "nginx": [ + { + "urls": ["https://charts.helm.sh/stable/nginx-0.2.0.tgz"], + "name": "nginx", + "description": "string", + "version": "0.2.0", + "home": "https://github.com/something/else", + "digest": "sha256:1234567890abcdef", + "keywords": ["popular", "web server", "proxy"], + "apiVersion": "v2" + }, + { + "urls": ["https://charts.helm.sh/stable/nginx-0.1.0.tgz"], + "name": "nginx", + "description": "string", + "version": "0.1.0", + "home": "https://github.com/something", + "digest": "sha256:1234567890abcdef", + "keywords": ["popular", "web server", "proxy"], + "apiVersion": "v2" + } + ], + "alpine": [ + { + "urls": [ + "https://charts.helm.sh/stable/alpine-1.0.0.tgz", + "http://storage2.googleapis.com/kubernetes-charts/alpine-1.0.0.tgz" + ], + "name": "alpine", + "description": "string", + "version": "1.0.0", + "home": "https://github.com/something", + "keywords": ["linux", "alpine", "small", "sumtin"], + 
"digest": "sha256:1234567890abcdef", + "apiVersion": "v2" + } + ], + "chartWithNoURL": [ + { + "name": "chartWithNoURL", + "description": "string", + "version": "1.0.0", + "home": "https://github.com/something", + "keywords": ["small", "sumtin"], + "digest": "sha256:1234567890abcdef", + "apiVersion": "v2" + } + ] + } +} From 2544aa23a33977d91fe8f59d12dd923dc43be6c5 Mon Sep 17 00:00:00 2001 From: Hidde Beydals Date: Fri, 21 Jul 2023 00:32:42 +0200 Subject: [PATCH 2/2] cmd: support generating index in JSON format This adds support for generating the repository index file in JSON format using the `--json` flag. The index itself is still written to `index.yaml`, which is fully backwards compatible as YAML is a superset of JSON. For big indexes (think multiple megabytes), this approach is however more efficient in combination with the changes to the load logic, as it prevents a YAML -> JSON roundtrip during decoding. Signed-off-by: Hidde Beydals --- cmd/helm/repo_index.go | 15 ++++++++++++--- cmd/helm/repo_index_test.go | 23 +++++++++++++++++++++++ pkg/repo/index.go | 12 ++++++++++++ pkg/repo/index_test.go | 22 ++++++++++++++++++++++ 4 files changed, 69 insertions(+), 3 deletions(-) diff --git a/cmd/helm/repo_index.go b/cmd/helm/repo_index.go index 917acd442..3960380d1 100644 --- a/cmd/helm/repo_index.go +++ b/cmd/helm/repo_index.go @@ -43,6 +43,7 @@ type repoIndexOptions struct { dir string url string merge string + json bool } func newRepoIndexCmd(out io.Writer) *cobra.Command { @@ -70,6 +71,7 @@ func newRepoIndexCmd(out io.Writer) *cobra.Command { f := cmd.Flags() f.StringVar(&o.url, "url", "", "url of chart repository") f.StringVar(&o.merge, "merge", "", "merge the generated index into the given index") + f.BoolVar(&o.json, "json", false, "output in JSON format") return cmd } @@ -80,10 +82,10 @@ func (i *repoIndexOptions) run(out io.Writer) error { return err } - return index(path, i.url, i.merge) + return index(path, i.url, i.merge, i.json) } -func index(dir, url, 
mergeTo string) error { +func index(dir, url, mergeTo string, json bool) error { out := filepath.Join(dir, "index.yaml") i, err := repo.IndexDirectory(dir, url) @@ -95,7 +97,7 @@ func index(dir, url, mergeTo string) error { var i2 *repo.IndexFile if _, err := os.Stat(mergeTo); os.IsNotExist(err) { i2 = repo.NewIndexFile() - i2.WriteFile(mergeTo, 0644) + writeIndexFile(i2, mergeTo, json) } else { i2, err = repo.LoadIndexFile(mergeTo) if err != nil { @@ -105,5 +107,12 @@ func index(dir, url, mergeTo string) error { i.Merge(i2) } i.SortEntries() + return writeIndexFile(i, out, json) +} + +func writeIndexFile(i *repo.IndexFile, out string, json bool) error { + if json { + return i.WriteJSONFile(out, 0644) + } return i.WriteFile(out, 0644) } diff --git a/cmd/helm/repo_index_test.go b/cmd/helm/repo_index_test.go index ae3390154..9ba3595bf 100644 --- a/cmd/helm/repo_index_test.go +++ b/cmd/helm/repo_index_test.go @@ -18,6 +18,7 @@ package main import ( "bytes" + "encoding/json" "io" "os" "path/filepath" @@ -68,6 +69,28 @@ func TestRepoIndexCmd(t *testing.T) { t.Errorf("expected %q, got %q", expectedVersion, vs[0].Version) } + b, err := os.ReadFile(destIndex) + if err != nil { + t.Fatal(err) + } + if json.Valid(b) { + t.Error("did not expect index file to be valid json") + } + + // Test with `--json` + if err := c.ParseFlags([]string{"--json"}); err != nil { + t.Fatal(err) + } + if err := c.RunE(c, []string{dir}); err != nil { + t.Error(err) + } + if b, err = os.ReadFile(destIndex); err != nil { + t.Fatal(err) + } + if !json.Valid(b) { + t.Error("index file is not valid json") + } + // Test with `--merge` // Remove first two charts. diff --git a/pkg/repo/index.go b/pkg/repo/index.go index 0618fe70a..8a23ba060 100644 --- a/pkg/repo/index.go +++ b/pkg/repo/index.go @@ -233,6 +233,18 @@ func (i IndexFile) WriteFile(dest string, mode os.FileMode) error { return fileutil.AtomicWriteFile(dest, bytes.NewReader(b), mode) } +// WriteJSONFile writes an index file in JSON format to the given destination +// path. 
+// +// The mode on the file is set to 'mode'. +func (i IndexFile) WriteJSONFile(dest string, mode os.FileMode) error { + b, err := json.MarshalIndent(i, "", " ") + if err != nil { + return err + } + return fileutil.AtomicWriteFile(dest, bytes.NewReader(b), mode) +} + // Merge merges the given index file into this index. // // This merges by name and version. diff --git a/pkg/repo/index_test.go b/pkg/repo/index_test.go index 5528bd943..efb50ba6a 100644 --- a/pkg/repo/index_test.go +++ b/pkg/repo/index_test.go @@ -19,6 +19,7 @@ package repo import ( "bufio" "bytes" + "encoding/json" "net/http" "os" "path/filepath" @@ -553,6 +554,27 @@ func TestIndexWrite(t *testing.T) { } } +func TestIndexJSONWrite(t *testing.T) { + i := NewIndexFile() + if err := i.MustAdd(&chart.Metadata{APIVersion: "v2", Name: "clipper", Version: "0.1.0"}, "clipper-0.1.0.tgz", "http://example.com/charts", "sha256:1234567890"); err != nil { + t.Fatalf("unexpected error: %s", err) + } + testpath := filepath.Join(t.TempDir(), "test") + if err := i.WriteJSONFile(testpath, 0600); err != nil { + t.Fatalf("unexpected error: %s", err) + } + got, err := os.ReadFile(testpath) + if err != nil { + t.Fatal(err) + } + if !json.Valid(got) { + t.Fatal("Index files doesn't contain valid JSON") + } + if !strings.Contains(string(got), "clipper-0.1.0.tgz") { + t.Fatal("Index files doesn't contain expected content") + } +} + func TestAddFileIndexEntriesNil(t *testing.T) { i := NewIndexFile() i.APIVersion = chart.APIVersionV1