mirror of https://github.com/helm/helm
[WIP] Implemented secondary repository indexing refs #1904
This commit is an attempt to address the concerns scoped in #1904. `ChartDownloader.scanReposForURL()` is called when the chart being fetched is identified by an absolute URL and no trivial reference between the chart and its parent repository can be established. The operation requires a full linear repository scan. The runtime performance degrades in the case of a non-existing URL, as a full scan goes over the full range of existing repos, all their ChartVersion entries and each version of an entry. The motivation for this change is to speed up this operation, ideally making it run in nearly constant amortized time (a detailed performance analysis is presented below). There is a constraint that should be taken into account before a conclusion about the overall benefit can be made: the function call exists in a vacuum, i.e. there is no continuous runtime between consecutive runs of the function. Therefore we can't use an in-memory structure that is created once, and should consider a persistent cache approach (temporary cache files) instead. Dealing with the FS might come with a significant performance penalty, but in light of the aforementioned situation that seems acceptable. This commit introduces the concept of a secondary repository index: an extended cached index structure based on the primary repo index. Effectively it builds an inverted index based on the data read from index.yaml. In the current implementation it only builds an inverted URL index. The proposed structure of a secondary file looks like: indexes: byURL: chart_url_1: name: chart_name_1 version: chart_version_1 chart_url_2: name: chart_name_2 version: chart_version_2 Ideally, the structure should provide a nearly constant lookup time in order to provide any sensible result. On the other hand, a repository cache is a self-contained entity which exists independently from other repositories. Therefore it was decided to create a secondary index file for every repository. 
In this case we're still dealing with a linear traversal of repositories, but relying on the assumption that the average number of repositories is smaller than the average number of entries in them. The assumed invariant is irrelevant to the existing algorithm, as its runtime doesn't change if the ratio changes. In the current implementation, `scanReposForURL` loads the cached index.yaml of each repository during the linear repo traversal. In the proposed implementation this operation is substituted by reading index-secondary.yaml instead. We believe it has no impact on the operation's runtime complexity: the files are comparable in size and nesting complexity. The results of a benchmark run: goos: darwin goarch: amd64 pkg: helm.sh/helm/pkg/downloader BenchmarkScanReposForURL/with_secondary_index-4 50000 24649 ns/op BenchmarkScanReposForURL/with_secondary_index-4 100000 22762 ns/op BenchmarkScanReposForURL/with_secondary_index-4 50000 23170 ns/op BenchmarkScanReposForURL/with_secondary_index-4 100000 22712 ns/op BenchmarkScanReposForURL/with_secondary_index-4 50000 22346 ns/op BenchmarkScanReposForURL/with_secondary_index-4 100000 22658 ns/op BenchmarkScanReposForURL/with_secondary_index-4 100000 23544 ns/op BenchmarkScanReposForURL/with_secondary_index-4 100000 23062 ns/op BenchmarkScanReposForURL/with_secondary_index-4 100000 22476 ns/op BenchmarkScanReposForURL/with_secondary_index-4 100000 22621 ns/op BenchmarkScanReposForURL/no_secondary_index-4 20 144900320 ns/op BenchmarkScanReposForURL/no_secondary_index-4 10 178732070 ns/op BenchmarkScanReposForURL/no_secondary_index-4 10 130792721 ns/op BenchmarkScanReposForURL/no_secondary_index-4 10 170880342 ns/op BenchmarkScanReposForURL/no_secondary_index-4 10 144174110 ns/op BenchmarkScanReposForURL/no_secondary_index-4 20 147588994 ns/op BenchmarkScanReposForURL/no_secondary_index-4 10 155225613 ns/op BenchmarkScanReposForURL/no_secondary_index-4 10 205336598 ns/op BenchmarkScanReposForURL/no_secondary_index-4 10 168166931 ns/op 
BenchmarkScanReposForURL/no_secondary_index-4 10 152456294 ns/op Signed-off-by: Oleg Sidorov <oleg.sidorov@booking.com>pull/6405/head
parent
335d27a976
commit
ca4738414d
@ -0,0 +1,14 @@
|
||||
apiVersion: v1
|
||||
generated: "2019-09-10T09:57:59.406483+02:00"
|
||||
indexes:
|
||||
byURL:
|
||||
https://kubernetes-charts.storage.googleapis.com/alpine-0.1.0.tgz:
|
||||
name: alpine
|
||||
version: 0.1.0
|
||||
https://kubernetes-charts.storage.googleapis.com/alpine-0.2.0.tgz:
|
||||
name: alpine
|
||||
version: 0.2.0
|
||||
https://kubernetes-charts.storage.googleapis.com/mariadb-0.3.0.tgz:
|
||||
name: mariadb
|
||||
version: 0.3.0
|
||||
digest: sha256:a78bb76c183f8cbb
|
@ -0,0 +1,8 @@
|
||||
apiVersion: v1
|
||||
generated: "2019-09-10T09:57:59.40846+02:00"
|
||||
indexes:
|
||||
byURL:
|
||||
alpine-1.2.3.tgz:
|
||||
name: alpine
|
||||
version: 1.2.3
|
||||
digest: sha256:ede9ca5325393151
|
@ -0,0 +1,8 @@
|
||||
apiVersion: v1
|
||||
generated: "2019-09-10T09:57:59.405788+02:00"
|
||||
indexes:
|
||||
byURL:
|
||||
http://username:password@example.com/foo-1.2.3.tgz:
|
||||
name: foo
|
||||
version: 1.2.3
|
||||
digest: sha256:2cbcf6bb0028857e
|
@ -0,0 +1,8 @@
|
||||
apiVersion: v1
|
||||
generated: "2019-09-10T09:57:59.405227+02:00"
|
||||
indexes:
|
||||
byURL:
|
||||
https://example.com/foo-1.2.3.tgz:
|
||||
name: foo
|
||||
version: 1.2.3
|
||||
digest: sha256:01443f314ae01861
|
@ -0,0 +1,17 @@
|
||||
apiVersion: v1
|
||||
generated: "2019-09-10T09:57:59.404447+02:00"
|
||||
indexes:
|
||||
byURL:
|
||||
http://example.com/alpine-0.2.0.tgz:
|
||||
name: alpine
|
||||
version: 0.2.0
|
||||
http://example.com/alpine-1.2.3.tgz:
|
||||
name: alpine
|
||||
version: 1.2.3
|
||||
http://example.com/foo-1.2.3.tgz:
|
||||
name: foo
|
||||
version: 1.2.3
|
||||
https://kubernetes-charts.storage.googleapis.com/alpine-0.2.0.tgz:
|
||||
name: alpine
|
||||
version: 0.2.0
|
||||
digest: sha256:44d0069c3c7ea47c
|
@ -0,0 +1,8 @@
|
||||
apiVersion: v1
|
||||
generated: "2019-09-10T09:57:59.409072+02:00"
|
||||
indexes:
|
||||
byURL:
|
||||
alpine-1.2.3.tgz:
|
||||
name: alpine
|
||||
version: 1.2.3
|
||||
digest: sha256:ede9ca5325393151
|
@ -0,0 +1,11 @@
|
||||
apiVersion: v1
|
||||
generated: "2019-09-10T09:57:59.409713+02:00"
|
||||
indexes:
|
||||
byURL:
|
||||
bar-1.2.3.tgz:
|
||||
name: bar
|
||||
version: 1.2.3
|
||||
charts/foo-1.2.3.tgz:
|
||||
name: foo
|
||||
version: 1.2.3
|
||||
digest: sha256:99c8c16d33565af5
|
@ -0,0 +1,11 @@
|
||||
apiVersion: v1
|
||||
generated: "2019-09-10T09:57:59.410396+02:00"
|
||||
indexes:
|
||||
byURL:
|
||||
bar-1.2.3.tgz:
|
||||
name: bar
|
||||
version: 1.2.3
|
||||
charts/foo-1.2.3.tgz:
|
||||
name: foo
|
||||
version: 1.2.3
|
||||
digest: sha256:99c8c16d33565af5
|
@ -0,0 +1,102 @@
|
||||
/*
|
||||
Copyright The Helm Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package repo
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"sigs.k8s.io/yaml"
|
||||
|
||||
"helm.sh/helm/v3/internal/urlutil"
|
||||
)
|
||||
|
||||
// ChartVerEntry identifies a single chart version by chart name and version.
// It is the value type stored in the secondary URL index.
type ChartVerEntry struct {
	// Name is the chart name.
	Name string `json:"name"`
	// Version is the chart version string.
	Version string `json:"version"`
}

// SecondaryIndexes groups the inverted indexes held by a secondary index file.
type SecondaryIndexes struct {
	// ByURL maps a chart URL to the chart version that lists it.
	ByURL map[string]ChartVerEntry `json:"byURL,omitempty"`
}

// NewSecondaryIndexes returns an empty SecondaryIndexes.
//
// The ByURL map is allocated up front so that callers can insert entries
// right away; assigning into a nil map would panic. Because the field is
// tagged omitempty, an empty map is still left out of the serialized form.
func NewSecondaryIndexes() *SecondaryIndexes {
	return &SecondaryIndexes{
		ByURL: make(map[string]ChartVerEntry),
	}
}
|
||||
|
||||
type SecondaryIndexFile struct {
|
||||
APIVersion string `json:"apiVersion"`
|
||||
Generated time.Time `json:"generated"`
|
||||
Digest string `json:"digest"`
|
||||
Indexes *SecondaryIndexes `json:"indexes"`
|
||||
}
|
||||
|
||||
func NewSecondaryIndexFile() *SecondaryIndexFile {
|
||||
return &SecondaryIndexFile{
|
||||
APIVersion: APIVersionV1,
|
||||
Generated: time.Now(),
|
||||
Indexes: NewSecondaryIndexes(),
|
||||
}
|
||||
}
|
||||
|
||||
//TODO: this function should ensure the loaded secondary is consistent with the
|
||||
//existing primary index by comparing digests.
|
||||
func LoadSecondaryIndexFile(path string) (*SecondaryIndexFile, error) {
|
||||
data, err := ioutil.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s := &SecondaryIndexFile{}
|
||||
if err := yaml.Unmarshal(data, s); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
func (s *SecondaryIndexFile) WriteFile(dest string, mode os.FileMode) error {
|
||||
b, err := yaml.Marshal(s)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return ioutil.WriteFile(dest, b, mode)
|
||||
}
|
||||
|
||||
func (s *SecondaryIndexFile) IsComputedFrom(index *IndexFile) bool {
|
||||
digest, err := index.digest()
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return s.Digest == digest
|
||||
}
|
||||
|
||||
func (s *SecondaryIndexFile) buildURLIndex(index *IndexFile) error {
|
||||
urlIx := make(map[string]ChartVerEntry)
|
||||
for _, entry := range index.Entries {
|
||||
for _, ver := range entry {
|
||||
for _, dl := range ver.URLs {
|
||||
u := urlutil.Canonical(dl)
|
||||
urlIx[u] = ChartVerEntry{
|
||||
Name: ver.Name,
|
||||
Version: ver.Version,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
s.Indexes.ByURL = urlIx
|
||||
return nil
|
||||
}
|
Loading…
Reference in new issue