From 3a3627e512050f2de247ed5c548538e35674baef Mon Sep 17 00:00:00 2001 From: boris smidt Date: Tue, 12 Nov 2024 15:34:42 +0100 Subject: [PATCH] Replace the regex with a simple algorithm to split the yaml documents. Signed-off-by: boris smidt --- pkg/releaseutil/manifest.go | 56 ++++++++++++++----- pkg/releaseutil/manifest_test.go | 93 ++++++++++++++++++++++++++++++-- 2 files changed, 132 insertions(+), 17 deletions(-) diff --git a/pkg/releaseutil/manifest.go b/pkg/releaseutil/manifest.go index 0b04a4599..fc6f61257 100644 --- a/pkg/releaseutil/manifest.go +++ b/pkg/releaseutil/manifest.go @@ -17,8 +17,6 @@ limitations under the License. package releaseutil import ( - "fmt" - "regexp" "strconv" "strings" ) @@ -33,30 +31,60 @@ type SimpleHead struct { } `json:"metadata,omitempty"` } -var sep = regexp.MustCompile("(?:^|\\s*\n)---\\s*") - // SplitManifests takes a string of manifest and returns a map contains individual manifests func SplitManifests(bigFile string) map[string]string { // Basically, we're quickly splitting a stream of YAML documents into an // array of YAML docs. The file name is just a place holder, but should be // integer-sortable so that manifests get output in the same order as the // input (see `BySplitManifestsOrder`). - tpl := "manifest-%d" - res := map[string]string{} // Making sure that any extra whitespace in YAML stream doesn't interfere in splitting documents correctly. bigFileTmp := strings.TrimSpace(bigFile) - docs := sep.Split(bigFileTmp, -1) - var count int - for _, d := range docs { - if d == "" { + docs := splitDocs(bigFileTmp) + res := make(map[string]string, len(docs)) + for count, _ := range docs { + res["manifest-"+strconv.Itoa(count)] = docs[count] + } + return res +} + +const yamlDocumentTermination = "\n---" + +func splitDocs(bigFile string) []string { + docs := make([]string, 0) + docStartIdx := 0 + + // strip off a leading --- avoiding a special start case + bigFile = strings.TrimSpace(strings.TrimPrefix(strings.TrimSpace(bigFile), "---")) + + // using our own index so we can manually skip forward + i := 0 + for i < len(bigFile) { + // see if we find a document termination sequence, i.e. "\n---" + if !strings.HasPrefix(bigFile[i:], yamlDocumentTermination) { + i++ continue } - d = strings.TrimSpace(d) - res[fmt.Sprintf(tpl, count)] = d - count = count + 1 + // this is the end of the document, slicing the bytes array + doc := strings.TrimSpace(bigFile[docStartIdx:i]) + + // ignore empty docs + if doc != "" { + docs = append(docs, doc) + } + + // skip the document termination characters + docStartIdx = i + len(yamlDocumentTermination) + i = docStartIdx } - return res + + // append the 'rest' of the document as the last document + doc := strings.TrimSpace(bigFile[docStartIdx:]) + if doc != "" { + docs = append(docs, doc) + } + + return docs } // BySplitManifestsOrder sorts by in-file manifest order, as provided in function `SplitManifests` diff --git a/pkg/releaseutil/manifest_test.go b/pkg/releaseutil/manifest_test.go index 8664d20ef..71d158824 100644 --- a/pkg/releaseutil/manifest_test.go +++ b/pkg/releaseutil/manifest_test.go @@ -18,6 +18,7 @@ package releaseutil // import "helm.sh/helm/v3/pkg/releaseutil" import ( "reflect" + "strings" "testing" ) @@ -37,7 +38,7 @@ spec: cmd: fake-command ` -const expectedManifest = `apiVersion: v1 +const expectedManifest1 = `apiVersion: v1 kind: Pod metadata: name: finding-nemo, @@ -49,13 +50,99 @@ spec: image: fake-image cmd: fake-command` +const mockManifestFile2 = ` + +--- + +apiVersion: v1 +kind: Pod +metadata: + name: finding-nemo, + annotations: + "helm.sh/hook": test +spec: + containers: + - name: nemo-test + image: fake-image + cmd: fake-command + +---apiVersion: v1 +kind: Pod +metadata: + name: finding-nemo-2, + annotations: + "helm.sh/hook": test +spec: + containers: + - name: nemo-test + image: fake-image + cmd: fake-command +` + +const expectedManifest2 = `apiVersion: v1 +kind: Pod +metadata: + name: finding-nemo-2, + annotations: + "helm.sh/hook": test +spec: + containers: + - name: nemo-test + image: fake-image + cmd: fake-command` + func TestSplitManifest(t *testing.T) { manifests := SplitManifests(mockManifestFile) if len(manifests) != 1 { t.Errorf("Expected 1 manifest, got %v", len(manifests)) } - expected := map[string]string{"manifest-0": expectedManifest} + expected := map[string]string{"manifest-0": expectedManifest1} if !reflect.DeepEqual(manifests, expected) { - t.Errorf("Expected %v, got %v", expected, manifests) + t.Errorf("Expected \n%v\n got: \n%v", expected, manifests) } } + +func TestSplitManifestDocBreak(t *testing.T) { + manifests := SplitManifests(mockManifestFile2) + if len(manifests) != 2 { + t.Errorf("Expected 2 manifest, got %v", len(manifests)) + } + expected := map[string]string{"manifest-0": expectedManifest1, "manifest-1": expectedManifest2} + if !reflect.DeepEqual(manifests, expected) { + t.Errorf("Expected \n%v\n got: \n%v", expected, manifests) + } + +} + +func TestSplitManifestNoDocBreak(t *testing.T) { + manifests := SplitManifests(expectedManifest1) + if len(manifests) != 1 { + t.Errorf("Expected 1 manifest, got %v", len(manifests)) + } + expected := map[string]string{"manifest-0": expectedManifest1} + if !reflect.DeepEqual(manifests, expected) { + t.Errorf("Expected \n%v\n got: \n%v", expected, manifests) + } + +} + +func createManifest() string { + sb := strings.Builder{} + for i := 0; i < 10000; i++ { + sb.WriteString(expectedManifest2) + sb.WriteString("\n---") + } + return sb.String() +} + +var BenchmarkSplitManifestsResult map[string]string +var largeManifest = createManifest() + +func BenchmarkSplitManifests(b *testing.B) { + var r map[string]string + for n := 0; n < b.N; n++ { + r = SplitManifests(largeManifest) + } + + BenchmarkSplitManifestsResult = r +}