feat(helm): update helm search

Switch 'helm search' from file crawling to using the indices. Also
add scorable indexing, forward porting the search code I originally
wrote for Helm Classic.

Closes #1226
Partially addresses #1199
pull/1229/head
Matt Butcher 8 years ago
parent c0d33afc81
commit 446d555178

@ -101,6 +101,7 @@ func newRootCmd(out io.Writer) *cobra.Command {
newVersionCmd(nil, out), newVersionCmd(nil, out),
newRepoCmd(out), newRepoCmd(out),
newDependencyCmd(out), newDependencyCmd(out),
newSearchCmd(out),
) )
return cmd return cmd
} }

@ -17,90 +17,99 @@ limitations under the License.
package main package main
import ( import (
"errors"
"fmt" "fmt"
"os" "io"
"path/filepath"
"strings" "strings"
"github.com/spf13/cobra" "github.com/spf13/cobra"
"k8s.io/helm/cmd/helm/helmpath"
"k8s.io/helm/cmd/helm/search"
"k8s.io/helm/pkg/repo" "k8s.io/helm/pkg/repo"
) )
func init() { const searchDesc = `
RootCommand.AddCommand(searchCmd) Search reads through all of the repositories configured on the system, and
looks for matches.
Repositories are managed with 'helm repo' commands.
`
// searchMaxScore is the threshold handed to the index search. Only results
// whose score is strictly below this value are reported as matches; lower
// scores mean better matches.
const searchMaxScore = 25
type searchCmd struct {
out io.Writer
helmhome helmpath.Home
regexp bool
} }
var searchCmd = &cobra.Command{ func newSearchCmd(out io.Writer) *cobra.Command {
sc := &searchCmd{out: out, helmhome: helmpath.Home(homePath())}
cmd := &cobra.Command{
Use: "search [keyword]", Use: "search [keyword]",
Short: "search for a keyword in charts", Short: "search for a keyword in charts",
Long: "Searches the known repositories cache files for the specified search string, looks at name and keywords", Long: searchDesc,
RunE: search, RunE: func(cmd *cobra.Command, args []string) error {
return sc.run(args)
},
PreRunE: requireInit, PreRunE: requireInit,
}
cmd.Flags().BoolVarP(&sc.regexp, "regexp", "r", false, "use regular expressions for searching")
return cmd
} }
func search(cmd *cobra.Command, args []string) error { func (s *searchCmd) run(args []string) error {
index, err := s.buildIndex()
if err != nil {
return err
}
if len(args) == 0 { if len(args) == 0 {
return errors.New("This command needs at least one argument (search string)") s.showAllCharts(index)
} }
// TODO: This needs to be refactored to use loadChartRepositories q := strings.Join(args, " ")
results, err := searchCacheForPattern(cacheDirectory(), args[0]) res, err := index.Search(q, searchMaxScore, s.regexp)
if err != nil { if err != nil {
return err return nil
}
if len(results) > 0 {
for _, result := range results {
fmt.Println(result)
} }
search.SortScore(res)
for _, r := range res {
fmt.Fprintln(s.out, r.Name)
} }
return nil return nil
} }
func searchChartRefsForPattern(search string, chartRefs map[string]*repo.ChartRef) []string { func (s *searchCmd) showAllCharts(i *search.Index) {
matches := []string{} for name := range i.Entries() {
for k, c := range chartRefs { fmt.Fprintln(s.out, name)
if strings.Contains(c.Name, search) && !c.Removed {
matches = append(matches, k)
continue
}
if c.Chartfile == nil {
continue
}
for _, keyword := range c.Chartfile.Keywords {
if strings.Contains(keyword, search) {
matches = append(matches, k)
} }
}
}
return matches
} }
func searchCacheForPattern(dir string, search string) ([]string, error) { func (s *searchCmd) buildIndex() (*search.Index, error) {
fileList := []string{} // Load the repositories.yaml
filepath.Walk(dir, func(path string, f os.FileInfo, err error) error { rf, err := repo.LoadRepositoriesFile(s.helmhome.RepositoryFile())
if !f.IsDir() {
fileList = append(fileList, path)
}
return nil
})
matches := []string{}
for _, f := range fileList {
index, err := repo.LoadIndexFile(f)
if err != nil { if err != nil {
return matches, fmt.Errorf("index %s corrupted: %s", f, err) return nil, err
} }
m := searchChartRefsForPattern(search, index.Entries) i := search.NewIndex()
repoName := strings.TrimSuffix(filepath.Base(f), "-index.yaml") for n := range rf.Repositories {
for _, c := range m { f := s.helmhome.CacheIndex(n)
// TODO: Is it possible for this file to be missing? Or to have ind, err := repo.LoadIndexFile(f)
// an extension other than .tgz? Should the actual filename be in if err != nil {
// the YAML? fmt.Fprintf(s.out, "WARNING: Repo %q is corrupt. Try 'helm update': %s", f, err)
fname := filepath.Join(repoName, c+".tgz") continue
matches = append(matches, fname)
} }
i.AddRepo(n, ind)
} }
return matches, nil return i, nil
} }

@ -0,0 +1,183 @@
/*
Copyright 2016 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*Package search provides client-side repository searching.
This supports building an in-memory search index based on the contents of
multiple repositories, and then using string matching or regular expressions
to find matches.
*/
package search
import (
"errors"
"path/filepath"
"regexp"
"sort"
"strings"
"k8s.io/helm/pkg/repo"
)
// Result is a search result.
//
// Name is the key under which the matching chart was indexed (the repository
// name joined with the entry name from the repository index).
//
// Score indicates how close it is to match. The higher the score, the longer
// the distance: it is the position of the index-line field in which the match
// starts, so 0 is the best possible score.
type Result struct {
	Name  string
	Score int
}
// Index is a searchable index of chart information.
//
// Both maps share the same keys, which are assigned by AddRepo.
type Index struct {
	// lines holds, per chart key, the lower-cased, separator-delimited text
	// that searches are matched against.
	lines map[string]string
	// charts holds, per chart key, the chart reference taken from the
	// repository index.
	charts map[string]*repo.ChartRef
}
// sep is the delimiter placed between the fields of an index line (a vertical
// tab). calcScore relies on it being exactly one character long.
const sep = "\v"
// NewIndex creates a new, empty Index that is ready to have repositories
// added to it.
func NewIndex() *Index {
	idx := new(Index)
	idx.lines = make(map[string]string)
	idx.charts = make(map[string]*repo.ChartRef)
	return idx
}
// AddRepo adds a repository index to the search index.
//
// rname is the name of the repository and ind is its loaded index file. Every
// entry of ind is registered under the key "<rname>/<entry name>"; an entry
// that is already present under the same key is overwritten.
func (i *Index) AddRepo(rname string, ind *repo.IndexFile) {
	for name, ref := range ind.Entries {
		// The key is a chart name shown to the user, not a file system path.
		// filepath.Join uses the OS separator ("\" on Windows), whereas the
		// searchable line built by indstr always uses "/", so normalize the
		// key to forward slashes to keep both consistent on every platform.
		fname := filepath.ToSlash(filepath.Join(rname, name))
		i.lines[fname] = indstr(rname, ref)
		i.charts[fname] = ref
	}
}
// Entries returns the entries in an index, keyed by the name each chart was
// indexed under.
//
// The returned map is the index's own map, not a copy.
func (i *Index) Entries() map[string]*repo.ChartRef {
	return i.charts
}
// Search looks up term in the index and returns every match whose score is
// below threshold.
//
// Scores work like golf rather than bowling: the lower the score, the more
// relevant the result. threshold is the first score that is considered
// irrelevant.
//
// When regexp is true, term is compiled and applied as a regular expression
// and a compile failure is reported through the returned error. When regexp
// is false, term is matched as a literal string and the error is always nil.
func (i *Index) Search(term string, threshold int, regexp bool) ([]*Result, error) {
	if !regexp {
		return i.SearchLiteral(term, threshold), nil
	}
	return i.SearchRegexp(term, threshold)
}
// calcScore calculates a score for a match.
//
// index is the byte offset in matchline at which the match starts. The score
// is the number of field separators located before that offset, i.e. the
// zero-based position of the field in which the match begins. A separator
// sitting exactly at index is not counted.
func (i *Index) calcScore(index int, matchline string) int {
	// This is currently tied to the fact that sep is a single char.
	delim := rune(sep[0])

	score := 0
	for pos, ch := range matchline {
		// Everything from the match offset onwards is irrelevant.
		if pos >= index {
			break
		}
		if ch == delim {
			score++
		}
	}
	return score
}
// SearchLiteral does a literal string search (no regexp).
//
// term is lower-cased before matching. A chart is reported when term occurs
// in its index line and the resulting score is below threshold. The returned
// slice is never nil and is in no particular order.
func (i *Index) SearchLiteral(term string, threshold int) []*Result {
	needle := strings.ToLower(term)
	results := []*Result{}
	for name, line := range i.lines {
		pos := strings.Index(line, needle)
		if pos == -1 {
			continue
		}
		score := i.calcScore(pos, line)
		if score >= threshold {
			continue
		}
		results = append(results, &Result{Name: name, Score: score})
	}
	return results
}
// SearchRegexp searches using a regular expression.
//
// re is compiled as given; if it does not compile, an empty result slice and
// the compile error are returned. Otherwise a chart is reported when the
// expression matches its index line and the score of the leftmost match is
// below threshold. Results are in no particular order.
func (i *Index) SearchRegexp(re string, threshold int) ([]*Result, error) {
	results := []*Result{}

	pattern, err := regexp.Compile(re)
	if err != nil {
		return results, err
	}

	for name, line := range i.lines {
		loc := pattern.FindStringIndex(line)
		if len(loc) == 0 {
			// No match in this line.
			continue
		}
		// loc[0] is the start offset of the leftmost match.
		if score := i.calcScore(loc[0], line); score < threshold {
			results = append(results, &Result{Name: name, Score: score})
		}
	}
	return results, nil
}
// Chart returns the ChartRef for a particular name.
//
// name must be the key the chart was indexed under. An error is returned when
// no chart is registered under that key.
func (i *Index) Chart(name string) (*repo.ChartRef, error) {
	if ref, found := i.charts[name]; found {
		return ref, nil
	}
	return nil, errors.New("no such chart")
}
// SortScore does an in-place sort of the results.
//
// Results are ordered by ascending score, so the best matches come first.
// Results with equal scores are ordered alphabetically by name.
func SortScore(r []*Result) {
	sort.Sort(scoreSorter(r))
}

// scoreSorter implements sort.Interface over a slice of results, ordering by
// score first and by name second.
type scoreSorter []*Result

// Len returns the number of results being sorted.
func (s scoreSorter) Len() int {
	return len(s)
}

// Swap exchanges the results at positions i and j.
func (s scoreSorter) Swap(i, j int) {
	s[i], s[j] = s[j], s[i]
}

// Less reports whether the result at position a sorts before the result at
// position b.
func (s scoreSorter) Less(a, b int) bool {
	left, right := s[a], s[b]
	if left.Score != right.Score {
		return left.Score < right.Score
	}
	// Equal scores: fall back to alphabetical order.
	return left.Name < right.Name
}
// indstr builds the searchable line for one chart.
//
// name is the repository name and ref the chart reference. The line consists
// of the chart name and the "<repository>/<chart>" name, each followed by the
// field separator; when chart metadata is available, the description and the
// keywords follow, separated the same way. The whole line is lower-cased.
func indstr(name string, ref *repo.ChartRef) string {
	// The trailing empty element yields the separator after the second field.
	line := strings.Join([]string{ref.Name, name + "/" + ref.Name, ""}, sep)
	if meta := ref.Chartfile; meta != nil {
		keywords := strings.Join(meta.Keywords, sep)
		line += strings.Join([]string{meta.Description, keywords}, sep)
	}
	return strings.ToLower(line)
}

@ -0,0 +1,232 @@
/*
Copyright 2016 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package search
import (
"strings"
"testing"
"k8s.io/helm/pkg/proto/hapi/chart"
"k8s.io/helm/pkg/repo"
)
// TestSortScore verifies that SortScore orders results by ascending score and
// breaks ties alphabetically by name.
func TestSortScore(t *testing.T) {
	results := []*Result{
		{Name: "bbb", Score: 0},
		{Name: "aaa", Score: 5},
		{Name: "abb", Score: 5},
		{Name: "aab", Score: 0},
		{Name: "bab", Score: 5},
	}
	wantNames := []string{"aab", "bbb", "aaa", "abb", "bab"}
	wantScores := []int{0, 0, 5, 5, 5}

	SortScore(results)

	// Test Score
	for idx, want := range wantScores {
		if got := results[idx].Score; got != want {
			t.Errorf("Sort error on index %d: expected %d, got %d", idx, want, got)
		}
	}
	// Test Name
	for idx, want := range wantNames {
		if got := results[idx].Name; got != want {
			t.Errorf("Sort error: expected %s, got %s", want, got)
		}
	}
}
// testCacheDir is a relative path to a test data directory.
var testCacheDir = "../testdata/"

// indexfileEntries is the set of chart entries that loadTestIndex registers
// under the "testing" repository. Keys are "<chart name>-<version>".
var indexfileEntries = map[string]*repo.ChartRef{
	"niña-0.1.0": {
		Name: "niña",
		URL:  "http://example.com/charts/nina-0.1.0.tgz",
		Chartfile: &chart.Metadata{
			Name:        "niña",
			Version:     "0.1.0",
			Description: "One boat",
		},
	},
	"pinta-0.1.0": {
		Name: "pinta",
		URL:  "http://example.com/charts/pinta-0.1.0.tgz",
		Chartfile: &chart.Metadata{
			Name:        "pinta",
			Version:     "0.1.0",
			Description: "Two ship",
		},
	},
	"santa-maria-1.2.3": {
		Name: "santa-maria",
		URL:  "http://example.com/charts/santa-maria-1.2.3.tgz",
		Chartfile: &chart.Metadata{
			Name:        "santa-maria",
			Version:     "1.2.3",
			Description: "Three boat",
		},
	},
}
// loadTestIndex builds the index shared by the search tests: the "testing"
// repository holds indexfileEntries and the "ztesting" repository holds a
// second, newer pinta chart.
func loadTestIndex(t *testing.T) *Index {
	secondRepo := map[string]*repo.ChartRef{
		"pinta-2.0.0": {
			Name: "pinta",
			URL:  "http://example.com/charts/pinta-2.0.0.tgz",
			Chartfile: &chart.Metadata{
				Name:        "pinta",
				Version:     "2.0.0",
				Description: "Two ship, version two",
			},
		},
	}

	idx := NewIndex()
	idx.AddRepo("testing", &repo.IndexFile{Entries: indexfileEntries})
	idx.AddRepo("ztesting", &repo.IndexFile{Entries: secondRepo})
	return idx
}
// TestSearchByName runs literal and regular-expression searches against the
// test index and checks the names of the returned charts.
//
// Each case lists the expected results in sorted order (see SortScore). A case
// with fail set must make Search return an error whose message contains
// failMsg.
func TestSearchByName(t *testing.T) {
	tests := []struct {
		name    string
		query   string
		expect  []*Result
		regexp  bool
		fail    bool
		failMsg string
	}{
		{
			name:  "basic search for one result",
			query: "santa-maria",
			expect: []*Result{
				{Name: "testing/santa-maria-1.2.3"},
			},
		},
		{
			name:  "basic search for two results",
			query: "pinta",
			expect: []*Result{
				{Name: "testing/pinta-0.1.0"},
				{Name: "ztesting/pinta-2.0.0"},
			},
		},
		{
			name:  "repo-specific search for one result",
			query: "ztesting/pinta",
			expect: []*Result{
				{Name: "ztesting/pinta-2.0.0"},
			},
		},
		{
			name:  "partial name search",
			query: "santa",
			expect: []*Result{
				{Name: "testing/santa-maria-1.2.3"},
			},
		},
		{
			name:  "description search, one result",
			query: "Three",
			expect: []*Result{
				{Name: "testing/santa-maria-1.2.3"},
			},
		},
		{
			name:  "description search, two results",
			query: "two",
			expect: []*Result{
				{Name: "testing/pinta-0.1.0"},
				{Name: "ztesting/pinta-2.0.0"},
			},
		},
		{
			name:   "nothing found",
			query:  "mayflower",
			expect: []*Result{},
		},
		{
			name:  "regexp, one result",
			query: "th[ref]*",
			expect: []*Result{
				{Name: "testing/santa-maria-1.2.3"},
			},
			regexp: true,
		},
		{
			name:    "regexp, fail compile",
			query:   "th[",
			expect:  []*Result{},
			regexp:  true,
			fail:    true,
			failMsg: "error parsing regexp:",
		},
	}

	idx := loadTestIndex(t)

	for _, tt := range tests {
		charts, err := idx.Search(tt.query, 100, tt.regexp)
		if err != nil {
			if tt.fail {
				if !strings.Contains(err.Error(), tt.failMsg) {
					t.Fatalf("%s: Unexpected error message: %s", tt.name, err)
				}
				continue
			}
			t.Fatalf("%s: %s", tt.name, err)
		}
		// A case that is expected to fail must not pass just because its
		// (empty) expected result set happens to match.
		if tt.fail {
			t.Fatalf("%s: expected an error containing %q, got none", tt.name, tt.failMsg)
		}

		// Give us predictably ordered results.
		SortScore(charts)

		l := len(charts)
		if l != len(tt.expect) {
			t.Fatalf("%s: Expected %d result, got %d", tt.name, len(tt.expect), l)
		}
		// For empty result sets, just keep going.
		if l == 0 {
			continue
		}

		// Lengths are equal at this point, so indexing tt.expect is safe.
		for n, got := range charts {
			ex := tt.expect[n]
			if got.Name != ex.Name {
				t.Errorf("%s[%d]: Expected name %q, got %q", tt.name, n, ex.Name, got.Name)
			}
		}
	}
}
// TestCalcScore checks that calcScore maps a match offset to the position of
// the field that contains it.
func TestCalcScore(t *testing.T) {
	idx := NewIndex()
	matchline := strings.Join([]string{"aaa", "bbb", "ccc", "ddd"}, sep)

	checks := []struct {
		offset int
		want   int
	}{
		{offset: 2, want: 0},
		{offset: 5, want: 1},
		{offset: 10, want: 2},
		{offset: 14, want: 3},
	}
	for _, c := range checks {
		if got := idx.calcScore(c.offset, matchline); got != c.want {
			t.Errorf("Expected %d, got %d", c.want, got)
		}
	}
}

@ -17,79 +17,68 @@ limitations under the License.
package main package main
import ( import (
"bytes"
"strings"
"testing" "testing"
"k8s.io/helm/pkg/repo"
) )
const testDir = "testdata/testcache" func TestSearchCmd(t *testing.T) {
const testFile = "testdata/testcache/local-index.yaml" tests := []struct {
name string
type searchTestCase struct { args []string
in string flags []string
expectedOut []string expect string
} regexp bool
fail bool
var searchTestCases = []searchTestCase{ }{
{"foo", []string{}}, {
{"alpine", []string{"alpine-1.0.0"}}, name: "search for 'maria', expect one match",
{"sumtin", []string{"alpine-1.0.0"}}, args: []string{"maria"},
{"web", []string{"nginx-0.1.0"}}, expect: "testing/mariadb-0.3.0",
} },
{
var searchCacheTestCases = []searchTestCase{ name: "search for 'alpine', expect two matches",
{"notthere", []string{}}, args: []string{"alpine"},
{"odd", []string{"foobar/oddness-1.2.3.tgz"}}, expect: "testing/alpine-0.1.0\ntesting/alpine-0.2.0",
{"sumtin", []string{"local/alpine-1.0.0.tgz", "foobar/oddness-1.2.3.tgz"}}, },
{"foobar", []string{"foobar/foobar-0.1.0.tgz"}}, {
{"web", []string{"local/nginx-0.1.0.tgz"}}, name: "search for 'syzygy', expect no matches",
} args: []string{"syzygy"},
expect: "",
func validateEntries(t *testing.T, in string, found []string, expected []string) { },
if len(found) != len(expected) { {
t.Errorf("Failed to search %s: Expected: %#v got: %#v", in, expected, found) name: "search for 'alp[a-z]+', expect two matches",
} args: []string{"alp[a-z]+"},
foundCount := 0 flags: []string{"--regexp"},
for _, exp := range expected { expect: "testing/alpine-0.1.0\ntesting/alpine-0.2.0",
for _, f := range found { regexp: true,
if exp == f { },
foundCount = foundCount + 1 {
continue name: "search for 'alp[', expect failure to compile regexp",
} args: []string{"alp["},
} flags: []string{"--regexp"},
} regexp: true,
if foundCount != len(expected) { fail: true,
t.Errorf("Failed to find expected items for %s: Expected: %#v got: %#v", in, expected, found) },
} }
} oldhome := helmHome
helmHome = "testdata/helmhome"
defer func() { helmHome = oldhome }()
func searchTestRunner(t *testing.T, tc searchTestCase) { for _, tt := range tests {
cf, err := repo.LoadIndexFile(testFile) buf := bytes.NewBuffer(nil)
if err != nil { cmd := newSearchCmd(buf)
t.Errorf("Failed to load index file : %s : %s", testFile, err) cmd.ParseFlags(tt.flags)
if err := cmd.RunE(cmd, tt.args); err != nil {
if tt.fail {
continue
} }
t.Fatalf("%s: unexpected error %s", tt.name, err)
u := searchChartRefsForPattern(tc.in, cf.Entries)
validateEntries(t, tc.in, u, tc.expectedOut)
}
func searchCacheTestRunner(t *testing.T, tc searchTestCase) {
u, err := searchCacheForPattern(testDir, tc.in)
if err != nil {
t.Errorf("searchCacheForPattern failed: %#v", err)
} }
validateEntries(t, tc.in, u, tc.expectedOut) got := strings.TrimSpace(buf.String())
} if got != tt.expect {
t.Errorf("%s: expected %q, got %q", tt.name, tt.expect, got)
func TestSearches(t *testing.T) {
for _, tc := range searchTestCases {
searchTestRunner(t, tc)
} }
}
func TestCacheSearches(t *testing.T) {
for _, tc := range searchCacheTestCases {
searchCacheTestRunner(t, tc)
} }
} }

@ -0,0 +1,54 @@
alpine-0.1.0:
name: alpine
url: http://storage.googleapis.com/kubernetes-charts/alpine-0.1.0.tgz
created: 2016-09-06 21:58:44.211261566 +0000 UTC
checksum: 0e6661f193211d7a5206918d42f5c2a9470b737d
chartfile:
name: alpine
home: https://k8s.io/helm
sources:
- https://github.com/kubernetes/helm
version: 0.1.0
description: Deploy a basic Alpine Linux pod
keywords: []
maintainers: []
engine: ""
icon: ""
alpine-0.2.0:
name: alpine
url: http://storage.googleapis.com/kubernetes-charts/alpine-0.2.0.tgz
created: 2016-09-06 21:58:44.211261566 +0000 UTC
checksum: 0e6661f193211d7a5206918d42f5c2a9470b737d
chartfile:
name: alpine
home: https://k8s.io/helm
sources:
- https://github.com/kubernetes/helm
version: 0.2.0
description: Deploy a basic Alpine Linux pod
keywords: []
maintainers: []
engine: ""
icon: ""
mariadb-0.3.0:
name: mariadb
url: http://storage.googleapis.com/kubernetes-charts/mariadb-0.3.0.tgz
created: 2016-09-06 21:58:44.211870222 +0000 UTC
checksum: 65229f6de44a2be9f215d11dbff311673fc8ba56
chartfile:
name: mariadb
home: https://mariadb.org
sources:
- https://github.com/bitnami/bitnami-docker-mariadb
version: 0.3.0
description: Chart for MariaDB
keywords:
- mariadb
- mysql
- database
- sql
maintainers:
- name: Bitnami
email: containers@bitnami.com
engine: gotpl
icon: ""

@ -0,0 +1 @@
testing: http://example.com/charts
Loading…
Cancel
Save