feat(helm): update helm search

Switch 'helm search' from file crawling to using the indices. Also
add scorable indexing, forward porting the search code I originally
wrote for Helm Classic.

Closes #1226
Partially addresses #1199
pull/1229/head
Matt Butcher 8 years ago
parent c0d33afc81
commit 446d555178

@ -101,6 +101,7 @@ func newRootCmd(out io.Writer) *cobra.Command {
newVersionCmd(nil, out),
newRepoCmd(out),
newDependencyCmd(out),
newSearchCmd(out),
)
return cmd
}

@ -17,90 +17,99 @@ limitations under the License.
package main
import (
"errors"
"fmt"
"os"
"path/filepath"
"io"
"strings"
"github.com/spf13/cobra"
"k8s.io/helm/cmd/helm/helmpath"
"k8s.io/helm/cmd/helm/search"
"k8s.io/helm/pkg/repo"
)
func init() {
RootCommand.AddCommand(searchCmd)
const searchDesc = `
Search reads through all of the repositories configured on the system, and
looks for matches.
Repositories are managed with 'helm repo' commands.
`
// searchMaxScore suggests that any score higher than this is not considered a match.
const searchMaxScore = 25
type searchCmd struct {
out io.Writer
helmhome helmpath.Home
regexp bool
}
var searchCmd = &cobra.Command{
Use: "search [keyword]",
Short: "search for a keyword in charts",
Long: "Searches the known repositories cache files for the specified search string, looks at name and keywords",
RunE: search,
PreRunE: requireInit,
func newSearchCmd(out io.Writer) *cobra.Command {
sc := &searchCmd{out: out, helmhome: helmpath.Home(homePath())}
cmd := &cobra.Command{
Use: "search [keyword]",
Short: "search for a keyword in charts",
Long: searchDesc,
RunE: func(cmd *cobra.Command, args []string) error {
return sc.run(args)
},
PreRunE: requireInit,
}
cmd.Flags().BoolVarP(&sc.regexp, "regexp", "r", false, "use regular expressions for searching")
return cmd
}
func search(cmd *cobra.Command, args []string) error {
func (s *searchCmd) run(args []string) error {
index, err := s.buildIndex()
if err != nil {
return err
}
if len(args) == 0 {
return errors.New("This command needs at least one argument (search string)")
s.showAllCharts(index)
}
// TODO: This needs to be refactored to use loadChartRepositories
results, err := searchCacheForPattern(cacheDirectory(), args[0])
q := strings.Join(args, " ")
res, err := index.Search(q, searchMaxScore, s.regexp)
if err != nil {
return err
return nil
}
if len(results) > 0 {
for _, result := range results {
fmt.Println(result)
}
search.SortScore(res)
for _, r := range res {
fmt.Fprintln(s.out, r.Name)
}
return nil
}
func searchChartRefsForPattern(search string, chartRefs map[string]*repo.ChartRef) []string {
matches := []string{}
for k, c := range chartRefs {
if strings.Contains(c.Name, search) && !c.Removed {
matches = append(matches, k)
continue
}
if c.Chartfile == nil {
continue
}
for _, keyword := range c.Chartfile.Keywords {
if strings.Contains(keyword, search) {
matches = append(matches, k)
}
}
func (s *searchCmd) showAllCharts(i *search.Index) {
for name := range i.Entries() {
fmt.Fprintln(s.out, name)
}
return matches
}
func searchCacheForPattern(dir string, search string) ([]string, error) {
fileList := []string{}
filepath.Walk(dir, func(path string, f os.FileInfo, err error) error {
if !f.IsDir() {
fileList = append(fileList, path)
}
return nil
})
matches := []string{}
for _, f := range fileList {
index, err := repo.LoadIndexFile(f)
func (s *searchCmd) buildIndex() (*search.Index, error) {
// Load the repositories.yaml
rf, err := repo.LoadRepositoriesFile(s.helmhome.RepositoryFile())
if err != nil {
return nil, err
}
i := search.NewIndex()
for n := range rf.Repositories {
f := s.helmhome.CacheIndex(n)
ind, err := repo.LoadIndexFile(f)
if err != nil {
return matches, fmt.Errorf("index %s corrupted: %s", f, err)
fmt.Fprintf(s.out, "WARNING: Repo %q is corrupt. Try 'helm update': %s", f, err)
continue
}
m := searchChartRefsForPattern(search, index.Entries)
repoName := strings.TrimSuffix(filepath.Base(f), "-index.yaml")
for _, c := range m {
// TODO: Is it possible for this file to be missing? Or to have
// an extension other than .tgz? Should the actual filename be in
// the YAML?
fname := filepath.Join(repoName, c+".tgz")
matches = append(matches, fname)
}
i.AddRepo(n, ind)
}
return matches, nil
return i, nil
}

@ -0,0 +1,183 @@
/*
Copyright 2016 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*Package search provides client-side repository searching.
This supports building an in-memory search index based on the contents of
multiple repositories, and then using string matching or regular expressions
to find matches.
*/
package search
import (
"errors"
"path/filepath"
"regexp"
"sort"
"strings"
"k8s.io/helm/pkg/repo"
)
// Result is a search result.
//
// Score indicates how close it is to match. The higher the score, the longer
// the distance.
type Result struct {
Name string
Score int
}
// Index is a searchable index of chart information.
type Index struct {
lines map[string]string
charts map[string]*repo.ChartRef
}
const sep = "\v"
// NewIndex creats a new Index.
func NewIndex() *Index {
return &Index{lines: map[string]string{}, charts: map[string]*repo.ChartRef{}}
}
// AddRepo adds a repository index to the search index.
func (i *Index) AddRepo(rname string, ind *repo.IndexFile) {
for name, ref := range ind.Entries {
fname := filepath.Join(rname, name)
i.lines[fname] = indstr(rname, ref)
i.charts[fname] = ref
}
}
// Entries returns the entries in an index.
func (i *Index) Entries() map[string]*repo.ChartRef {
return i.charts
}
// Search searches an index for the given term.
//
// Threshold indicates the maximum score a term may have before being marked
// irrelevant. (Low score means higher relevance. Golf, not bowling.)
//
// If regexp is true, the term is treated as a regular expression. Otherwise,
// term is treated as a literal string.
func (i *Index) Search(term string, threshold int, regexp bool) ([]*Result, error) {
if regexp == true {
return i.SearchRegexp(term, threshold)
}
return i.SearchLiteral(term, threshold), nil
}
// calcScore calculates a score for a match.
func (i *Index) calcScore(index int, matchline string) int {
// This is currently tied to the fact that sep is a single char.
splits := []int{}
s := rune(sep[0])
for i, ch := range matchline {
if ch == s {
splits = append(splits, i)
}
}
for i, pos := range splits {
if index > pos {
continue
}
return i
}
return len(splits)
}
// SearchLiteral does a literal string search (no regexp).
func (i *Index) SearchLiteral(term string, threshold int) []*Result {
term = strings.ToLower(term)
buf := []*Result{}
for k, v := range i.lines {
res := strings.Index(v, term)
if score := i.calcScore(res, v); res != -1 && score < threshold {
buf = append(buf, &Result{Name: k, Score: score})
}
}
return buf
}
// SearchRegexp searches using a regular expression.
func (i *Index) SearchRegexp(re string, threshold int) ([]*Result, error) {
matcher, err := regexp.Compile(re)
if err != nil {
return []*Result{}, err
}
buf := []*Result{}
for k, v := range i.lines {
ind := matcher.FindStringIndex(v)
if len(ind) == 0 {
continue
}
if score := i.calcScore(ind[0], v); ind[0] >= 0 && score < threshold {
buf = append(buf, &Result{Name: k, Score: score})
}
}
return buf, nil
}
// Chart returns the ChartRef for a particular name.
func (i *Index) Chart(name string) (*repo.ChartRef, error) {
c, ok := i.charts[name]
if !ok {
return nil, errors.New("no such chart")
}
return c, nil
}
// SortScore does an in-place sort of the results.
//
// Lowest scores are highest on the list. Matching scores are subsorted alphabetically.
func SortScore(r []*Result) {
sort.Sort(scoreSorter(r))
}
// scoreSorter sorts results by score, and subsorts by alpha Name.
type scoreSorter []*Result
// Len returns the length of this scoreSorter.
func (s scoreSorter) Len() int { return len(s) }
// Swap performs an in-place swap.
func (s scoreSorter) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
// Less compares a to b, and returns true if a is less than b.
func (s scoreSorter) Less(a, b int) bool {
first := s[a]
second := s[b]
if first.Score > second.Score {
return false
}
if first.Score < second.Score {
return true
}
return first.Name < second.Name
}
func indstr(name string, ref *repo.ChartRef) string {
i := ref.Name + sep + name + "/" + ref.Name + sep
if ref.Chartfile != nil {
i += ref.Chartfile.Description + sep + strings.Join(ref.Chartfile.Keywords, sep)
}
return strings.ToLower(i)
}

@ -0,0 +1,232 @@
/*
Copyright 2016 The Kubernetes Authors All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package search
import (
"strings"
"testing"
"k8s.io/helm/pkg/proto/hapi/chart"
"k8s.io/helm/pkg/repo"
)
func TestSortScore(t *testing.T) {
in := []*Result{
{Name: "bbb", Score: 0},
{Name: "aaa", Score: 5},
{Name: "abb", Score: 5},
{Name: "aab", Score: 0},
{Name: "bab", Score: 5},
}
expect := []string{"aab", "bbb", "aaa", "abb", "bab"}
expectScore := []int{0, 0, 5, 5, 5}
SortScore(in)
// Test Score
for i := 0; i < len(expectScore); i++ {
if expectScore[i] != in[i].Score {
t.Errorf("Sort error on index %d: expected %d, got %d", i, expectScore[i], in[i].Score)
}
}
// Test Name
for i := 0; i < len(expect); i++ {
if expect[i] != in[i].Name {
t.Errorf("Sort error: expected %s, got %s", expect[i], in[i].Name)
}
}
}
var testCacheDir = "../testdata/"
var indexfileEntries = map[string]*repo.ChartRef{
"niña-0.1.0": {
Name: "niña",
URL: "http://example.com/charts/nina-0.1.0.tgz",
Chartfile: &chart.Metadata{
Name: "niña",
Version: "0.1.0",
Description: "One boat",
},
},
"pinta-0.1.0": {
Name: "pinta",
URL: "http://example.com/charts/pinta-0.1.0.tgz",
Chartfile: &chart.Metadata{
Name: "pinta",
Version: "0.1.0",
Description: "Two ship",
},
},
"santa-maria-1.2.3": {
Name: "santa-maria",
URL: "http://example.com/charts/santa-maria-1.2.3.tgz",
Chartfile: &chart.Metadata{
Name: "santa-maria",
Version: "1.2.3",
Description: "Three boat",
},
},
}
func loadTestIndex(t *testing.T) *Index {
i := NewIndex()
i.AddRepo("testing", &repo.IndexFile{Entries: indexfileEntries})
i.AddRepo("ztesting", &repo.IndexFile{Entries: map[string]*repo.ChartRef{
"pinta-2.0.0": {
Name: "pinta",
URL: "http://example.com/charts/pinta-2.0.0.tgz",
Chartfile: &chart.Metadata{
Name: "pinta",
Version: "2.0.0",
Description: "Two ship, version two",
},
},
}})
return i
}
func TestSearchByName(t *testing.T) {
tests := []struct {
name string
query string
expect []*Result
regexp bool
fail bool
failMsg string
}{
{
name: "basic search for one result",
query: "santa-maria",
expect: []*Result{
{Name: "testing/santa-maria-1.2.3"},
},
},
{
name: "basic search for two results",
query: "pinta",
expect: []*Result{
{Name: "testing/pinta-0.1.0"},
{Name: "ztesting/pinta-2.0.0"},
},
},
{
name: "repo-specific search for one result",
query: "ztesting/pinta",
expect: []*Result{
{Name: "ztesting/pinta-2.0.0"},
},
},
{
name: "partial name search",
query: "santa",
expect: []*Result{
{Name: "testing/santa-maria-1.2.3"},
},
},
{
name: "description search, one result",
query: "Three",
expect: []*Result{
{Name: "testing/santa-maria-1.2.3"},
},
},
{
name: "description search, two results",
query: "two",
expect: []*Result{
{Name: "testing/pinta-0.1.0"},
{Name: "ztesting/pinta-2.0.0"},
},
},
{
name: "nothing found",
query: "mayflower",
expect: []*Result{},
},
{
name: "regexp, one result",
query: "th[ref]*",
expect: []*Result{
{Name: "testing/santa-maria-1.2.3"},
},
regexp: true,
},
{
name: "regexp, fail compile",
query: "th[",
expect: []*Result{},
regexp: true,
fail: true,
failMsg: "error parsing regexp:",
},
}
i := loadTestIndex(t)
for _, tt := range tests {
charts, err := i.Search(tt.query, 100, tt.regexp)
if err != nil {
if tt.fail {
if !strings.Contains(err.Error(), tt.failMsg) {
t.Fatalf("%s: Unexpected error message: %s", tt.name, err)
}
continue
}
t.Fatalf("%s: %s", tt.name, err)
}
// Give us predictably ordered results.
SortScore(charts)
l := len(charts)
if l != len(tt.expect) {
t.Fatalf("%s: Expected %d result, got %d", tt.name, len(tt.expect), l)
}
// For empty result sets, just keep going.
if l == 0 {
continue
}
for i, got := range charts {
ex := tt.expect[i]
if got.Name != ex.Name {
t.Errorf("%s[%d]: Expected name %q, got %q", tt.name, i, ex.Name, got.Name)
}
}
}
}
func TestCalcScore(t *testing.T) {
i := NewIndex()
fields := []string{"aaa", "bbb", "ccc", "ddd"}
matchline := strings.Join(fields, sep)
if r := i.calcScore(2, matchline); r != 0 {
t.Errorf("Expected 0, got %d", r)
}
if r := i.calcScore(5, matchline); r != 1 {
t.Errorf("Expected 1, got %d", r)
}
if r := i.calcScore(10, matchline); r != 2 {
t.Errorf("Expected 2, got %d", r)
}
if r := i.calcScore(14, matchline); r != 3 {
t.Errorf("Expected 3, got %d", r)
}
}

@ -17,79 +17,68 @@ limitations under the License.
package main
import (
"bytes"
"strings"
"testing"
"k8s.io/helm/pkg/repo"
)
const testDir = "testdata/testcache"
const testFile = "testdata/testcache/local-index.yaml"
type searchTestCase struct {
in string
expectedOut []string
}
var searchTestCases = []searchTestCase{
{"foo", []string{}},
{"alpine", []string{"alpine-1.0.0"}},
{"sumtin", []string{"alpine-1.0.0"}},
{"web", []string{"nginx-0.1.0"}},
}
func TestSearchCmd(t *testing.T) {
tests := []struct {
name string
args []string
flags []string
expect string
regexp bool
fail bool
}{
{
name: "search for 'maria', expect one match",
args: []string{"maria"},
expect: "testing/mariadb-0.3.0",
},
{
name: "search for 'alpine', expect two matches",
args: []string{"alpine"},
expect: "testing/alpine-0.1.0\ntesting/alpine-0.2.0",
},
{
name: "search for 'syzygy', expect no matches",
args: []string{"syzygy"},
expect: "",
},
{
name: "search for 'alp[a-z]+', expect two matches",
args: []string{"alp[a-z]+"},
flags: []string{"--regexp"},
expect: "testing/alpine-0.1.0\ntesting/alpine-0.2.0",
regexp: true,
},
{
name: "search for 'alp[', expect failure to compile regexp",
args: []string{"alp["},
flags: []string{"--regexp"},
regexp: true,
fail: true,
},
}
var searchCacheTestCases = []searchTestCase{
{"notthere", []string{}},
{"odd", []string{"foobar/oddness-1.2.3.tgz"}},
{"sumtin", []string{"local/alpine-1.0.0.tgz", "foobar/oddness-1.2.3.tgz"}},
{"foobar", []string{"foobar/foobar-0.1.0.tgz"}},
{"web", []string{"local/nginx-0.1.0.tgz"}},
}
oldhome := helmHome
helmHome = "testdata/helmhome"
defer func() { helmHome = oldhome }()
func validateEntries(t *testing.T, in string, found []string, expected []string) {
if len(found) != len(expected) {
t.Errorf("Failed to search %s: Expected: %#v got: %#v", in, expected, found)
}
foundCount := 0
for _, exp := range expected {
for _, f := range found {
if exp == f {
foundCount = foundCount + 1
for _, tt := range tests {
buf := bytes.NewBuffer(nil)
cmd := newSearchCmd(buf)
cmd.ParseFlags(tt.flags)
if err := cmd.RunE(cmd, tt.args); err != nil {
if tt.fail {
continue
}
t.Fatalf("%s: unexpected error %s", tt.name, err)
}
got := strings.TrimSpace(buf.String())
if got != tt.expect {
t.Errorf("%s: expected %q, got %q", tt.name, tt.expect, got)
}
}
if foundCount != len(expected) {
t.Errorf("Failed to find expected items for %s: Expected: %#v got: %#v", in, expected, found)
}
}
func searchTestRunner(t *testing.T, tc searchTestCase) {
cf, err := repo.LoadIndexFile(testFile)
if err != nil {
t.Errorf("Failed to load index file : %s : %s", testFile, err)
}
u := searchChartRefsForPattern(tc.in, cf.Entries)
validateEntries(t, tc.in, u, tc.expectedOut)
}
func searchCacheTestRunner(t *testing.T, tc searchTestCase) {
u, err := searchCacheForPattern(testDir, tc.in)
if err != nil {
t.Errorf("searchCacheForPattern failed: %#v", err)
}
validateEntries(t, tc.in, u, tc.expectedOut)
}
func TestSearches(t *testing.T) {
for _, tc := range searchTestCases {
searchTestRunner(t, tc)
}
}
func TestCacheSearches(t *testing.T) {
for _, tc := range searchCacheTestCases {
searchCacheTestRunner(t, tc)
}
}

@ -0,0 +1,54 @@
alpine-0.1.0:
name: alpine
url: http://storage.googleapis.com/kubernetes-charts/alpine-0.1.0.tgz
created: 2016-09-06 21:58:44.211261566 +0000 UTC
checksum: 0e6661f193211d7a5206918d42f5c2a9470b737d
chartfile:
name: alpine
home: https://k8s.io/helm
sources:
- https://github.com/kubernetes/helm
version: 0.1.0
description: Deploy a basic Alpine Linux pod
keywords: []
maintainers: []
engine: ""
icon: ""
alpine-0.2.0:
name: alpine
url: http://storage.googleapis.com/kubernetes-charts/alpine-0.2.0.tgz
created: 2016-09-06 21:58:44.211261566 +0000 UTC
checksum: 0e6661f193211d7a5206918d42f5c2a9470b737d
chartfile:
name: alpine
home: https://k8s.io/helm
sources:
- https://github.com/kubernetes/helm
version: 0.2.0
description: Deploy a basic Alpine Linux pod
keywords: []
maintainers: []
engine: ""
icon: ""
mariadb-0.3.0:
name: mariadb
url: http://storage.googleapis.com/kubernetes-charts/mariadb-0.3.0.tgz
created: 2016-09-06 21:58:44.211870222 +0000 UTC
checksum: 65229f6de44a2be9f215d11dbff311673fc8ba56
chartfile:
name: mariadb
home: https://mariadb.org
sources:
- https://github.com/bitnami/bitnami-docker-mariadb
version: 0.3.0
description: Chart for MariaDB
keywords:
- mariadb
- mysql
- database
- sql
maintainers:
- name: Bitnami
email: containers@bitnami.com
engine: gotpl
icon: ""

@ -0,0 +1 @@
testing: http://example.com/charts
Loading…
Cancel
Save