Skip to content

Commit

Permalink
Prefer zoekt for symbol search (sourcegraph#5192)
Browse files Browse the repository at this point in the history
* symbol search

* use zoekt master

* gofmt

* move symbolCount back to original place

* remove unrelated changes to ts code

* Use same addMatches impl as textsearch

* fix symbol display results

* fix symbol search suggestions

* fix tests

* fix lint

* zoekt: Use universal-ctags-dev in dev mode

* Update CHANGELOG.md

* go mod tidy

* go mod tidy
  • Loading branch information
kzh authored Aug 14, 2019
1 parent 6823f97 commit 19dadc4
Show file tree
Hide file tree
Showing 8 changed files with 174 additions and 82 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ All notable changes to Sourcegraph are documented in this file.
### Added

- Multi-line search now works for non-indexed search.
- Indexed search now supports symbol queries.

### Changed

Expand Down
2 changes: 1 addition & 1 deletion cmd/frontend/graphqlbackend/search_suggestions.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ func (r *searchResolver) Suggestions(ctx context.Context, args *searchSuggestion
ctx, cancel := context.WithTimeout(ctx, 1*time.Second)
defer cancel()

fileMatches, _, err := searchSymbols(ctx, &search.Args{Pattern: p, Repos: repoRevs, Query: r.query}, 7)
fileMatches, _, err := searchSymbols(ctx, &search.Args{Pattern: p, Repos: repoRevs, Query: r.query, Zoekt: r.zoekt}, 7)
if err != nil {
return nil, err
}
Expand Down
119 changes: 112 additions & 7 deletions cmd/frontend/graphqlbackend/search_symbols.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ import (
"context"
"fmt"
"net/url"
"sort"
"strings"
"sync"
"time"

"github.com/neelance/parallel"
"github.com/opentracing/opentracing-go"
Expand All @@ -17,6 +19,7 @@ import (
"github.com/sourcegraph/sourcegraph/cmd/frontend/internal/goroutine"
"github.com/sourcegraph/sourcegraph/cmd/frontend/internal/pkg/search"
"github.com/sourcegraph/sourcegraph/cmd/frontend/internal/pkg/search/query"
"github.com/sourcegraph/sourcegraph/cmd/frontend/types"
"github.com/sourcegraph/sourcegraph/pkg/api"
"github.com/sourcegraph/sourcegraph/pkg/conf"
"github.com/sourcegraph/sourcegraph/pkg/errcode"
Expand Down Expand Up @@ -63,12 +66,116 @@ func searchSymbols(ctx context.Context, args *search.Args, limit int) (res []*fi
ctx, cancelAll := context.WithCancel(ctx)
defer cancelAll()

common = &searchResultsCommon{}
common = &searchResultsCommon{partial: make(map[api.RepoName]struct{})}
var (
searcherRepos = args.Repos
zoektRepos []*search.RepositoryRevisions
)

if args.Zoekt.Enabled() {
zoektRepos, searcherRepos, err = zoektIndexedRepos(ctx, args.Zoekt, args.Repos)
if err != nil {
// Don't hard fail if index is not available yet.
tr.LogFields(otlog.String("indexErr", err.Error()))
if ctx.Err() == nil {
log15.Warn("zoektIndexedRepos failed", "error", err)
}
common.indexUnavailable = true
err = nil
}
}

common.repos = make([]*types.Repo, len(args.Repos))
for i, repo := range args.Repos {
common.repos[i] = repo.Repo
}

index, _ := args.Query.StringValues(query.FieldIndex)
if len(index) > 0 {
index := index[len(index)-1]
switch parseYesNoOnly(index) {
case Yes, True:
// default
if args.Zoekt.Enabled() {
tr.LazyPrintf("%d indexed repos, %d unindexed repos", len(zoektRepos), len(searcherRepos))
}
case Only:
if !args.Zoekt.Enabled() {
return nil, common, fmt.Errorf("invalid index:%q (indexed search is not enabled)", index)
}
common.missing = make([]*types.Repo, len(searcherRepos))
for i, r := range searcherRepos {
common.missing[i] = r.Repo
}
tr.LazyPrintf("index:only, ignoring %d unindexed repos", len(searcherRepos))
searcherRepos = nil
case No, False:
tr.LazyPrintf("index:no, bypassing zoekt (using searcher) for %d indexed repos", len(zoektRepos))
searcherRepos = append(searcherRepos, zoektRepos...)
zoektRepos = nil
default:
return nil, common, fmt.Errorf("invalid index:%q (valid values are: yes, only, no)", index)
}
}

var (
run = parallel.NewRun(conf.SearchSymbolsParallelism())
mu sync.Mutex

unflattened [][]*fileMatchResolver
flattenedSize int
overLimitCanceled bool
)
for _, repoRevs := range args.Repos {

addMatches := func(matches []*fileMatchResolver) {
if len(matches) > 0 {
common.resultCount += int32(len(matches))
sort.Slice(matches, func(i, j int) bool {
a, b := matches[i].uri, matches[j].uri
return a > b
})
unflattened = append(unflattened, matches)
flattenedSize += len(matches)

if flattenedSize > int(args.Pattern.FileMatchLimit) {
tr.LazyPrintf("cancel due to result size: %d > %d", flattenedSize, args.Pattern.FileMatchLimit)
overLimitCanceled = true
common.limitHit = true
cancelAll()
}
}
}

run.Acquire()
goroutine.Go(func() {
defer run.Release()
query := args.Pattern
k := zoektResultCountFactor(len(zoektRepos), query)
opts := zoektSearchOpts(k, query)
matches, limitHit, reposLimitHit, searchErr := zoektSearchHEAD(ctx, query, zoektRepos, args.UseFullDeadline, args.Zoekt.Client, opts, true, time.Since)
mu.Lock()
defer mu.Unlock()
if ctx.Err() == nil {
for _, repo := range zoektRepos {
common.searched = append(common.searched, repo.Repo)
common.indexed = append(common.indexed, repo.Repo)
}
for repo := range reposLimitHit {
common.partial[api.RepoName(repo)] = struct{}{}
}
}
if limitHit {
common.limitHit = true
}
tr.LogFields(otlog.Object("searchErr", searchErr), otlog.Error(err), otlog.Bool("overLimitCanceled", overLimitCanceled))
if searchErr != nil && err == nil && !overLimitCanceled {
err = searchErr
tr.LazyPrintf("cancel indexed symbol search due to error: %v", err)
}
addMatches(matches)
})

for _, repoRevs := range searcherRepos {
repoRevs := repoRevs
if ctx.Err() != nil {
break
Expand Down Expand Up @@ -96,15 +203,13 @@ func searchSymbols(ctx context.Context, args *search.Args, limit int) (res []*fi
common.searched = append(common.searched, repoRevs.Repo)
}
if repoSymbols != nil {
res = append(res, repoSymbols...)
if limitHit {
cancelAll()
}
addMatches(repoSymbols)
}
})
}
err = run.Wait()
res2 := limitSymbolResults(res, limit)
flattened := flattenFileMatches(unflattened, int(args.Pattern.FileMatchLimit))
res2 := limitSymbolResults(flattened, limit)
common.limitHit = symbolCount(res2) < symbolCount(res)
return res2, common, err
}
Expand Down
67 changes: 53 additions & 14 deletions cmd/frontend/graphqlbackend/textsearch.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,10 @@ import (
"github.com/sourcegraph/sourcegraph/pkg/api"
"github.com/sourcegraph/sourcegraph/pkg/errcode"
"github.com/sourcegraph/sourcegraph/pkg/gitserver"
"github.com/sourcegraph/sourcegraph/pkg/gituri"
"github.com/sourcegraph/sourcegraph/pkg/mutablelimiter"
searchbackend "github.com/sourcegraph/sourcegraph/pkg/search/backend"
"github.com/sourcegraph/sourcegraph/pkg/symbols/protocol"
"github.com/sourcegraph/sourcegraph/pkg/trace"
"github.com/sourcegraph/sourcegraph/pkg/vcs/git"
"gopkg.in/inconshreveable/log15.v2"
Expand Down Expand Up @@ -492,7 +494,7 @@ func zoektSearchOpts(k int, query *search.PatternInfo) zoekt.SearchOptions {
return searchOpts
}

func zoektSearchHEAD(ctx context.Context, query *search.PatternInfo, repos []*search.RepositoryRevisions, useFullDeadline bool, searcher zoekt.Searcher, searchOpts zoekt.SearchOptions, since func(t time.Time) time.Duration) (fm []*fileMatchResolver, limitHit bool, reposLimitHit map[string]struct{}, err error) {
func zoektSearchHEAD(ctx context.Context, query *search.PatternInfo, repos []*search.RepositoryRevisions, useFullDeadline bool, searcher zoekt.Searcher, searchOpts zoekt.SearchOptions, isSymbol bool, since func(t time.Time) time.Duration) (fm []*fileMatchResolver, limitHit bool, reposLimitHit map[string]struct{}, err error) {
if len(repos) == 0 {
return nil, false, nil, nil
}
Expand All @@ -505,7 +507,7 @@ func zoektSearchHEAD(ctx context.Context, query *search.PatternInfo, repos []*se
repoMap[api.RepoName(strings.ToLower(string(repoRev.Repo.Name)))] = repoRev
}

queryExceptRepos, err := queryToZoektQuery(query)
queryExceptRepos, err := queryToZoektQuery(query, isSymbol)
if err != nil {
return nil, false, nil, err
}
Expand Down Expand Up @@ -603,6 +605,7 @@ func zoektSearchHEAD(ctx context.Context, query *search.PatternInfo, repos []*se

limitHit = true
}

matches := make([]*fileMatchResolver, len(resp.Files))
for i, file := range resp.Files {
fileLimitHit := false
Expand All @@ -611,7 +614,11 @@ func zoektSearchHEAD(ctx context.Context, query *search.PatternInfo, repos []*se
fileLimitHit = true
limitHit = true
}
repoRev := repoMap[api.RepoName(strings.ToLower(string(file.Repository)))]
inputRev := repoRev.RevSpecs()[0]
baseURI := &gituri.URI{URL: url.URL{Scheme: "git://", Host: string(repoRev.Repo.Name), RawQuery: "?" + url.QueryEscape(inputRev)}}
lines := make([]*lineMatch, 0, len(file.LineMatches))
symbols := []*searchSymbolResult{}
for _, l := range file.LineMatches {
if !l.FileName {
if len(l.LineFragments) > maxLineFragmentMatches {
Expand All @@ -622,20 +629,43 @@ func zoektSearchHEAD(ctx context.Context, query *search.PatternInfo, repos []*se
offset := utf8.RuneCount(l.Line[:m.LineOffset])
length := utf8.RuneCount(l.Line[m.LineOffset : m.LineOffset+m.MatchLength])
offsets[k] = [2]int32{int32(offset), int32(length)}
if isSymbol && m.SymbolInfo != nil {
commit := &gitCommitResolver{
repo: &repositoryResolver{repo: repoRev.Repo},
oid: gitObjectID(repoRev.IndexedHEADCommit),
inputRev: &inputRev,
}

symbols = append(symbols, &searchSymbolResult{
symbol: protocol.Symbol{
Name: m.SymbolInfo.Sym,
Kind: m.SymbolInfo.Kind,
Parent: m.SymbolInfo.Parent,
ParentKind: m.SymbolInfo.ParentKind,
Path: file.FileName,
Line: l.LineNumber,
},
lang: strings.ToLower(file.Language),
baseURI: baseURI,
commit: commit,
})
}
}
if !isSymbol {
lines = append(lines, &lineMatch{
JPreview: string(l.Line),
JLineNumber: int32(l.LineNumber - 1),
JOffsetAndLengths: offsets,
})
}
lines = append(lines, &lineMatch{
JPreview: string(l.Line),
JLineNumber: int32(l.LineNumber - 1),
JOffsetAndLengths: offsets,
})
}
}
repoRev := repoMap[api.RepoName(strings.ToLower(string(file.Repository)))]
matches[i] = &fileMatchResolver{
JPath: file.FileName,
JLineMatches: lines,
JLimitHit: fileLimitHit,
uri: fileMatchURI(repoRev.Repo.Name, "", file.FileName),
symbols: symbols,
repo: repoRev.Repo,
commitID: repoRev.IndexedHEADCommit,
}
Expand Down Expand Up @@ -760,25 +790,34 @@ func fileRe(pattern string, queryIsCaseSensitive bool) (zoektquery.Q, error) {
return parseRe(pattern, true, queryIsCaseSensitive)
}

func queryToZoektQuery(query *search.PatternInfo) (zoektquery.Q, error) {
func queryToZoektQuery(query *search.PatternInfo, isSymbol bool) (zoektquery.Q, error) {
var and []zoektquery.Q

var q zoektquery.Q
if query.IsRegExp {
q, err := parseRe(query.Pattern, false, query.IsCaseSensitive)
var err error
q, err = parseRe(query.Pattern, false, query.IsCaseSensitive)
if err != nil {
return nil, err
}
and = append(and, q)
} else {
and = append(and, &zoektquery.Substring{
q = &zoektquery.Substring{
Pattern: query.Pattern,
CaseSensitive: query.IsCaseSensitive,

FileName: true,
Content: true,
})
}
}

if isSymbol {
q = &zoektquery.Symbol{
Expr: q,
}
}

and = append(and, q)

// zoekt also uses regular expressions for file paths
// TODO PathPatternsAreCaseSensitive
// TODO whitespace in file path patterns?
Expand Down Expand Up @@ -987,7 +1026,7 @@ func searchFilesInRepos(ctx context.Context, args *search.Args) (res []*fileMatc
query := args.Pattern
k := zoektResultCountFactor(len(zoektRepos), query)
opts := zoektSearchOpts(k, query)
matches, limitHit, reposLimitHit, searchErr := zoektSearchHEAD(ctx, query, zoektRepos, args.UseFullDeadline, args.Zoekt.Client, opts, time.Since)
matches, limitHit, reposLimitHit, searchErr := zoektSearchHEAD(ctx, query, zoektRepos, args.UseFullDeadline, args.Zoekt.Client, opts, false, time.Since)
mu.Lock()
defer mu.Unlock()
if ctx.Err() == nil {
Expand Down
4 changes: 2 additions & 2 deletions cmd/frontend/graphqlbackend/textsearch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ func TestQueryToZoektQuery(t *testing.T) {
if err != nil {
t.Fatalf("failed to parse %q: %v", tt.Query, err)
}
got, err := queryToZoektQuery(tt.Pattern)
got, err := queryToZoektQuery(tt.Pattern, false)
if err != nil {
t.Fatal("queryToZoektQuery failed:", err)
}
Expand Down Expand Up @@ -483,7 +483,7 @@ func Test_zoektSearchHEAD(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
gotFm, gotLimitHit, gotReposLimitHit, err := zoektSearchHEAD(tt.args.ctx, tt.args.query, tt.args.repos, tt.args.useFullDeadline, tt.args.searcher, tt.args.opts, tt.args.since)
gotFm, gotLimitHit, gotReposLimitHit, err := zoektSearchHEAD(tt.args.ctx, tt.args.query, tt.args.repos, tt.args.useFullDeadline, tt.args.searcher, tt.args.opts, false, tt.args.since)
if (err != nil) != tt.wantErr {
t.Errorf("zoektSearchHEAD() error = %v, wantErr = %v", err, tt.wantErr)
return
Expand Down
1 change: 1 addition & 0 deletions dev/launch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ export SRC_PROF_HTTP=
export SRC_PROF_SERVICES=$(cat dev/src-prof-services.json)
export OVERRIDE_AUTH_SECRET=sSsNGlI8fBDftBz0LDQNXEnP6lrWdt9g0fK6hoFvGQ
export DEPLOY_TYPE=dev
export CTAGS_COMMAND="${CTAGS_COMMAND:=cmd/symbols/universal-ctags-dev}"
export ZOEKT_HOST=localhost:3070

# webpack-dev-server is a proxy running on port 3080 that (1) serves assets, waiting to respond
Expand Down
3 changes: 1 addition & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ require (
github.com/microcosm-cc/bluemonday v1.0.2
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.1 // indirect
github.com/nbutton23/zxcvbn-go v0.0.0-20180912185939-ae427f1e4c1d // indirect
github.com/neelance/parallel v0.0.0-20160708114440-4de9ce63d14c
github.com/onsi/ginkgo v1.8.0 // indirect
github.com/onsi/gomega v1.5.0 // indirect
Expand Down Expand Up @@ -183,7 +182,7 @@ require (
)

replace (
github.com/google/zoekt => github.com/sourcegraph/zoekt v0.0.0-20190718092054-9cdf2d3e8edb
github.com/google/zoekt => github.com/sourcegraph/zoekt v0.0.0-20190814145959-75b5f541fb2b
github.com/graph-gophers/graphql-go => github.com/sourcegraph/graphql-go v0.0.0-20180929065141-c790ffc3c46a
github.com/mattn/goreman => github.com/sourcegraph/goreman v0.1.2-0.20180928223752-6e9a2beb830d
github.com/russellhaering/gosaml2 => github.com/sourcegraph/gosaml2 v0.0.0-20190712190530-f05918046bab
Expand Down
Loading

0 comments on commit 19dadc4

Please sign in to comment.