diff --git a/modules/git/grep.go b/modules/git/grep.go index d0d6094649..dd176e7aec 100644 --- a/modules/git/grep.go +++ b/modules/git/grep.go @@ -28,10 +28,10 @@ type GrepResult struct { HighlightedRanges [][3]int } -type grepMode int +type GrepMode int const ( - FixedGrepMode grepMode = iota + FixedGrepMode GrepMode = iota FixedAnyGrepMode RegExpGrepMode ) @@ -43,7 +43,7 @@ type GrepOptions struct { MaxResultLimit int MatchesPerFile int // >= git 2.38 ContextLineNumber int - Mode grepMode + Mode GrepMode Filename string } diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 9dc915499b..25f96933b6 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -40,10 +40,6 @@ import ( const ( unicodeNormalizeName = "unicodeNormalize" maxBatchSize = 16 - // fuzzyDenominator determines the levenshtein distance per each character of a keyword - fuzzyDenominator = 4 - // see https://github.com/blevesearch/bleve/issues/1563#issuecomment-786822311 - maxFuzziness = 2 ) func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { @@ -260,12 +256,14 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int keywordQuery query.Query ) - phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword) - phraseQuery.FieldVal = "Content" - phraseQuery.Analyzer = repoIndexerAnalyzer - keywordQuery = phraseQuery - if opts.IsKeywordFuzzy { - phraseQuery.Fuzziness = min(maxFuzziness, len(opts.Keyword)/fuzzyDenominator) + if opts.Mode == internal.CodeSearchModeUnion { + query := bleve.NewDisjunctionQuery() + for _, field := range strings.Fields(opts.Keyword) { + query.AddQuery(inner_bleve.MatchPhraseQuery(field, "Content", repoIndexerAnalyzer, 0)) + } + keywordQuery = query + } else { + keywordQuery = inner_bleve.MatchPhraseQuery(opts.Keyword, "Content", repoIndexerAnalyzer, 0) } if len(opts.RepoIDs) > 0 { @@ -325,13 +323,16 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int for i, hit := range result.Hits { startIndex, endIndex := -1, -1 for _, locations := range hit.Locations["Content"] { + if startIndex != -1 && endIndex != -1 { + break + } location := locations[0] locationStart := int(location.Start) locationEnd := int(location.End) if startIndex < 0 || locationStart < startIndex { startIndex = locationStart } - if endIndex < 0 || locationEnd > endIndex { + if endIndex < 0 && locationEnd > endIndex { endIndex = locationEnd } } diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index ad58615b30..688e53eb71 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -33,8 +33,8 @@ const ( esRepoIndexerLatestVersion = 2 // multi-match-types, currently only 2 types are used // Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types - esMultiMatchTypeBestFields = "best_fields" - esMultiMatchTypePhrasePrefix = "phrase_prefix" + esMultiMatchTypeBestFields = "best_fields" + esMultiMatchTypePhrase = "phrase" ) var _ internal.Indexer = &Indexer{} @@ -334,8 +334,8 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan // Search searches for codes and language stats by given conditions. func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { - searchType := esMultiMatchTypePhrasePrefix - if opts.IsKeywordFuzzy { + searchType := esMultiMatchTypePhrase + if opts.Mode == internal.CodeSearchModeUnion { searchType = esMultiMatchTypeBestFields } diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index 559b85626f..58768205c6 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -100,8 +100,8 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { Page: 1, PageSize: 10, }, - Filename: kw.Filename, - IsKeywordFuzzy: true, + Filename: kw.Filename, + Mode: SearchModeUnion, }) require.NoError(t, err) assert.Len(t, kw.IDs, int(total)) diff --git a/modules/indexer/code/internal/indexer.go b/modules/indexer/code/internal/indexer.go index 748c9e1bf9..b7440e9de2 100644 --- a/modules/indexer/code/internal/indexer.go +++ b/modules/indexer/code/internal/indexer.go @@ -20,13 +20,27 @@ type Indexer interface { Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) } +type CodeSearchMode int + +const ( + CodeSearchModeExact CodeSearchMode = iota + CodeSearchModeUnion +) + +func (mode CodeSearchMode) String() string { + if mode == CodeSearchModeUnion { + return "union" + } + return "exact" +} + type SearchOptions struct { RepoIDs []int64 Keyword string Language string Filename string - IsKeywordFuzzy bool + Mode CodeSearchMode db.Paginator } diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index 9f7aa2db60..27e35251f0 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -35,7 +35,14 @@ type SearchResultLanguages = internal.SearchResultLanguages type SearchOptions = internal.SearchOptions -var CodeSearchOptions = [2]string{"exact", "fuzzy"} +var CodeSearchOptions = [2]string{"exact", "union"} + +type SearchMode = internal.CodeSearchMode + +const ( + SearchModeExact = internal.CodeSearchModeExact + SearchModeUnion = internal.CodeSearchModeUnion +) func indices(content string, selectionStartIndex, selectionEndIndex int) (int, int) { startIndex := selectionStartIndex @@ -206,7 +213,6 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res } // PerformSearch perform a search on a repository -// if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2 func PerformSearch(ctx context.Context, opts *SearchOptions) (int, []*Result, []*SearchResultLanguages, error) { if opts == nil || len(opts.Keyword) == 0 { return 0, nil, nil, nil diff --git a/routers/web/explore/code.go b/routers/web/explore/code.go index 76238e80fb..0a4e828c71 100644 --- a/routers/web/explore/code.go +++ b/routers/web/explore/code.go @@ -37,19 +37,17 @@ func Code(ctx *context.Context) { keyword := ctx.FormTrim("q") path := ctx.FormTrim("path") - isFuzzy := ctx.FormOptionalBool("fuzzy").ValueOrDefault(true) - if mode := ctx.FormTrim("mode"); len(mode) > 0 { - isFuzzy = mode == "fuzzy" + mode := code_indexer.SearchModeExact + if m := ctx.FormTrim("mode"); m == "union" || + m == "fuzzy" || + ctx.FormBool("fuzzy") { + mode = code_indexer.SearchModeUnion } ctx.Data["Keyword"] = keyword ctx.Data["Language"] = language - ctx.Data["CodeSearchOptions"] = []string{"exact", "fuzzy"} - if isFuzzy { - ctx.Data["CodeSearchMode"] = "fuzzy" - } else { - ctx.Data["CodeSearchMode"] = "exact" - } + ctx.Data["CodeSearchOptions"] = code_indexer.CodeSearchOptions + ctx.Data["CodeSearchMode"] = mode.String() ctx.Data["PageIsViewCode"] = true if keyword == "" { @@ -88,11 +86,11 @@ func Code(ctx *context.Context) { if (len(repoIDs) > 0) || isAdmin { total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{ - RepoIDs: repoIDs, - Keyword: keyword, - IsKeywordFuzzy: isFuzzy, - Language: language, - Filename: path, + RepoIDs: repoIDs, + Keyword: keyword, + Mode: mode, + Language: language, + Filename: path, Paginator: &db.ListOptions{ Page: page, PageSize: setting.UI.RepoSearchPagingNum, diff --git a/routers/web/repo/search.go b/routers/web/repo/search.go index 442034b287..d10eb67528 100644 --- a/routers/web/repo/search.go +++ b/routers/web/repo/search.go @@ -21,14 +21,14 @@ type searchMode int const ( ExactSearchMode searchMode = iota - FuzzySearchMode + UnionSearchMode RegExpSearchMode ) func searchModeFromString(s string) searchMode { switch s { case "fuzzy", "union": - return FuzzySearchMode + return UnionSearchMode case "regexp": return RegExpSearchMode default: @@ -40,8 +40,8 @@ func (m searchMode) String() string { switch m { case ExactSearchMode: return "exact" - case FuzzySearchMode: - return "fuzzy" + case UnionSearchMode: + return "union" case RegExpSearchMode: return "regexp" default: @@ -49,6 +49,24 @@ func (m searchMode) String() string { } } +func (m searchMode) ToIndexer() code_indexer.SearchMode { + if m == ExactSearchMode { + return code_indexer.SearchModeExact + } + return code_indexer.SearchModeUnion +} + +func (m searchMode) ToGitGrep() git.GrepMode { + switch m { + case RegExpSearchMode: + return git.RegExpGrepMode + case UnionSearchMode: + return git.FixedAnyGrepMode + default: + return git.FixedGrepMode + } +} + // Search render repository search page func Search(ctx *context.Context) { language := ctx.FormTrim("l") @@ -59,7 +77,7 @@ func Search(ctx *context.Context) { if modeStr := ctx.FormString("mode"); len(modeStr) > 0 { mode = searchModeFromString(modeStr) } else if ctx.FormOptionalBool("fuzzy").ValueOrDefault(true) { // for backward compatibility in links - mode = FuzzySearchMode + mode = UnionSearchMode } ctx.Data["Keyword"] = keyword @@ -90,11 +108,11 @@ func Search(ctx *context.Context) { if setting.Indexer.RepoIndexerEnabled { var err error total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{ - RepoIDs: []int64{ctx.Repo.Repository.ID}, - Keyword: keyword, - IsKeywordFuzzy: mode == FuzzySearchMode, - Language: language, - Filename: path, + RepoIDs: []int64{ctx.Repo.Repository.ID}, + Keyword: keyword, + Mode: mode.ToIndexer(), + Language: language, + Filename: path, Paginator: &db.ListOptions{ Page: page, PageSize: setting.UI.RepoSearchPagingNum, @@ -110,19 +128,12 @@ func Search(ctx *context.Context) { ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx) } } else { - grepOpt := git.GrepOptions{ + res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, keyword, git.GrepOptions{ ContextLineNumber: 1, RefName: ctx.Repo.RefName, Filename: path, - } - switch mode { - case FuzzySearchMode: - grepOpt.Mode = git.FixedAnyGrepMode - ctx.Data["CodeSearchMode"] = "union" - case RegExpSearchMode: - grepOpt.Mode = git.RegExpGrepMode - } - res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, keyword, grepOpt) + Mode: mode.ToGitGrep(), + }) if err != nil { ctx.ServerError("GrepSearch", err) return diff --git a/routers/web/user/code.go b/routers/web/user/code.go index 3e044d7876..019249e3e0 100644 --- a/routers/web/user/code.go +++ b/routers/web/user/code.go @@ -41,19 +41,17 @@ func CodeSearch(ctx *context.Context) { keyword := ctx.FormTrim("q") path := ctx.FormTrim("path") - isFuzzy := ctx.FormOptionalBool("fuzzy").ValueOrDefault(true) - if mode := ctx.FormTrim("mode"); len(mode) > 0 { - isFuzzy = mode == "fuzzy" + mode := code_indexer.SearchModeExact + if m := ctx.FormTrim("mode"); m == "union" || + m == "fuzzy" || + ctx.FormBool("fuzzy") { + mode = code_indexer.SearchModeUnion } ctx.Data["Keyword"] = keyword ctx.Data["Language"] = language - ctx.Data["CodeSearchOptions"] = []string{"exact", "fuzzy"} - if isFuzzy { - ctx.Data["CodeSearchMode"] = "fuzzy" - } else { - ctx.Data["CodeSearchMode"] = "exact" - } + ctx.Data["CodeSearchOptions"] = code_indexer.CodeSearchOptions + ctx.Data["CodeSearchMode"] = mode.String() ctx.Data["IsCodePage"] = true if keyword == "" { @@ -85,11 +83,11 @@ func CodeSearch(ctx *context.Context) { if len(repoIDs) > 0 { total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{ - RepoIDs: repoIDs, - Keyword: keyword, - IsKeywordFuzzy: isFuzzy, - Language: language, - Filename: path, + RepoIDs: repoIDs, + Keyword: keyword, + Mode: mode, + Language: language, + Filename: path, Paginator: &db.ListOptions{ Page: page, PageSize: setting.UI.RepoSearchPagingNum, diff --git a/tests/integration/repo_search_test.go b/tests/integration/repo_search_test.go index 3c8ceb0896..60eca66735 100644 --- a/tests/integration/repo_search_test.go +++ b/tests/integration/repo_search_test.go @@ -82,19 +82,10 @@ func testSearchRepo(t *testing.T, indexer bool) { testSearch(t, "/user2/glob/search?q=loren&page=1", []string{"a.txt"}, indexer) testSearch(t, "/user2/glob/search?q=loren&page=1&mode=exact", []string{"a.txt"}, indexer) - if indexer { - // fuzzy search: matches both file3 (x/b.txt) and file1 (a.txt) - // when indexer is enabled - testSearch(t, "/user2/glob/search?q=file3&mode=fuzzy&page=1", []string{"x/b.txt", "a.txt"}, indexer) - testSearch(t, "/user2/glob/search?q=file4&mode=fuzzy&page=1", []string{"x/b.txt", "a.txt"}, indexer) - testSearch(t, "/user2/glob/search?q=file5&mode=fuzzy&page=1", []string{"x/b.txt", "a.txt"}, indexer) - } else { - // fuzzy search: Union/OR of all the keywords - // when indexer is disabled - testSearch(t, "/user2/glob/search?q=file3+file1&mode=union&page=1", []string{"a.txt", "x/b.txt"}, indexer) - testSearch(t, "/user2/glob/search?q=file4&mode=union&page=1", []string{}, indexer) - testSearch(t, "/user2/glob/search?q=file5&mode=union&page=1", []string{}, indexer) - } + // union search: Union/OR of all the keywords + testSearch(t, "/user2/glob/search?q=file3+file1&mode=union&page=1", []string{"a.txt", "x/b.txt"}, indexer) + testSearch(t, "/user2/glob/search?q=file4&mode=union&page=1", []string{}, indexer) + testSearch(t, "/user2/glob/search?q=file5&mode=union&page=1", []string{}, indexer) testSearch(t, "/user2/glob/search?q=file3&page=1&mode=exact", []string{"x/b.txt"}, indexer) testSearch(t, "/user2/glob/search?q=file4&page=1&mode=exact", []string{}, indexer) @@ -121,11 +112,11 @@ func testSearch(t *testing.T, url string, expected []string, indexer bool) { }) if indexer { - assert.EqualValues(t, []string{"exact", "fuzzy"}, dropdownOptions) + assert.EqualValues(t, []string{"exact", "union"}, dropdownOptions) } else { assert.EqualValues(t, []string{"exact", "union", "regexp"}, dropdownOptions) } filenames := resultFilenames(t, doc) - assert.EqualValues(t, expected, filenames) + assert.ElementsMatch(t, expected, filenames) }