From a26557482121ff163942bb1a0a8be0c7093f14a8 Mon Sep 17 00:00:00 2001 From: Shiny Nematoda Date: Fri, 1 Nov 2024 13:06:02 +0000 Subject: [PATCH 1/2] enh(search): improve issue search - new sort by relevance option for issue search - rework bleve fuzzy search to consider each term rather than matching the entire phrase --- modules/indexer/internal/bleve/query.go | 9 +++++ modules/indexer/issues/bleve/bleve.go | 33 ++++++++++--------- modules/indexer/issues/dboptions.go | 4 ++- .../issues/elasticsearch/elasticsearch.go | 2 +- modules/indexer/issues/indexer.go | 1 + modules/indexer/issues/internal/model.go | 1 + .../indexer/issues/meilisearch/meilisearch.go | 18 ++++++---- modules/templates/util_string.go | 4 +++ options/locale/locale_en-US.ini | 1 + templates/repo/issue/filter_list.tmpl | 14 ++++---- 10 files changed, 56 insertions(+), 31 deletions(-) diff --git a/modules/indexer/internal/bleve/query.go b/modules/indexer/internal/bleve/query.go index 90626da4f1..0cfda2d0f8 100644 --- a/modules/indexer/internal/bleve/query.go +++ b/modules/indexer/internal/bleve/query.go @@ -19,6 +19,15 @@ func NumericEqualityQuery(value int64, field string) *query.NumericRangeQuery { return q } +// MatchQuery generates a match query for the given phrase, field and analyzer +func MatchQuery(matchTerm, field, analyzer string, fuzziness int) *query.MatchQuery { + q := bleve.NewMatchQuery(matchTerm) + q.FieldVal = field + q.Analyzer = analyzer + q.Fuzziness = fuzziness + return q +} + // MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchPhraseQuery { q := bleve.NewMatchPhraseQuery(matchPhrase) diff --git a/modules/indexer/issues/bleve/bleve.go b/modules/indexer/issues/bleve/bleve.go index b20fcc6f80..5552a9deb0 100644 --- a/modules/indexer/issues/bleve/bleve.go +++ b/modules/indexer/issues/bleve/bleve.go @@ -35,13 +35,7 @@ func addUnicodeNormalizeTokenFilter(m 
*mapping.IndexMappingImpl) error { }) } -const ( - maxBatchSize = 16 - // fuzzyDenominator determines the levenshtein distance per each character of a keyword - fuzzyDenominator = 4 - // see https://github.com/blevesearch/bleve/issues/1563#issuecomment-786822311 - maxFuzziness = 2 -) +const maxBatchSize = 16 // IndexerData an update to the issue indexer type IndexerData internal.IndexerData @@ -162,16 +156,25 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( var queries []query.Query if options.Keyword != "" { - fuzziness := 0 if options.IsFuzzyKeyword { - fuzziness = min(maxFuzziness, len(options.Keyword)/fuzzyDenominator) + fuzziness := 1 + if kl := len(options.Keyword); kl > 3 { + fuzziness = 2 + } else if kl < 2 { + fuzziness = 0 + } + queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ + inner_bleve.MatchQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness), + inner_bleve.MatchQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness), + inner_bleve.MatchQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness), + }...)) + } else { + queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ + inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, 0), + inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, 0), + inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, 0), + }...)) } - - queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ - inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness), - inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness), - inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness), - }...)) } if len(options.RepoIDs) > 0 || options.AllPublic { diff --git a/modules/indexer/issues/dboptions.go b/modules/indexer/issues/dboptions.go index c1f454eeee..e34ef607c2 100644 
--- a/modules/indexer/issues/dboptions.go +++ b/modules/indexer/issues/dboptions.go @@ -78,7 +78,9 @@ func ToSearchOptions(keyword string, opts *issues_model.IssuesOptions) *SearchOp searchOpt.Paginator = opts.Paginator switch opts.SortType { - case "", "latest": + case "", "relevance": + searchOpt.SortBy = SortByScore + case "latest": searchOpt.SortBy = SortByCreatedDesc case "oldest": searchOpt.SortBy = SortByCreatedAsc diff --git a/modules/indexer/issues/elasticsearch/elasticsearch.go b/modules/indexer/issues/elasticsearch/elasticsearch.go index 42e709a5e8..24e1ac8855 100644 --- a/modules/indexer/issues/elasticsearch/elasticsearch.go +++ b/modules/indexer/issues/elasticsearch/elasticsearch.go @@ -236,7 +236,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( } if options.SortBy == "" { - options.SortBy = internal.SortByCreatedAsc + options.SortBy = internal.SortByScore } sortBy := []elastic.Sorter{ parseSortBy(options.SortBy), diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go index d7310529fc..7edcb3aa6f 100644 --- a/modules/indexer/issues/indexer.go +++ b/modules/indexer/issues/indexer.go @@ -269,6 +269,7 @@ func IsAvailable(ctx context.Context) bool { type SearchOptions = internal.SearchOptions const ( + SortByScore = internal.SortByScore SortByCreatedDesc = internal.SortByCreatedDesc SortByUpdatedDesc = internal.SortByUpdatedDesc SortByCommentsDesc = internal.SortByCommentsDesc diff --git a/modules/indexer/issues/internal/model.go b/modules/indexer/issues/internal/model.go index 2dfee8b72e..dda2b7a5c1 100644 --- a/modules/indexer/issues/internal/model.go +++ b/modules/indexer/issues/internal/model.go @@ -127,6 +127,7 @@ func (o *SearchOptions) Copy(edit ...func(options *SearchOptions)) *SearchOption type SortBy string const ( + SortByScore SortBy = "-_score" SortByCreatedDesc SortBy = "-created_unix" SortByUpdatedDesc SortBy = "-updated_unix" SortByCommentsDesc SortBy = "-comment_count" diff 
--git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index 7d18444e6c..7c291198f1 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -208,12 +208,18 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( query.And(inner_meilisearch.NewFilterLte("updated_unix", options.UpdatedBeforeUnix.Value())) } - if options.SortBy == "" { - options.SortBy = internal.SortByCreatedAsc - } - sortBy := []string{ - parseSortBy(options.SortBy), - "id:desc", + var sortBy []string + switch options.SortBy { + // sort by relevancy (no explicit sorting) + case internal.SortByScore: + fallthrough + case "": + sortBy = []string{} + default: + sortBy = []string{ + parseSortBy(options.SortBy), + "id:desc", + } } skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxTotalHits) diff --git a/modules/templates/util_string.go b/modules/templates/util_string.go index f23b74786a..685759dcbc 100644 --- a/modules/templates/util_string.go +++ b/modules/templates/util_string.go @@ -19,6 +19,10 @@ func NewStringUtils() *StringUtils { return &stringUtils } +func (su *StringUtils) Make(arr ...string) []string { + return arr +} + func (su *StringUtils) HasPrefix(s any, prefix string) bool { switch v := s.(type) { case string: diff --git a/options/locale/locale_en-US.ini b/options/locale/locale_en-US.ini index 8299ba83bb..52d5b2e0a8 100644 --- a/options/locale/locale_en-US.ini +++ b/options/locale/locale_en-US.ini @@ -1586,6 +1586,7 @@ issues.filter_type.mentioning_you = Mentioning you issues.filter_type.review_requested = Review requested issues.filter_type.reviewed_by_you = Reviewed by you issues.filter_sort = Sort +issues.filter_sort.relevance = Relevance issues.filter_sort.latest = Newest issues.filter_sort.oldest = Oldest issues.filter_sort.recentupdate = Recently updated diff --git a/templates/repo/issue/filter_list.tmpl 
b/templates/repo/issue/filter_list.tmpl index 09f87b582f..f774d517ea 100644 --- a/templates/repo/issue/filter_list.tmpl +++ b/templates/repo/issue/filter_list.tmpl @@ -146,13 +146,11 @@ {{svg "octicon-triangle-down" 14 "dropdown icon"}} From b6e6a2c36ec23958f288da05eef5b0da1369b2e8 Mon Sep 17 00:00:00 2001 From: Shiny Nematoda Date: Tue, 5 Nov 2024 14:42:36 +0000 Subject: [PATCH 2/2] test: add for issue indexer sort by score --- .../indexer/issues/internal/tests/tests.go | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/modules/indexer/issues/internal/tests/tests.go b/modules/indexer/issues/internal/tests/tests.go index a93b2913e9..e8e6a4e7d1 100644 --- a/modules/indexer/issues/internal/tests/tests.go +++ b/modules/indexer/issues/internal/tests/tests.go @@ -126,6 +126,7 @@ var cases = []*testIndexerCase{ }, SearchOptions: &internal.SearchOptions{ Keyword: "hello", + SortBy: internal.SortByCreatedDesc, }, ExpectedIDs: []int64{1002, 1001, 1000}, ExpectedTotal: 3, @@ -139,6 +140,7 @@ var cases = []*testIndexerCase{ }, SearchOptions: &internal.SearchOptions{ Keyword: "hello world", + SortBy: internal.SortByCreatedDesc, IsFuzzyKeyword: true, }, ExpectedIDs: []int64{1002, 1001, 1000}, @@ -157,6 +159,7 @@ var cases = []*testIndexerCase{ }, SearchOptions: &internal.SearchOptions{ Keyword: "hello", + SortBy: internal.SortByCreatedDesc, RepoIDs: []int64{1, 4}, }, ExpectedIDs: []int64{1006, 1002, 1001}, @@ -175,6 +178,7 @@ var cases = []*testIndexerCase{ }, SearchOptions: &internal.SearchOptions{ Keyword: "hello", + SortBy: internal.SortByCreatedDesc, RepoIDs: []int64{1, 4}, AllPublic: true, }, @@ -597,6 +601,22 @@ var cases = []*testIndexerCase{ } }, }, + { + Name: "SortByScore", + SearchOptions: &internal.SearchOptions{ + Paginator: &db.ListOptionsAll, + SortBy: internal.SortByScore, + }, + Expected: func(t *testing.T, data map[int64]*internal.IndexerData, result *internal.SearchResult) { + assert.Equal(t, len(data), len(result.Hits)) + assert.Equal(t, 
len(data), int(result.Total)) + for i, v := range result.Hits { + if i < len(result.Hits)-1 { + assert.GreaterOrEqual(t, v.Score, result.Hits[i+1].Score) + } + } + }, + }, { Name: "SortByCreatedAsc", SearchOptions: &internal.SearchOptions{