From 3f070a442a5ffdd771a4937fe079d95672fa3e3f Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Sun, 10 Dec 2023 14:15:41 +0100 Subject: [PATCH] [bugfix] Narrow search scope for accounts starting with '@'; don't LOWER SQLite text searches (#2435) --- internal/db/bundb/search.go | 46 +++++++++++++----- internal/db/bundb/search_test.go | 18 +++++++ internal/processing/search/accounts.go | 30 +++++++----- internal/processing/search/get.go | 66 +++++++++++++------------- 4 files changed, 104 insertions(+), 56 deletions(-) diff --git a/internal/db/bundb/search.go b/internal/db/bundb/search.go index 061471c19..61e52ce06 100644 --- a/internal/db/bundb/search.go +++ b/internal/db/bundb/search.go @@ -67,7 +67,7 @@ type searchDB struct { // WHERE (("account"."domain" IS NULL) OR ("account"."domain" != "account"."username")) // AND ("account"."id" < 'ZZZZZZZZZZZZZZZZZZZZZZZZZZ') // AND ("account"."id" IN (SELECT "target_account_id" FROM "follows" WHERE ("account_id" = '016T5Q3SQKBT337DAKVSKNXXW1'))) -// AND ((SELECT LOWER("account"."username" || COALESCE("account"."display_name", '') || COALESCE("account"."note", '')) AS "account_text") LIKE '%turtle%' ESCAPE '\') +// AND ((SELECT "account"."username" || COALESCE("account"."display_name", '') || COALESCE("account"."note", '') AS "account_text") LIKE '%turtle%' ESCAPE '\') // ORDER BY "account"."id" DESC LIMIT 10 func (s *searchDB) SearchForAccounts( ctx context.Context, @@ -128,12 +128,20 @@ func (s *searchDB) SearchForAccounts( ) } - // Select account text as subquery. - accountTextSubq := s.accountText(following) - - // Search using LIKE for matches of query - // string within accountText subquery. - q = whereLike(q, accountTextSubq, query) + if strings.HasPrefix(query, "@") { + // Query looks a bit like a username. + // Normalize it and just look for + // usernames that start with query. + query = query[1:] + subQ := s.accountUsername() + q = whereStartsLike(q, subQ, query) + } else { + // Query looks like arbitrary string. + // Search using LIKE for matches of query + // string within accountText subquery. + subQ := s.accountText(following) + q = whereLike(q, subQ, query) + } if limit > 0 { // Limit amount of accounts returned. @@ -191,7 +199,15 @@ func (s *searchDB) followedAccounts(accountID string) *bun.SelectQuery { Where("? = ?", bun.Ident("follow.account_id"), accountID) } -// statusText returns a subquery that selects a concatenation +// accountUsername returns a subquery that just selects +// from account usernames, without concatenation. +func (s *searchDB) accountUsername() *bun.SelectQuery { + return s.db. + NewSelect(). + Column("account.username") +} + +// accountText returns a subquery that selects a concatenation // of account username and display name as "account_text". If // `following` is true, then account note will also be included // in the concatenation. @@ -226,14 +242,17 @@ func (s *searchDB) accountText(following bool) *bun.SelectQuery { // different number of placeholders depending on // following/not following. COALESCE calls ensure // that we're not trying to concatenate null values. + // + // SQLite search is case insensitive. + // Postgres searches get lowercased. d := s.db.Dialect().Name() switch { case d == dialect.SQLite && following: - query = "LOWER(? || COALESCE(?, ?) || COALESCE(?, ?)) AS ?" + query = "? || COALESCE(?, ?) || COALESCE(?, ?) AS ?" case d == dialect.SQLite && !following: - query = "LOWER(? || COALESCE(?, ?)) AS ?" + query = "? || COALESCE(?, ?) AS ?" case d == dialect.PG && following: query = "LOWER(CONCAT(?, COALESCE(?, ?), COALESCE(?, ?))) AS ?" @@ -255,7 +274,7 @@ func (s *searchDB) accountText(following bool) *bun.SelectQuery { // WHERE ("status"."boost_of_id" IS NULL) // AND (("status"."account_id" = '01F8MH1H7YV1Z7D2C8K2730QBF') OR ("status"."in_reply_to_account_id" = '01F8MH1H7YV1Z7D2C8K2730QBF')) // AND ("status"."id" < 'ZZZZZZZZZZZZZZZZZZZZZZZZZZ') -// AND ((SELECT LOWER("status"."content" || COALESCE("status"."content_warning", '')) AS "status_text") LIKE '%hello%' ESCAPE '\') +// AND ((SELECT "status"."content" || COALESCE("status"."content_warning", '') AS "status_text") LIKE '%hello%' ESCAPE '\') // ORDER BY "status"."id" DESC LIMIT 10 func (s *searchDB) SearchForStatuses( ctx context.Context, @@ -366,11 +385,14 @@ func (s *searchDB) statusText() *bun.SelectQuery { // SQLite and Postgres use different // syntaxes for concatenation. + // + // SQLite search is case insensitive. + // Postgres searches get lowercased. switch s.db.Dialect().Name() { case dialect.SQLite: statusText = statusText.ColumnExpr( - "LOWER(? || COALESCE(?, ?)) AS ?", + "? || COALESCE(?, ?) AS ?", bun.Ident("status.content"), bun.Ident("status.content_warning"), "", bun.Ident("status_text")) diff --git a/internal/db/bundb/search_test.go b/internal/db/bundb/search_test.go index f84704df2..bc791271e 100644 --- a/internal/db/bundb/search_test.go +++ b/internal/db/bundb/search_test.go @@ -37,6 +37,24 @@ func (suite *SearchTestSuite) TestSearchAccountsTurtleAny() { suite.Len(accounts, 1) } +func (suite *SearchTestSuite) TestSearchAccounts1HappyWithPrefix() { + testAccount := suite.testAccounts["local_account_1"] + + // Query will just look for usernames that start with "1happy". + accounts, err := suite.db.SearchForAccounts(context.Background(), testAccount.ID, "@1happy", "", "", 10, false, 0) + suite.NoError(err) + suite.Len(accounts, 1) +} + +func (suite *SearchTestSuite) TestSearchAccounts1HappyNoPrefix() { + testAccount := suite.testAccounts["local_account_1"] + + // Query will do the full coalesce. + accounts, err := suite.db.SearchForAccounts(context.Background(), testAccount.ID, "1happy", "", "", 10, false, 0) + suite.NoError(err) + suite.Len(accounts, 1) +} + func (suite *SearchTestSuite) TestSearchAccountsTurtleFollowing() { testAccount := suite.testAccounts["local_account_1"] diff --git a/internal/processing/search/accounts.go b/internal/processing/search/accounts.go index 17002a29b..7201d0688 100644 --- a/internal/processing/search/accounts.go +++ b/internal/processing/search/accounts.go @@ -74,13 +74,6 @@ func (p *Processor) Accounts( return nil, gtserror.NewErrorBadRequest(err, err.Error()) } - // Be nice and normalize query by prepending '@'. - // This will make it easier for accountsByNamestring - // to pick this up as a valid namestring. - if query[0] != '@' { - query = "@" + query - } - log. WithContext(ctx). WithFields(kv.Fields{ @@ -107,9 +100,7 @@ func (p *Processor) Accounts( // See if we have something that looks like a namestring. username, domain, err := util.ExtractNamestringParts(query) - if err != nil { - log.Warnf(ctx, "couldn't parse '%s' as namestring: %v", query, err) - } else { + if err == nil { if domain != "" { // Search was an exact namestring; // we can safely assume caller is @@ -121,7 +112,7 @@ func (p *Processor) Accounts( // Get all accounts we can find // that match the provided query. - if err := p.accountsByNamestring( + if err := p.accountsByUsernameDomain( ctx, requestingAccount, id.Highest, @@ -137,6 +128,23 @@ func (p *Processor) Accounts( err = gtserror.Newf("error searching by namestring: %w", err) return nil, gtserror.NewErrorInternalError(err) } + } else { + // Query Doesn't look like a + // namestring, use text search. + if err := p.accountsByText( + ctx, + requestingAccount.ID, + id.Highest, + id.Lowest, + limit, + offset, + query, + following, + appendAccount, + ); err != nil && !errors.Is(err, db.ErrNoEntries) { + err = gtserror.Newf("error searching by text: %w", err) + return nil, gtserror.NewErrorInternalError(err) + } } // Return whatever we got (if anything). diff --git a/internal/processing/search/get.go b/internal/processing/search/get.go index 6b2125d81..c0b011bdb 100644 --- a/internal/processing/search/get.go +++ b/internal/processing/search/get.go @@ -165,13 +165,15 @@ func (p *Processor) Get( // We managed to parse query as a namestring. // If domain was set, this is a very specific // search for a particular account, so show - // that account to the caller even if they - // have it blocked. They might be looking - // for it to unblock it again! + // that account to the caller even if it's an + // instance account and/or even if they have + // it blocked. They might be looking for it + // to unblock it again! domainSet := (domain != "") + includeInstanceAccounts = domainSet includeBlockedAccounts = domainSet - err = p.accountsByNamestring( + err = p.accountsByUsernameDomain( ctx, account, maxID, @@ -189,24 +191,21 @@ func (p *Processor) Get( return nil, gtserror.NewErrorInternalError(err) } - // If domain was set, we know this is - // a full namestring, and not a url or - // just a username, so we should stop - // looking now and just return what we - // have (if anything). Otherwise we'll - // let the search keep going. - if domainSet { - return p.packageSearchResult( - ctx, - account, - foundAccounts, - foundStatuses, - foundTags, - req.APIv1, - includeInstanceAccounts, - includeBlockedAccounts, - ) - } + // Namestrings are a pretty unique format, so + // it's very unlikely that the caller was + // searching for anything except an account. + // As such, return early without falling + // through to broader search. + return p.packageSearchResult( + ctx, + account, + foundAccounts, + foundStatuses, + foundTags, + req.APIv1, + includeInstanceAccounts, + includeBlockedAccounts, + ) } } @@ -331,12 +330,12 @@ func (p *Processor) Get( ) } -// accountsByNamestring searches for accounts using the -// provided username and domain. If domain is not set, +// accountsByUsernameDomain searches for accounts using +// the provided username and domain. If domain is not set, // it may return more than one result by doing a text // search in the database for accounts matching the query. // Otherwise, it tries to return an exact match. -func (p *Processor) accountsByNamestring( +func (p *Processor) accountsByUsernameDomain( ctx context.Context, requestingAccount *gtsmodel.Account, maxID string, @@ -350,10 +349,10 @@ func (p *Processor) accountsByNamestring( appendAccount func(*gtsmodel.Account), ) error { if domain == "" { - // No error, but no domain set. That means the query - // looked like '@someone' which is not an exact search. - // Try to search for any accounts that match the query - // string, and let the caller know they should stop. + // No domain set. That means the query looked + // like '@someone' which is not an exact search, + // but is still a username search. Look for any + // usernames that start with the query string. return p.accountsByText( ctx, requestingAccount.ID, @@ -361,15 +360,16 @@ func (p *Processor) accountsByNamestring( minID, limit, offset, - // OK to assume username is set now. Use - // it instead of query to omit leading '@'. - username, + // Add @ prefix back in to indicate + // to search function that we want + // an account by its username. + "@"+username, following, appendAccount, ) } - // No error, and domain and username were both set. + // Domain and username were both set. // Caller is likely trying to search for an exact // match, from either a remote instance or local. foundAccount, err := p.accountByUsernameDomain(