From 10660e566d21c79ff86a8e69ea073cf562fc2766 Mon Sep 17 00:00:00 2001 From: kim <89579420+NyaaaWhatsUpDoc@users.noreply.github.com> Date: Tue, 9 Jan 2024 09:42:39 +0000 Subject: [PATCH] [bugfix] misc dereferencer fixes (#2475) * only perform status-up-to-date checks if no statusable has been provided * copy over the same style of freshness checking from status deref -> accounts * change some var names * check for empty account domain --- internal/federation/dereferencing/account.go | 121 +++++++++++++------ internal/federation/dereferencing/status.go | 17 ++- 2 files changed, 92 insertions(+), 46 deletions(-) diff --git a/internal/federation/dereferencing/account.go b/internal/federation/dereferencing/account.go index 19d53895a..dcb6116d2 100644 --- a/internal/federation/dereferencing/account.go +++ b/internal/federation/dereferencing/account.go @@ -41,7 +41,7 @@ import ( // accountUpToDate returns whether the given account model is both updateable (i.e. // non-instance remote account) and whether it needs an update based on `fetched_at`. -func accountUpToDate(account *gtsmodel.Account) bool { +func accountUpToDate(account *gtsmodel.Account, force bool) bool { if !account.SuspendedAt.IsZero() { // Can't update suspended accounts. return true @@ -57,8 +57,19 @@ func accountUpToDate(account *gtsmodel.Account) bool { return true } - // If this account was updated recently (last interval), we return as-is. - if next := account.FetchedAt.Add(6 * time.Hour); time.Now().Before(next) { + // Default limit we allow + // statuses to be refreshed. + limit := 6 * time.Hour + + if force { + // We specifically allow the force flag + // to force an early refresh (on a much + // smaller cooldown period). + limit = 5 * time.Minute + } + + // If account was updated recently (within limit), we return as-is. + if next := account.FetchedAt.Add(limit); time.Now().Before(next) { return true } @@ -70,7 +81,7 @@ func accountUpToDate(account *gtsmodel.Account) bool { // may be enqueued for asynchronous fetching, e.g. featured account statuses (pins). An ActivityPub object indicates the account was dereferenced. func (d *Dereferencer) GetAccountByURI(ctx context.Context, requestUser string, uri *url.URL) (*gtsmodel.Account, ap.Accountable, error) { // Fetch and dereference account if necessary. - account, apubAcc, err := d.getAccountByURI(ctx, + account, accountable, err := d.getAccountByURI(ctx, requestUser, uri, ) @@ -78,7 +89,7 @@ func (d *Dereferencer) GetAccountByURI(ctx context.Context, requestUser string, return nil, nil, err } - if apubAcc != nil { + if accountable != nil { // This account was updated, enqueue re-dereference featured posts. d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) { if err := d.dereferenceAccountFeatured(ctx, requestUser, account); err != nil { @@ -87,7 +98,7 @@ func (d *Dereferencer) GetAccountByURI(ctx context.Context, requestUser string, }) } - return account, apubAcc, nil + return account, accountable, nil } // getAccountByURI is a package internal form of .GetAccountByURI() that doesn't bother dereferencing featured posts on update. @@ -134,17 +145,18 @@ func (d *Dereferencer) getAccountByURI(ctx context.Context, requestUser string, }, nil) } - // Check whether needs update. - if accountUpToDate(account) { - // This is existing up-to-date account, ensure it is populated. + if accountUpToDate(account, false) { + // This is an existing account that is up-to-date, + // before returning ensure it is fully populated. if err := d.state.DB.PopulateAccount(ctx, account); err != nil { log.Errorf(ctx, "error populating existing account: %v", err) } + return account, nil, nil } // Try to update existing account model. - latest, apubAcc, err := d.enrichAccountSafely(ctx, + latest, accountable, err := d.enrichAccountSafely(ctx, requestUser, uri, account, @@ -157,14 +169,14 @@ func (d *Dereferencer) getAccountByURI(ctx context.Context, requestUser string, return account, nil, nil } - return latest, apubAcc, nil + return latest, accountable, nil } // GetAccountByUsernameDomain will attempt to fetch an accounts by its username@domain, first checking the database. In the case of a newly-met remote model, // or a remote model whose last_fetched date is beyond a certain interval, the account will be dereferenced. In the case of dereferencing, some low-priority // account information may be enqueued for asynchronous fetching, e.g. featured account statuses (pins). An ActivityPub object indicates the account was dereferenced. func (d *Dereferencer) GetAccountByUsernameDomain(ctx context.Context, requestUser string, username string, domain string) (*gtsmodel.Account, ap.Accountable, error) { - account, apubAcc, err := d.getAccountByUsernameDomain( + account, accountable, err := d.getAccountByUsernameDomain( ctx, requestUser, username, @@ -174,7 +186,7 @@ func (d *Dereferencer) GetAccountByUsernameDomain(ctx context.Context, requestUs return nil, nil, err } - if apubAcc != nil { + if accountable != nil { // This account was updated, enqueue re-dereference featured posts. d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) { if err := d.dereferenceAccountFeatured(ctx, requestUser, account); err != nil { @@ -183,12 +195,11 @@ func (d *Dereferencer) GetAccountByUsernameDomain(ctx context.Context, requestUs }) } - return account, apubAcc, nil + return account, accountable, nil } -// getAccountByUsernameDomain is a package internal form -// of .GetAccountByUsernameDomain() that doesn't bother -// dereferencing featured posts. +// getAccountByUsernameDomain is a package internal form of +// GetAccountByUsernameDomain() that doesn't bother deref of featured posts. func (d *Dereferencer) getAccountByUsernameDomain( ctx context.Context, requestUser string, @@ -219,7 +230,7 @@ func (d *Dereferencer) getAccountByUsernameDomain( } // Create and pass-through a new bare-bones model for dereferencing. - account, apubAcc, err := d.enrichAccountSafely(ctx, requestUser, nil, >smodel.Account{ + account, accountable, err := d.enrichAccountSafely(ctx, requestUser, nil, >smodel.Account{ ID: id.NewULID(), Username: username, Domain: domain, @@ -228,11 +239,11 @@ func (d *Dereferencer) getAccountByUsernameDomain( return nil, nil, err } - return account, apubAcc, nil + return account, accountable, nil } // Try to update existing account model. - latest, apubAcc, err := d.RefreshAccount(ctx, + latest, accountable, err := d.RefreshAccount(ctx, requestUser, account, nil, @@ -243,22 +254,32 @@ func (d *Dereferencer) getAccountByUsernameDomain( return account, nil, nil //nolint } - if apubAcc == nil { + if accountable == nil { // This is existing up-to-date account, ensure it is populated. if err := d.state.DB.PopulateAccount(ctx, latest); err != nil { log.Errorf(ctx, "error populating existing account: %v", err) } } - return latest, apubAcc, nil + return latest, accountable, nil } -// RefreshAccount updates the given account if remote and last_fetched is beyond fetch interval, or if force is set. An updated account model is returned, -// but in the case of dereferencing, some low-priority account information may be enqueued for asynchronous fetching, e.g. featured account statuses (pins). +// RefreshAccount updates the given account if remote and last_fetched is +// beyond fetch interval, or if force is set. An updated account model is +// returned, but in the case of dereferencing, some low-priority account info +// may be enqueued for asynchronous fetching, e.g. featured account statuses (pins). // An ActivityPub object indicates the account was dereferenced (i.e. updated). -func (d *Dereferencer) RefreshAccount(ctx context.Context, requestUser string, account *gtsmodel.Account, apubAcc ap.Accountable, force bool) (*gtsmodel.Account, ap.Accountable, error) { - // Check whether needs update (and not forced). - if accountUpToDate(account) && !force { +func (d *Dereferencer) RefreshAccount( + ctx context.Context, + requestUser string, + account *gtsmodel.Account, + accountable ap.Accountable, + force bool, +) (*gtsmodel.Account, ap.Accountable, error) { + // If no incoming data is provided, + // check whether account needs update. + if accountable == nil && + accountUpToDate(account, force) { return account, nil, nil } @@ -269,18 +290,18 @@ func (d *Dereferencer) RefreshAccount(ctx context.Context, requestUser string, a } // Try to update + deref passed account model. - latest, apubAcc, err := d.enrichAccountSafely(ctx, + latest, accountable, err := d.enrichAccountSafely(ctx, requestUser, uri, account, - apubAcc, + accountable, ) if err != nil { log.Errorf(ctx, "error enriching remote account: %v", err) return nil, nil, gtserror.Newf("error enriching remote account: %w", err) } - if apubAcc != nil { + if accountable != nil { // This account was updated, enqueue re-dereference featured posts. d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) { if err := d.dereferenceAccountFeatured(ctx, requestUser, latest); err != nil { @@ -289,14 +310,24 @@ func (d *Dereferencer) RefreshAccount(ctx context.Context, requestUser string, a }) } - return latest, apubAcc, nil + return latest, accountable, nil } -// RefreshAccountAsync enqueues the given account for an asychronous update fetching, if last_fetched is beyond fetch interval, or if forcc is set. -// This is a more optimized form of manually enqueueing .UpdateAccount() to the federation worker, since it only enqueues update if necessary. -func (d *Dereferencer) RefreshAccountAsync(ctx context.Context, requestUser string, account *gtsmodel.Account, apubAcc ap.Accountable, force bool) { - // Check whether needs update (and not forced). - if accountUpToDate(account) && !force { +// RefreshAccountAsync enqueues the given account for an asychronous +// update fetching, if last_fetched is beyond fetch interval, or if force +// is set. This is a more optimized form of manually enqueueing .UpdateAccount() +// to the federation worker, since it only enqueues update if necessary. +func (d *Dereferencer) RefreshAccountAsync( + ctx context.Context, + requestUser string, + account *gtsmodel.Account, + accountable ap.Accountable, + force bool, +) { + // If no incoming data is provided, + // check whether account needs update. + if accountable == nil && + accountUpToDate(account, force) { return } @@ -309,13 +340,13 @@ func (d *Dereferencer) RefreshAccountAsync(ctx context.Context, requestUser stri // Enqueue a worker function to enrich this account async. d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) { - latest, apubAcc, err := d.enrichAccountSafely(ctx, requestUser, uri, account, apubAcc) + latest, accountable, err := d.enrichAccountSafely(ctx, requestUser, uri, account, accountable) if err != nil { log.Errorf(ctx, "error enriching remote account: %v", err) return } - if apubAcc != nil { + if accountable != nil { // This account was updated, enqueue re-dereference featured posts. if err := d.dereferenceAccountFeatured(ctx, requestUser, latest); err != nil { log.Errorf(ctx, "error fetching account featured collection: %v", err) @@ -332,7 +363,7 @@ func (d *Dereferencer) enrichAccountSafely( requestUser string, uri *url.URL, account *gtsmodel.Account, - apubAcc ap.Accountable, + accountable ap.Accountable, ) (*gtsmodel.Account, ap.Accountable, error) { // Noop if account has been suspended. if !account.SuspendedAt.IsZero() { @@ -360,7 +391,7 @@ func (d *Dereferencer) enrichAccountSafely( requestUser, uri, account, - apubAcc, + accountable, ) if gtserror.StatusCode(err) >= 400 { @@ -521,6 +552,16 @@ func (d *Dereferencer) enrichAccount( } } + if latestAcc.Domain == "" { + // ensure we have a domain set by this point, + // otherwise it gets stored as a local user! + // + // TODO: there is probably a more granular way + // way of checking this in each of the above parts, + // and honestly it could do with a smol refactor. + return nil, nil, gtserror.Newf("empty domain for %s", uri) + } + // Ensure ID is set and update fetch time. latestAcc.ID = account.ID latestAcc.FetchedAt = time.Now() diff --git a/internal/federation/dereferencing/status.go b/internal/federation/dereferencing/status.go index 2a2b99d25..1eb7d6fdb 100644 --- a/internal/federation/dereferencing/status.go +++ b/internal/federation/dereferencing/status.go @@ -135,12 +135,13 @@ func (d *Dereferencer) getStatusByURI(ctx context.Context, requestUser string, u }, nil) } - // Check whether needs update. if statusUpToDate(status, false) { - // This is existing up-to-date status, ensure it is populated. + // This is an existing status that is up-to-date, + // before returning ensure it is fully populated. if err := d.state.DB.PopulateStatus(ctx, status); err != nil { log.Errorf(ctx, "error populating existing status: %v", err) } + return status, nil, false, nil } @@ -170,8 +171,10 @@ func (d *Dereferencer) RefreshStatus( statusable ap.Statusable, force bool, ) (*gtsmodel.Status, ap.Statusable, error) { - // Check whether status needs update. - if statusUpToDate(status, force) { + // If no incoming data is provided, + // check whether status needs update. + if statusable == nil && + statusUpToDate(status, force) { return status, nil, nil } @@ -215,8 +218,10 @@ func (d *Dereferencer) RefreshStatusAsync( statusable ap.Statusable, force bool, ) { - // Check whether status needs update. - if statusUpToDate(status, force) { + // If no incoming data is provided, + // check whether status needs update. + if statusable == nil && + statusUpToDate(status, force) { return }