[chore] status dereferencing improvements (#3255)
* search for mentions also by username,domain in status deref, handle deleted statuses in enrichStatusSafely() * return d.enrichStatusSafely() directly
This commit is contained in:
parent
540edef0c2
commit
3254ef1923
|
@ -20,6 +20,7 @@ package dereferencing
|
|||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"slices"
|
||||
"time"
|
||||
|
@ -75,7 +76,7 @@ func statusFresh(
|
|||
|
||||
// GetStatusByURI will attempt to fetch a status by its URI, first checking the database. In the case of a newly-met remote model, or a remote model whose 'last_fetched' date
|
||||
// is beyond a certain interval, the status will be dereferenced. In the case of dereferencing, some low-priority status information may be enqueued for asynchronous fetching,
|
||||
// e.g. dereferencing the status thread. Param 'syncParent' = true indicates to fetch status ancestors synchronously. An ActivityPub object indicates the status was dereferenced.
|
||||
// e.g. dereferencing the status thread. An ActivityPub object indicates the status was dereferenced.
|
||||
func (d *Dereferencer) GetStatusByURI(ctx context.Context, requestUser string, uri *url.URL) (*gtsmodel.Status, ap.Statusable, error) {
|
||||
|
||||
// Fetch and dereference / update status if necessary.
|
||||
|
@ -164,22 +165,13 @@ func (d *Dereferencer) getStatusByURI(ctx context.Context, requestUser string, u
|
|||
return status, nil, false, nil
|
||||
}
|
||||
|
||||
// Try to deref and update existing status model.
|
||||
latest, statusable, isNew, err := d.enrichStatusSafely(ctx,
|
||||
// Try to deref and update existing.
|
||||
return d.enrichStatusSafely(ctx,
|
||||
requestUser,
|
||||
uri,
|
||||
status,
|
||||
nil,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
// fallback to the
|
||||
// existing status.
|
||||
latest = status
|
||||
statusable = nil
|
||||
}
|
||||
|
||||
return latest, statusable, isNew, err
|
||||
}
|
||||
|
||||
// RefreshStatus is functionally equivalent to GetStatusByURI(), except that it requires a pre
|
||||
|
@ -211,9 +203,6 @@ func (d *Dereferencer) RefreshStatus(
|
|||
status,
|
||||
statusable,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
if statusable != nil {
|
||||
// Deref parents + children.
|
||||
|
@ -226,7 +215,7 @@ func (d *Dereferencer) RefreshStatus(
|
|||
)
|
||||
}
|
||||
|
||||
return latest, statusable, nil
|
||||
return latest, statusable, err
|
||||
}
|
||||
|
||||
// RefreshStatusAsync is functionally equivalent to RefreshStatus(), except that ALL
|
||||
|
@ -275,9 +264,10 @@ func (d *Dereferencer) RefreshStatusAsync(
|
|||
})
|
||||
}
|
||||
|
||||
// enrichStatusSafely wraps enrichStatus() to perform
|
||||
// it within the State{}.FedLocks mutexmap, which protects
|
||||
// dereferencing actions with per-URI mutex locks.
|
||||
// enrichStatusSafely wraps enrichStatus() to perform it within
|
||||
// a State{}.FedLocks mutexmap, which protects it within per-URI
|
||||
// mutex locks. This also handles necessary delete of now-deleted
|
||||
// statuses, and updating fetched_at on returned HTTP errors.
|
||||
func (d *Dereferencer) enrichStatusSafely(
|
||||
ctx context.Context,
|
||||
requestUser string,
|
||||
|
@ -307,25 +297,30 @@ func (d *Dereferencer) enrichStatusSafely(
|
|||
defer unlock()
|
||||
|
||||
// Perform status enrichment with passed vars.
|
||||
latest, apubStatus, err := d.enrichStatus(ctx,
|
||||
latest, statusable, err := d.enrichStatus(ctx,
|
||||
requestUser,
|
||||
uri,
|
||||
status,
|
||||
statusable,
|
||||
)
|
||||
|
||||
if gtserror.StatusCode(err) >= 400 {
|
||||
if isNew {
|
||||
// This was a new status enrich
|
||||
// attempt which failed before we
|
||||
// got to store it, so we can't
|
||||
// return anything useful.
|
||||
return nil, nil, isNew, err
|
||||
// Check for a returned HTTP code via error.
|
||||
switch code := gtserror.StatusCode(err); {
|
||||
|
||||
// Gone (410) definitely indicates deletion.
|
||||
// Remove status if it was an existing one.
|
||||
case code == http.StatusGone && !isNew:
|
||||
if err := d.state.DB.DeleteStatusByID(ctx, status.ID); err != nil {
|
||||
log.Error(ctx, "error deleting gone status %s: %v", uriStr, err)
|
||||
}
|
||||
|
||||
// We had this status stored already
|
||||
// before this enrichment attempt.
|
||||
//
|
||||
// Don't return any status.
|
||||
return nil, nil, false, err
|
||||
|
||||
// Any other HTTP error mesg
|
||||
// code, with existing status.
|
||||
case code >= 400 && !isNew:
|
||||
|
||||
// Update fetched_at to slow re-attempts
|
||||
// but don't return early. We can still
|
||||
// return the model we had stored already.
|
||||
|
@ -333,6 +328,16 @@ func (d *Dereferencer) enrichStatusSafely(
|
|||
if err := d.state.DB.UpdateStatus(ctx, status, "fetched_at"); err != nil {
|
||||
log.Error(ctx, "error updating %s fetched_at: %v", uriStr, err)
|
||||
}
|
||||
|
||||
// See below.
|
||||
fallthrough
|
||||
|
||||
// In case of error with an existing
|
||||
// status in the database, return error
|
||||
// but still return existing status.
|
||||
case err != nil && !isNew:
|
||||
latest = status
|
||||
statusable = nil
|
||||
}
|
||||
|
||||
// Unlock now
|
||||
|
@ -340,11 +345,6 @@ func (d *Dereferencer) enrichStatusSafely(
|
|||
unlock()
|
||||
|
||||
if errors.Is(err, db.ErrAlreadyExists) {
|
||||
// Ensure AP model isn't set,
|
||||
// otherwise this indicates WE
|
||||
// enriched the status.
|
||||
apubStatus = nil
|
||||
|
||||
// We leave 'isNew' set so that caller
|
||||
// still dereferences parents, otherwise
|
||||
// the version we pass back may not have
|
||||
|
@ -362,7 +362,7 @@ func (d *Dereferencer) enrichStatusSafely(
|
|||
}
|
||||
}
|
||||
|
||||
return latest, apubStatus, isNew, err
|
||||
return latest, statusable, isNew, err
|
||||
}
|
||||
|
||||
// enrichStatus will enrich the given status, whether a new
|
||||
|
@ -374,7 +374,11 @@ func (d *Dereferencer) enrichStatus(
|
|||
uri *url.URL,
|
||||
status *gtsmodel.Status,
|
||||
apubStatus ap.Statusable,
|
||||
) (*gtsmodel.Status, ap.Statusable, error) {
|
||||
) (
|
||||
*gtsmodel.Status,
|
||||
ap.Statusable,
|
||||
error,
|
||||
) {
|
||||
// Pre-fetch a transport for requesting username, used by later dereferencing.
|
||||
tsport, err := d.transportController.NewTransportForUsername(ctx, requestUser)
|
||||
if err != nil {
|
||||
|
@ -385,7 +389,7 @@ func (d *Dereferencer) enrichStatus(
|
|||
if blocked, err := d.state.DB.IsDomainBlocked(ctx, uri.Host); err != nil {
|
||||
return nil, nil, gtserror.Newf("error checking blocked domain: %w", err)
|
||||
} else if blocked {
|
||||
err = gtserror.Newf("%s is blocked", uri.Host)
|
||||
err := gtserror.Newf("%s is blocked", uri.Host)
|
||||
return nil, nil, gtserror.SetUnretrievable(err)
|
||||
}
|
||||
|
||||
|
@ -406,7 +410,7 @@ func (d *Dereferencer) enrichStatus(
|
|||
if err != nil {
|
||||
// ResolveStatusable will set gtserror.WrongType
|
||||
// on the returned error, so we don't need to do it here.
|
||||
err = gtserror.Newf("error resolving statusable %s: %w", uri, err)
|
||||
err := gtserror.Newf("error resolving statusable %s: %w", uri, err)
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
|
@ -448,11 +452,14 @@ func (d *Dereferencer) enrichStatus(
|
|||
// Ensure we have the author account of the status dereferenced (+ up-to-date). If this is a new status
|
||||
// (i.e. status.AccountID == "") then any error here is irrecoverable. status.AccountID must ALWAYS be set.
|
||||
if _, _, err := d.getAccountByURI(ctx, requestUser, attributedTo); err != nil && status.AccountID == "" {
|
||||
return nil, nil, gtserror.Newf("failed to dereference status author %s: %w", uri, err)
|
||||
|
||||
// Note that we specifically DO NOT wrap the error, instead collapsing it as string.
|
||||
// Errors fetching an account do not necessarily relate to dereferencing the status.
|
||||
return nil, nil, gtserror.Newf("failed to dereference status author %s: %v", uri, err)
|
||||
}
|
||||
|
||||
// ActivityPub model was recently dereferenced, so assume that passed status
|
||||
// may contain out-of-date information, convert AP model to our GTS model.
|
||||
// ActivityPub model was recently dereferenced, so assume passed status
|
||||
// may contain out-of-date information. Convert AP model to our GTS model.
|
||||
latestStatus, err := d.converter.ASStatusToStatus(ctx, apubStatus)
|
||||
if err != nil {
|
||||
return nil, nil, gtserror.Newf("error converting statusable to gts model for status %s: %w", uri, err)
|
||||
|
@ -603,11 +610,11 @@ func (d *Dereferencer) fetchStatusMentions(
|
|||
err error
|
||||
)
|
||||
|
||||
mention, alreadyExists, err = d.populateMentionTarget(
|
||||
ctx,
|
||||
// Search existing status for a mention already stored,
|
||||
// else ensure new mention's target account is populated.
|
||||
mention, alreadyExists, err = d.getPopulatedMention(ctx,
|
||||
requestUser,
|
||||
existing,
|
||||
status,
|
||||
mention,
|
||||
)
|
||||
if err != nil {
|
||||
|
@ -984,7 +991,7 @@ func (d *Dereferencer) fetchStatusEmojis(
|
|||
return nil
|
||||
}
|
||||
|
||||
// populateMentionTarget tries to populate the given
|
||||
// getPopulatedMention tries to populate the given
|
||||
// mention with the correct TargetAccount and (if not
|
||||
// yet set) TargetAccountURI, returning the populated
|
||||
// mention.
|
||||
|
@ -996,11 +1003,10 @@ func (d *Dereferencer) fetchStatusEmojis(
|
|||
// Otherwise, this function will try to parse first
|
||||
// the Href of the mention, and then the namestring,
|
||||
// to see who it targets, and go fetch that account.
|
||||
func (d *Dereferencer) populateMentionTarget(
|
||||
func (d *Dereferencer) getPopulatedMention(
|
||||
ctx context.Context,
|
||||
requestUser string,
|
||||
existing *gtsmodel.Status,
|
||||
status *gtsmodel.Status,
|
||||
mention *gtsmodel.Mention,
|
||||
) (
|
||||
*gtsmodel.Mention,
|
||||
|
@ -1010,8 +1016,8 @@ func (d *Dereferencer) populateMentionTarget(
|
|||
// Mentions can be created using Name or Href.
|
||||
// Prefer Href (TargetAccountURI), fall back to Name.
|
||||
if mention.TargetAccountURI != "" {
|
||||
// Look for existing mention with this URI.
|
||||
// If we already have it we can return early.
|
||||
|
||||
// Look for existing mention with target account's URI, if so use this.
|
||||
existingMention, ok := existing.GetMentionByTargetURI(mention.TargetAccountURI)
|
||||
if ok && existingMention.ID != "" {
|
||||
return existingMention, true, nil
|
||||
|
@ -1020,33 +1026,47 @@ func (d *Dereferencer) populateMentionTarget(
|
|||
// Ensure that mention account URI is parseable.
|
||||
accountURI, err := url.Parse(mention.TargetAccountURI)
|
||||
if err != nil {
|
||||
err = gtserror.Newf("invalid account uri %q: %w", mention.TargetAccountURI, err)
|
||||
err := gtserror.Newf("invalid account uri %q: %w", mention.TargetAccountURI, err)
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
// Ensure we have the account of the mention target dereferenced.
|
||||
mention.TargetAccount, _, err = d.getAccountByURI(ctx, requestUser, accountURI)
|
||||
// Ensure we have account of the mention target dereferenced.
|
||||
mention.TargetAccount, _, err = d.getAccountByURI(ctx,
|
||||
requestUser,
|
||||
accountURI,
|
||||
)
|
||||
if err != nil {
|
||||
err = gtserror.Newf("failed to dereference account %s: %w", accountURI, err)
|
||||
err := gtserror.Newf("failed to dereference account %s: %w", accountURI, err)
|
||||
return nil, false, err
|
||||
}
|
||||
} else {
|
||||
// Href wasn't set. Find the target account using namestring.
|
||||
|
||||
// Href wasn't set, extract the username and domain parts from namestring.
|
||||
username, domain, err := util.ExtractNamestringParts(mention.NameString)
|
||||
if err != nil {
|
||||
err = gtserror.Newf("failed to parse namestring %s: %w", mention.NameString, err)
|
||||
err := gtserror.Newf("failed to parse namestring %s: %w", mention.NameString, err)
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
mention.TargetAccount, _, err = d.getAccountByUsernameDomain(ctx, requestUser, username, domain)
|
||||
// Look for existing mention with username domain target, if so use this.
|
||||
existingMention, ok := existing.GetMentionByUsernameDomain(username, domain)
|
||||
if ok && existingMention.ID != "" {
|
||||
return existingMention, true, nil
|
||||
}
|
||||
|
||||
// Ensure we have the account of the mention target dereferenced.
|
||||
mention.TargetAccount, _, err = d.getAccountByUsernameDomain(ctx,
|
||||
requestUser,
|
||||
username,
|
||||
domain,
|
||||
)
|
||||
if err != nil {
|
||||
err = gtserror.Newf("failed to dereference account %s: %w", mention.NameString, err)
|
||||
err := gtserror.Newf("failed to dereference account %s: %w", mention.NameString, err)
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
// Look for existing mention with this URI.
|
||||
mention.TargetAccountURI = mention.TargetAccount.URI
|
||||
existingMention, ok := existing.GetMentionByTargetURI(mention.TargetAccountURI)
|
||||
// Look for existing mention with target account's URI, if so use this.
|
||||
existingMention, ok = existing.GetMentionByTargetURI(mention.TargetAccountURI)
|
||||
if ok && existingMention.ID != "" {
|
||||
return existingMention, true, nil
|
||||
}
|
||||
|
|
|
@ -237,7 +237,7 @@ func (d *Dereferencer) DereferenceStatusDescendants(ctx context.Context, usernam
|
|||
|
||||
// Keep track of already dereferenced collection
|
||||
// pages for this thread to prevent recursion.
|
||||
derefdPages := make(map[string]struct{}, 10)
|
||||
derefdPages := make(map[string]struct{}, 16)
|
||||
|
||||
// frame represents a single stack frame when
|
||||
// iteratively derefencing status descendants.
|
||||
|
|
|
@ -49,15 +49,16 @@ type Mention struct {
|
|||
//
|
||||
// This will not be put in the database, it's just for convenience.
|
||||
NameString string `bun:"-"`
|
||||
|
||||
// TargetAccountURI is the AP ID (uri) of the user mentioned.
|
||||
//
|
||||
// This will not be put in the database, it's just for convenience.
|
||||
TargetAccountURI string `bun:"-"`
|
||||
|
||||
// TargetAccountURL is the web url of the user mentioned.
|
||||
//
|
||||
// This will not be put in the database, it's just for convenience.
|
||||
TargetAccountURL string `bun:"-"`
|
||||
// A pointer to the gtsmodel account of the mentioned account.
|
||||
}
|
||||
|
||||
// ParseMentionFunc describes a function that takes a lowercase account namestring
|
||||
|
|
|
@ -184,6 +184,35 @@ func (s *Status) GetMentionByTargetURI(uri string) (*Mention, bool) {
|
|||
return nil, false
|
||||
}
|
||||
|
||||
// GetMentionByUsernameDomain fetches the Mention associated with given
|
||||
// username and domains, typically extracted from a mention Namestring.
|
||||
func (s *Status) GetMentionByUsernameDomain(username, domain string) (*Mention, bool) {
|
||||
for _, mention := range s.Mentions {
|
||||
|
||||
// We can only check if target
|
||||
// account is set on the mention.
|
||||
account := mention.TargetAccount
|
||||
if account == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Usernames must always match.
|
||||
if account.Username != username {
|
||||
continue
|
||||
}
|
||||
|
||||
// Finally, either domains must
|
||||
// match or an empty domain may
|
||||
// be permitted if account local.
|
||||
if account.Domain == domain ||
|
||||
(domain == "" && account.IsLocal()) {
|
||||
return mention, true
|
||||
}
|
||||
}
|
||||
|
||||
return nil, false
|
||||
}
|
||||
|
||||
// GetTagByName searches status for Tag{} with name.
|
||||
func (s *Status) GetTagByName(name string) (*Tag, bool) {
|
||||
for _, tag := range s.Tags {
|
||||
|
|
Loading…
Reference in New Issue