From a3aa6042d7516b230a3eaac7b65c92202fa60a34 Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Fri, 26 Jan 2024 14:17:10 +0100 Subject: [PATCH] [bugfix] Don't return Account or Status if new and dereferencing failed, other small fixes (#2563) * tidy up account, status, webfingering logic a wee bit * go fmt * invert published check * alter resp initialization * get Published from account in typeutils * don't instantiate error for no darn good reason * shadow err * don't repeat error codes in wrapped errors * don't wrap error unnecessarily --- internal/ap/interfaces.go | 1 + internal/federation/dereferencing/account.go | 190 ++++++++++++++----- internal/federation/dereferencing/finger.go | 105 +++++++--- internal/federation/dereferencing/status.go | 48 +++-- internal/gtsmodel/account.go | 6 + internal/regexes/regexes.go | 33 ++-- internal/typeutils/astointernal.go | 14 +- internal/typeutils/astointernal_test.go | 1 + internal/typeutils/converter_test.go | 2 + internal/uris/uri.go | 16 ++ internal/util/namestring.go | 185 ++++++++++++------ internal/util/namestring_test.go | 70 +++---- 12 files changed, 461 insertions(+), 210 deletions(-) diff --git a/internal/ap/interfaces.go b/internal/ap/interfaces.go index f548dff0b..45ddbfba7 100644 --- a/internal/ap/interfaces.go +++ b/internal/ap/interfaces.go @@ -163,6 +163,7 @@ type Accountable interface { WithManuallyApprovesFollowers WithEndpoints WithTag + WithPublished } // Statusable represents the minimum activitypub interface for representing a 'status'. diff --git a/internal/federation/dereferencing/account.go b/internal/federation/dereferencing/account.go index dcb6116d2..067203780 100644 --- a/internal/federation/dereferencing/account.go +++ b/internal/federation/dereferencing/account.go @@ -52,7 +52,7 @@ func accountUpToDate(account *gtsmodel.Account, force bool) bool { return true } - if !account.CreatedAt.IsZero() && account.IsInstance() { + if account.IsInstance() && !account.IsNew() { // Existing instance account. No need for update. return true } @@ -232,8 +232,8 @@ func (d *Dereferencer) getAccountByUsernameDomain( // Create and pass-through a new bare-bones model for dereferencing. account, accountable, err := d.enrichAccountSafely(ctx, requestUser, nil, >smodel.Account{ ID: id.NewULID(), - Username: username, Domain: domain, + Username: username, }, nil) if err != nil { return nil, nil, err @@ -372,17 +372,18 @@ func (d *Dereferencer) enrichAccountSafely( // By default use account.URI // as the per-URI deref lock. - uriStr := account.URI - - if uriStr == "" { + var lockKey string + if account.URI != "" { + lockKey = account.URI + } else { // No URI is set yet, instead generate a faux-one from user+domain. - uriStr = "https://" + account.Domain + "/user/" + account.Username + lockKey = "https://" + account.Domain + "/users/" + account.Username } // Acquire per-URI deref lock, wraping unlock // to safely defer in case of panic, while still // performing more granular unlocks when needed. - unlock := d.state.FedLocks.Lock(uriStr) + unlock := d.state.FedLocks.Lock(lockKey) unlock = doOnce(unlock) defer unlock() @@ -394,10 +395,30 @@ func (d *Dereferencer) enrichAccountSafely( accountable, ) - if gtserror.StatusCode(err) >= 400 { - // Update fetch-at to slow re-attempts. + if code := gtserror.StatusCode(err); code >= 400 { + // No matter what, log the error + // so instance admins have an idea + // why something isn't working. + log.Info(ctx, err) + + if account.IsNew() { + // This was a new account enrich + // attempt which failed before we + // got to store it, so we can't + // return anything useful. + return nil, nil, err + } + + // We had this account stored already + // before this enrichment attempt. + // + // Update fetched_at to slow re-attempts + // but don't return early. We can still + // return the model we had stored already. account.FetchedAt = time.Now() - _ = d.state.DB.UpdateAccount(ctx, account, "fetched_at") + if err := d.state.DB.UpdateAccount(ctx, account, "fetched_at"); err != nil { + log.Errorf(ctx, "error updating account fetched_at: %v", err) + } } // Unlock now @@ -414,7 +435,7 @@ func (d *Dereferencer) enrichAccountSafely( // in a call to db.Put(Account). Look again in DB by URI. latest, err = d.state.DB.GetAccountByURI(ctx, account.URI) if err != nil { - err = gtserror.Newf("error getting account %s from database after race: %w", uriStr, err) + err = gtserror.Newf("error getting account %s from database after race: %w", lockKey, err) } } @@ -440,32 +461,44 @@ func (d *Dereferencer) enrichAccount( if account.Username != "" { // A username was provided so we can attempt a webfinger, this ensures up-to-date accountdomain info. accDomain, accURI, err := d.fingerRemoteAccount(ctx, tsport, account.Username, account.Domain) - if err != nil { - if account.URI == "" { - // this is a new account (to us) with username@domain - // but failed webfinger, nothing more we can do. - err := gtserror.Newf("error webfingering account: %w", err) - return nil, nil, gtserror.SetUnretrievable(err) + switch { + + case err != nil && account.URI == "": + // This is a new account (to us) with username@domain + // but failed webfinger, nothing more we can do. + err := gtserror.Newf("error webfingering account: %w", err) + return nil, nil, gtserror.SetUnretrievable(err) + + case err != nil: + // Simply log this error and move on, + // we already have an account URI. + log.Errorf(ctx, + "error webfingering[1] remote account %s@%s: %v", + account.Username, account.Domain, err, + ) + + case err == nil && account.Domain != accDomain: + // After webfinger, we now have correct account domain from which we can do a final DB check. + alreadyAcct, err := d.state.DB.GetAccountByUsernameDomain(ctx, account.Username, accDomain) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return nil, nil, gtserror.Newf("db err looking for account again after webfinger: %w", err) } - // Simply log this error and move on, we already have an account URI. - log.Errorf(ctx, "error webfingering[1] remote account %s@%s: %v", account.Username, account.Domain, err) - } - - if err == nil { - if account.Domain != accDomain { - // After webfinger, we now have correct account domain from which we can do a final DB check. - alreadyAccount, err := d.state.DB.GetAccountByUsernameDomain(ctx, account.Username, accDomain) - if err != nil && !errors.Is(err, db.ErrNoEntries) { - return nil, nil, gtserror.Newf("db err looking for account again after webfinger: %w", err) - } - - if alreadyAccount != nil { - // Enrich existing account. - account = alreadyAccount - } + if alreadyAcct != nil { + // We had this account stored under + // the discovered accountDomain. + // Proceed with this account. + account = alreadyAcct } + // Whether we had the account or not, we + // now have webfinger info relevant to the + // account, so fallthrough to set webfinger + // info on either the account we just found, + // or the stub account we were passed. + fallthrough + + case err == nil: // Update account with latest info. account.URI = accURI.String() account.Domain = accDomain @@ -474,13 +507,31 @@ func (d *Dereferencer) enrichAccount( } if uri == nil { - // No URI provided / found, must parse from account. + // No URI provided / found, + // must parse from account. uri, err = url.Parse(account.URI) if err != nil { - return nil, nil, gtserror.Newf("invalid uri %q: %w", account.URI, err) + return nil, nil, gtserror.Newf( + "invalid uri %q: %w", + account.URI, gtserror.SetUnretrievable(err), + ) + } + + if uri.Scheme != "http" && uri.Scheme != "https" { + err = errors.New("account URI scheme must be http or https") + return nil, nil, gtserror.Newf( + "invalid uri %q: %w", + account.URI, gtserror.SetUnretrievable(err), + ) } } + /* + BY THIS POINT we must have an account URI set, + either provided, pinned to the account, or + obtained via webfinger call. + */ + // Check whether this account URI is a blocked domain / subdomain. if blocked, err := d.state.DB.IsDomainBlocked(ctx, uri.Host); err != nil { return nil, nil, gtserror.Newf("error checking blocked domain: %w", err) @@ -493,27 +544,45 @@ func (d *Dereferencer) enrichAccount( defer d.stopHandshake(requestUser, uri) if apubAcc == nil { + // We were not given any (partial) ActivityPub + // version of this account as a parameter. // Dereference latest version of the account. b, err := tsport.Dereference(ctx, uri) if err != nil { - err := gtserror.Newf("error deferencing %s: %w", uri, err) + err := gtserror.Newf("error dereferencing %s: %w", uri, err) return nil, nil, gtserror.SetUnretrievable(err) } // Attempt to resolve ActivityPub acc from data. apubAcc, err = ap.ResolveAccountable(ctx, b) if err != nil { - return nil, nil, gtserror.Newf("error resolving accountable from data for account %s: %w", uri, err) + // ResolveAccountable will set Unretrievable/WrongType + // on the returned error, so we don't need to do it here. + err = gtserror.Newf("error resolving accountable from data for account %s: %w", uri, err) + return nil, nil, err } } + /* + BY THIS POINT we must have the ActivityPub + representation of the account, either provided, + or obtained via a dereference call. + */ + // Convert the dereferenced AP account object to our GTS model. + // + // We put this in the variable latestAcc because we might want + // to compare the provided account model with this fresh version, + // in order to check if anything changed since we last saw it. latestAcc, err := d.converter.ASRepresentationToAccount(ctx, apubAcc, account.Domain, ) if err != nil { - return nil, nil, gtserror.Newf("error converting accountable to gts model for account %s: %w", uri, err) + // ASRepresentationToAccount will set Malformed on the + // returned error, so we don't need to do it here. + err = gtserror.Newf("error converting accountable to gts model for account %s: %w", uri, err) + return nil, nil, err } if account.Username == "" { @@ -528,14 +597,15 @@ func (d *Dereferencer) enrichAccount( // from example.org to somewhere.else: we want to take somewhere.else // as the accountDomain then, not the example.org we were redirected from. - // Assume the host from the returned ActivityPub representation. - idProp := apubAcc.GetJSONLDId() - if idProp == nil || !idProp.IsIRI() { + // Assume the host from the returned + // ActivityPub representation. + id := ap.GetJSONLDId(apubAcc) + if id == nil { return nil, nil, gtserror.New("no id property found on person, or id was not an iri") } // Get IRI host value. - accHost := idProp.GetIRI().Host + accHost := id.Host latestAcc.Domain, _, err = d.fingerRemoteAccount(ctx, tsport, @@ -553,7 +623,7 @@ func (d *Dereferencer) enrichAccount( } if latestAcc.Domain == "" { - // ensure we have a domain set by this point, + // Ensure we have a domain set by this point, // otherwise it gets stored as a local user! // // TODO: there is probably a more granular way @@ -562,7 +632,16 @@ func (d *Dereferencer) enrichAccount( return nil, nil, gtserror.Newf("empty domain for %s", uri) } - // Ensure ID is set and update fetch time. + /* + BY THIS POINT we have more or less a fullly-formed + representation of the target account, derived from + a combination of webfinger lookups and dereferencing. + Further fetching beyond this point is for peripheral + things like account avatar, header, emojis. + */ + + // Ensure internal db ID is + // set and update fetch time. latestAcc.ID = account.ID latestAcc.FetchedAt = time.Now() @@ -581,12 +660,14 @@ func (d *Dereferencer) enrichAccount( log.Errorf(ctx, "error fetching remote emojis for account %s: %v", uri, err) } - if account.CreatedAt.IsZero() { - // CreatedAt will be zero if no local copy was - // found in one of the GetAccountBy___() functions. - // - // Set time of creation from the last-fetched date. - latestAcc.CreatedAt = latestAcc.FetchedAt + if account.IsNew() { + // Prefer published/created time from + // apubAcc, fall back to FetchedAt value. + if latestAcc.CreatedAt.IsZero() { + latestAcc.CreatedAt = latestAcc.FetchedAt + } + + // Set time of update from the last-fetched date. latestAcc.UpdatedAt = latestAcc.FetchedAt // This is new, put it in the database. @@ -595,11 +676,16 @@ func (d *Dereferencer) enrichAccount( return nil, nil, gtserror.Newf("error putting in database: %w", err) } } else { + // Prefer published time from apubAcc, + // fall back to previous stored value. + if latestAcc.CreatedAt.IsZero() { + latestAcc.CreatedAt = account.CreatedAt + } + // Set time of update from the last-fetched date. latestAcc.UpdatedAt = latestAcc.FetchedAt - // Use existing account values. - latestAcc.CreatedAt = account.CreatedAt + // Carry over existing account language. latestAcc.Language = account.Language // This is an existing account, update the model in the database. diff --git a/internal/federation/dereferencing/finger.go b/internal/federation/dereferencing/finger.go index a81afa5ea..514a058ba 100644 --- a/internal/federation/dereferencing/finger.go +++ b/internal/federation/dereferencing/finger.go @@ -20,54 +20,109 @@ package dereferencing import ( "context" "encoding/json" - "errors" - "fmt" "net/url" "strings" apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model" + "github.com/superseriousbusiness/gotosocial/internal/gtserror" + "github.com/superseriousbusiness/gotosocial/internal/log" "github.com/superseriousbusiness/gotosocial/internal/transport" "github.com/superseriousbusiness/gotosocial/internal/util" ) -func (d *Dereferencer) fingerRemoteAccount(ctx context.Context, transport transport.Transport, targetUsername string, targetHost string) (accountDomain string, accountURI *url.URL, err error) { - b, err := transport.Finger(ctx, targetUsername, targetHost) +// fingerRemoteAccount performs a webfinger call for the +// given username and host, using the provided transport. +// +// The webfinger response will be parsed, and the subject +// domain and AP URI will be extracted and returned. +// +// In case the response cannot be parsed, or the response +// does not contain a valid subject string or AP URI, an +// error will be returned instead. +func (d *Dereferencer) fingerRemoteAccount( + ctx context.Context, + transport transport.Transport, + username string, + host string, +) ( + string, // discovered account domain + *url.URL, // discovered account URI + error, +) { + // Assemble target namestring for logging. + var target = "@" + username + "@" + host + + b, err := transport.Finger(ctx, username, host) if err != nil { - err = fmt.Errorf("fingerRemoteAccount: error fingering @%s@%s: %s", targetUsername, targetHost, err) - return + err = gtserror.Newf("error webfingering %s: %w", target, err) + return "", nil, err } - resp := &apimodel.WellKnownResponse{} - if err = json.Unmarshal(b, resp); err != nil { - err = fmt.Errorf("fingerRemoteAccount: could not unmarshal server response as WebfingerAccountResponse while dereferencing @%s@%s: %s", targetUsername, targetHost, err) - return + var resp apimodel.WellKnownResponse + if err := json.Unmarshal(b, &resp); err != nil { + err = gtserror.Newf("error parsing response as JSON for %s: %w", target, err) + return "", nil, err } if len(resp.Links) == 0 { - err = fmt.Errorf("fingerRemoteAccount: no links found in webfinger response %s", string(b)) - return + err = gtserror.Newf("no links found in response for %s", target) + return "", nil, err } if resp.Subject == "" { - err = fmt.Errorf("fingerRemoteAccount: no subject found in webfinger response %s", string(b)) - return + err = gtserror.Newf("no subject found in response for %s", target) + return "", nil, err } - _, accountDomain, err = util.ExtractWebfingerParts(resp.Subject) + _, accountDomain, err := util.ExtractWebfingerParts(resp.Subject) if err != nil { - err = fmt.Errorf("fingerRemoteAccount: error extracting webfinger subject parts: %s", err) + err = gtserror.Newf("error extracting subject parts for %s: %w", target, err) + return "", nil, err } - // look through the links for the first one that matches what we need - for _, l := range resp.Links { - if l.Rel == "self" && (strings.EqualFold(l.Type, "application/activity+json") || strings.EqualFold(l.Type, "application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\"")) { - if uri, thiserr := url.Parse(l.Href); thiserr == nil && (uri.Scheme == "http" || uri.Scheme == "https") { - // found it! - accountURI = uri - return - } + // Look through links for the first + // one that matches what we need: + // + // - Must be self link. + // - Must be AP type. + // - Valid https/http URI. + for _, link := range resp.Links { + if link.Rel != "self" { + // Not self link, ignore. + continue } + + if !strings.EqualFold(link.Type, "application/activity+json") && + !strings.EqualFold(link.Type, "application/ld+json; profile=\"https://www.w3.org/ns/activitystreams\"") { + // Not an AP type, ignore. + continue + } + + uri, err := url.Parse(link.Href) + if err != nil { + log.Warnf(ctx, + "invalid href for ActivityPub self link %s for %s: %v", + link.Href, target, err, + ) + + // Funky URL, ignore. + continue + } + + if uri.Scheme != "http" && uri.Scheme != "https" { + log.Warnf(ctx, + "invalid href for ActivityPub self link %s for %s: schema must be http or https", + link.Href, target, + ) + + // Can't handle this + // schema, ignore. + continue + } + + // All looks good, return happily! + return accountDomain, uri, nil } - return "", nil, errors.New("fingerRemoteAccount: no match found in webfinger response") + return "", nil, gtserror.Newf("no suitable self, AP-type link found in webfinger response for %s", target) } diff --git a/internal/federation/dereferencing/status.go b/internal/federation/dereferencing/status.go index 1eb7d6fdb..7dc22a354 100644 --- a/internal/federation/dereferencing/status.go +++ b/internal/federation/dereferencing/status.go @@ -294,10 +294,30 @@ func (d *Dereferencer) enrichStatusSafely( apubStatus, ) - if gtserror.StatusCode(err) >= 400 { - // Update fetch-at to slow re-attempts. + if code := gtserror.StatusCode(err); code >= 400 { + // No matter what, log the error + // so instance admins have an idea + // why something isn't working. + log.Info(ctx, err) + + if isNew { + // This was a new status enrich + // attempt which failed before we + // got to store it, so we can't + // return anything useful. + return nil, nil, isNew, err + } + + // We had this status stored already + // before this enrichment attempt. + // + // Update fetched_at to slow re-attempts + // but don't return early. We can still + // return the model we had stored already. status.FetchedAt = time.Now() - _ = d.state.DB.UpdateStatus(ctx, status, "fetched_at") + if err := d.state.DB.UpdateStatus(ctx, status, "fetched_at"); err != nil { + log.Errorf(ctx, "error updating status fetched_at: %v", err) + } } // Unlock now @@ -358,7 +378,7 @@ func (d *Dereferencer) enrichStatus( // Dereference latest version of the status. b, err := tsport.Dereference(ctx, uri) if err != nil { - err := gtserror.Newf("error deferencing %s: %w", uri, err) + err := gtserror.Newf("error dereferencing %s: %w", uri, err) return nil, nil, gtserror.SetUnretrievable(err) } @@ -388,16 +408,21 @@ func (d *Dereferencer) enrichStatus( return nil, nil, gtserror.Newf("error converting statusable to gts model for status %s: %w", uri, err) } - // Use existing status ID. - latestStatus.ID = status.ID - if latestStatus.ID == "" { - - // Generate new status ID from the provided creation date. + // Check if we've previously + // stored this status in the DB. + // If we have, it'll be ID'd. + var isNew = (status.ID == "") + if isNew { + // No ID, we haven't stored this status before. + // Generate new status ID from the status publication time. latestStatus.ID, err = id.NewULIDFromTime(latestStatus.CreatedAt) if err != nil { log.Errorf(ctx, "invalid created at date (falling back to 'now'): %v", err) latestStatus.ID = id.NewULID() // just use "now" } + } else { + // Reuse existing status ID. + latestStatus.ID = status.ID } // Carry-over values and set fetch time. @@ -436,10 +461,7 @@ func (d *Dereferencer) enrichStatus( return nil, nil, gtserror.Newf("error populating emojis for status %s: %w", uri, err) } - if status.CreatedAt.IsZero() { - // CreatedAt will be zero if no local copy was - // found in one of the GetStatusBy___() functions. - // + if isNew { // This is new, put the status in the database. err := d.state.DB.PutStatus(ctx, latestStatus) if err != nil { diff --git a/internal/gtsmodel/account.go b/internal/gtsmodel/account.go index 578d4c811..3c908cd04 100644 --- a/internal/gtsmodel/account.go +++ b/internal/gtsmodel/account.go @@ -94,6 +94,12 @@ func (a *Account) IsRemote() bool { return !a.IsLocal() } +// IsNew returns whether an account is "new" in the sense +// that it has not been previously stored in the database. +func (a *Account) IsNew() bool { + return a.CreatedAt.IsZero() +} + // IsInstance returns whether account is an instance internal actor account. func (a *Account) IsInstance() bool { if a.IsLocal() { diff --git a/internal/regexes/regexes.go b/internal/regexes/regexes.go index 347a8a98b..25fcfc03a 100644 --- a/internal/regexes/regexes.go +++ b/internal/regexes/regexes.go @@ -57,20 +57,22 @@ const ( Path parts / capture. */ - userPathPrefix = `^/?` + users + `/(` + usernameRelaxed + `)` - userPath = userPathPrefix + `$` - publicKeyPath = userPathPrefix + `/` + publicKey + `$` - inboxPath = userPathPrefix + `/` + inbox + `$` - outboxPath = userPathPrefix + `/` + outbox + `$` - followersPath = userPathPrefix + `/` + followers + `$` - followingPath = userPathPrefix + `/` + following + `$` - likedPath = userPathPrefix + `/` + liked + `$` - followPath = userPathPrefix + `/` + follow + `/(` + ulid + `)$` - likePath = userPathPrefix + `/` + liked + `/(` + ulid + `)$` - statusesPath = userPathPrefix + `/` + statuses + `/(` + ulid + `)$` - blockPath = userPathPrefix + `/` + blocks + `/(` + ulid + `)$` - reportPath = `^/?` + reports + `/(` + ulid + `)$` - filePath = `^/?(` + ulid + `)/([a-z]+)/([a-z]+)/(` + ulid + `)\.([a-z0-9]+)$` + userPathPrefix = `^/?` + users + `/(` + usernameRelaxed + `)` + userPath = userPathPrefix + `$` + userWebPathPrefix = `^/?` + `@(` + usernameRelaxed + `)` + userWebPath = userWebPathPrefix + `$` + publicKeyPath = userPathPrefix + `/` + publicKey + `$` + inboxPath = userPathPrefix + `/` + inbox + `$` + outboxPath = userPathPrefix + `/` + outbox + `$` + followersPath = userPathPrefix + `/` + followers + `$` + followingPath = userPathPrefix + `/` + following + `$` + likedPath = userPathPrefix + `/` + liked + `$` + followPath = userPathPrefix + `/` + follow + `/(` + ulid + `)$` + likePath = userPathPrefix + `/` + liked + `/(` + ulid + `)$` + statusesPath = userPathPrefix + `/` + statuses + `/(` + ulid + `)$` + blockPath = userPathPrefix + `/` + blocks + `/(` + ulid + `)$` + reportPath = `^/?` + reports + `/(` + ulid + `)$` + filePath = `^/?(` + ulid + `)/([a-z]+)/([a-z]+)/(` + ulid + `)\.([a-z0-9]+)$` ) var ( @@ -110,6 +112,9 @@ var ( // UserPath validates and captures the username part from eg /users/example_username. UserPath = regexp.MustCompile(userPath) + // UserWebPath validates and captures the username part from eg /@example_username. + UserWebPath = regexp.MustCompile(userWebPath) + // PublicKeyPath parses a path that validates and captures the username part from eg /users/example_username/main-key PublicKeyPath = regexp.MustCompile(publicKeyPath) diff --git a/internal/typeutils/astointernal.go b/internal/typeutils/astointernal.go index 5bcb35d75..bcddb27e5 100644 --- a/internal/typeutils/astointernal.go +++ b/internal/typeutils/astointernal.go @@ -77,6 +77,18 @@ func (c *Converter) ASRepresentationToAccount(ctx context.Context, accountable a return nil, gtserror.SetMalformed(err) } + // Extract published time if possible. + // + // This denotes original creation time + // of the account on the remote instance. + // + // Not every implementation uses this property; + // so don't bother warning if we can't find it. + if pub := ap.GetPublished(accountable); !pub.IsZero() { + acct.CreatedAt = pub + acct.UpdatedAt = pub + } + // Extract a preferred name (display name), fallback to username. if displayName := ap.ExtractName(accountable); displayName != "" { acct.DisplayName = displayName @@ -314,7 +326,7 @@ func (c *Converter) ASStatusToStatus(ctx context.Context, statusable ap.Statusab // status.Published // - // Extract published time for the boost, + // Extract published time for the status, // zero-time will fall back to db defaults. if pub := ap.GetPublished(statusable); !pub.IsZero() { status.CreatedAt = pub diff --git a/internal/typeutils/astointernal_test.go b/internal/typeutils/astointernal_test.go index 4be2434e5..86c40fc50 100644 --- a/internal/typeutils/astointernal_test.go +++ b/internal/typeutils/astointernal_test.go @@ -138,6 +138,7 @@ func (suite *ASToInternalTestSuite) TestParseGargron() { acct, err := suite.typeconverter.ASRepresentationToAccount(context.Background(), rep, "") suite.NoError(err) suite.Equal("https://mastodon.social/inbox", *acct.SharedInboxURI) + suite.Equal(int64(1458086400), acct.CreatedAt.Unix()) } func (suite *ASToInternalTestSuite) TestParseReplyWithMention() { diff --git a/internal/typeutils/converter_test.go b/internal/typeutils/converter_test.go index 8a0dcd0fd..38376c87a 100644 --- a/internal/typeutils/converter_test.go +++ b/internal/typeutils/converter_test.go @@ -279,6 +279,8 @@ const ( "url": "https://mastodon.social/@Gargron", "manuallyApprovesFollowers": false, "discoverable": true, + "indexable": true, + "published": "2016-03-16T00:00:00Z", "devices": "https://mastodon.social/users/Gargron/collections/devices", "alsoKnownAs": [ "https://tooting.ai/users/Gargron" diff --git a/internal/uris/uri.go b/internal/uris/uri.go index ca98b6416..d12e24fea 100644 --- a/internal/uris/uri.go +++ b/internal/uris/uri.go @@ -248,6 +248,11 @@ func IsUserPath(id *url.URL) bool { return regexes.UserPath.MatchString(id.Path) } +// IsUserWebPath returns true if the given URL path corresponds to eg /@example_username +func IsUserWebPath(id *url.URL) bool { + return regexes.UserWebPath.MatchString(id.Path) +} + // IsInboxPath returns true if the given URL path corresponds to eg /users/example_username/inbox func IsInboxPath(id *url.URL) bool { return regexes.InboxPath.MatchString(id.Path) @@ -326,6 +331,17 @@ func ParseUserPath(id *url.URL) (username string, err error) { return } +// ParseUserPath returns the username from a path such as /@example_username +func ParseUserWebPath(id *url.URL) (username string, err error) { + matches := regexes.UserWebPath.FindStringSubmatch(id.Path) + if len(matches) != 2 { + err = fmt.Errorf("expected 2 matches but matches length was %d", len(matches)) + return + } + username = matches[1] + return +} + // ParseInboxPath returns the username from a path such as /users/example_username/inbox func ParseInboxPath(id *url.URL) (username string, err error) { matches := regexes.InboxPath.FindStringSubmatch(id.Path) diff --git a/internal/util/namestring.go b/internal/util/namestring.go index e510fe43f..bb351c6e8 100644 --- a/internal/util/namestring.go +++ b/internal/util/namestring.go @@ -23,101 +23,168 @@ import ( "strings" "github.com/superseriousbusiness/gotosocial/internal/regexes" + "github.com/superseriousbusiness/gotosocial/internal/uris" ) // ExtractNamestringParts extracts the username test_user and // the domain example.org from a string like @test_user@example.org. // // If nothing is matched, it will return an error. -func ExtractNamestringParts(mention string) (username, host string, err error) { - matches := regexes.MentionName.FindStringSubmatch(mention) +func ExtractNamestringParts(namestring string) (username, host string, err error) { + matches := regexes.MentionName.FindStringSubmatch(namestring) switch len(matches) { case 2: return matches[1], "", nil case 3: return matches[1], matches[2], nil default: - return "", "", fmt.Errorf("couldn't match mention %s", mention) + return "", "", fmt.Errorf("couldn't match namestring %s", namestring) } } -// ExtractWebfingerParts returns the username and domain from either an -// account query or an actor URI. +// ExtractWebfingerParts returns the username and domain from the "subject" +// part of a webfinger response: either an account namestring or an actor URI. // -// All implementations in the wild generate webfinger account resource +// All AP implementations in the wild perform webfinger account resource // queries with the "acct" scheme and without a leading "@"" on the username. // This is also the format the "subject" in a webfinger response adheres to. // -// Despite this fact, we're being permissive about a single leading @. This -// makes a query for acct:user@domain.tld and acct:@user@domain.tld -// equivalent. But a query for acct:@@user@domain.tld will have its username -// returned with the @ prefix. +// Despite this fact, we're permissive about a single leading @. This makes +// a query for "acct:user@domain.tld" and "acct:@user@domain.tld" equivalent. // -// We also permit a resource of user@domain.tld or @user@domain.tld, without -// a scheme. In that case it gets interpreted as if it was using the "acct" -// scheme. +// We also permit a resource of "user@domain.tld" or "@user@domain.tld", without +// a scheme. In that case it gets interpreted as if it was using "acct:". // -// When parsing fails, an error is returned. -func ExtractWebfingerParts(webfinger string) (username, host string, err error) { - orig := webfinger - - u, oerr := url.ParseRequestURI(webfinger) - if oerr != nil { - // Most likely reason for failing to parse is if the "acct" scheme was - // missing but a :port was included. So try an extra time with the scheme. - u, err = url.ParseRequestURI("acct:" + webfinger) - if err != nil { - return "", "", fmt.Errorf("failed to parse %s with acct sheme: %w", orig, oerr) - } +// Will error if parsing fails, or if the extracted username or domain are empty. +func ExtractWebfingerParts(subject string) ( + string, // username + string, // domain + error, +) { + u, err := url.ParseRequestURI(subject) + if err != nil { + // Most likely reason for failing to parse is if + // the "acct" scheme was missing but a :port was + // included. So try an extra time with the scheme. + u, err = url.ParseRequestURI("acct:" + subject) + } + if err != nil { + return "", "", fmt.Errorf("failed to parse %s: %w", subject, err) } - if u.Scheme == "http" || u.Scheme == "https" { - return ExtractWebfingerPartsFromURI(u) + switch u.Scheme { + + // Subject looks like + // "https://example.org/users/whatever" + // or "https://example.org/@whatever". + case "http", "https": + return partsFromURI(u) + + // Subject looks like + // "acct:whatever@example.org" + // or "acct:@whatever@example.org". + case "acct": + // Pass string without "acct:" prefix. + return partsFromNamestring(u.Opaque) + + // Subject was probably a relative URL. + // Fail since we need the domain. + case "": + return "", "", fmt.Errorf("no scheme for resource %s", subject) } - if u.Scheme != "acct" { - return "", "", fmt.Errorf("unsupported scheme: %s for resource: %s", u.Scheme, orig) - } - - stripped := strings.TrimPrefix(u.Opaque, "@") - userDomain := strings.Split(stripped, "@") - if len(userDomain) != 2 { - return "", "", fmt.Errorf("failed to extract user and domain from: %s", orig) - } - return userDomain[0], userDomain[1], nil + return "", "", fmt.Errorf("unsupported scheme %s for resource %s", u.Scheme, subject) } -// ExtractWebfingerPartsFromURI returns the user and domain extracted from -// the passed in URI. The URI should be an actor URI. +// partsFromNamestring returns the username +// and host parts extracted from a passed-in actor +// namestring of the format "whatever@example.org". // -// The domain returned is the hostname, and the user will be extracted -// from either /@test_user or /users/test_user. These two paths match the -// "aliasses" we include in our webfinger response and are also present in -// our "links". -// -// Like with ExtractWebfingerParts, we're being permissive about a single -// leading @. -// -// Errors are returned in case we end up with an empty domain or username. -func ExtractWebfingerPartsFromURI(uri *url.URL) (username, host string, err error) { - host = uri.Host - if host == "" { - return "", "", fmt.Errorf("failed to extract domain from: %s", uri) +// The function returns an error if username or +// host cannot be extracted. +func partsFromNamestring(namestring string) ( + string, // username + string, // host + error, +) { + // Trim all leading "@" symbols, + // and then inject just one "@". + namestring = strings.TrimLeft(namestring, "@") + namestring = "@" + namestring + + username, host, err := ExtractNamestringParts(namestring) + if err != nil { + return "", "", err } - // strip any leading slashes - path := strings.TrimLeft(uri.Path, "/") - segs := strings.Split(path, "/") + if username == "" { + err := fmt.Errorf("failed to extract username from: %s", namestring) + return "", "", err + } + + if host == "" { + err := fmt.Errorf("failed to extract domain from: %s", namestring) + return "", "", err + } + + return username, host, nil +} + +// partsFromURI returns the username and host +// extracted from the passed in actor URI. +// +// The username will be extracted from one of +// the patterns "/@whatever" or "/users/whatever". +// These paths match the "aliases" and "links" +// we include in our own webfinger responses. +// +// This function tries to be permissive with +// regard to leading "@" symbols. Nevertheless, +// an error will be returned if username or host +// cannot be extracted. +func partsFromURI(uri *url.URL) ( + string, // username + string, // host + error, +) { + host := uri.Host + if host == "" { + err := fmt.Errorf("failed to extract domain from: %s", uri) + return "", "", err + } + + // Copy the URL, taking + // only the parts we need. + short := &url.URL{ + Path: uri.Path, + } + + // Try "/users/whatever". + username, err := uris.ParseUserPath(short) + if err == nil && username != "" { + return username, host, nil + } + + // Try "/@whatever" + username, err = uris.ParseUserWebPath(short) + if err == nil && username != "" { + return username, host, nil + } + + // Try some exotic fallbacks like + // "/users/@whatever", "/@@whatever", etc. + short.Path = strings.TrimLeft(short.Path, "/") + segs := strings.Split(short.Path, "/") if segs[0] == "users" { username = segs[1] } else { username = segs[0] } - username = strings.TrimPrefix(username, "@") - if username == "" { - return "", "", fmt.Errorf("failed to extract username from: %s", uri) + username = strings.TrimLeft(username, "@") + if username != "" { + return username, host, nil } - return + return "", "", fmt.Errorf("failed to extract username from: %s", uri) } diff --git a/internal/util/namestring_test.go b/internal/util/namestring_test.go index 09beaeb8a..2ce5af65c 100644 --- a/internal/util/namestring_test.go +++ b/internal/util/namestring_test.go @@ -18,7 +18,6 @@ package util_test import ( - "net/url" "testing" "github.com/stretchr/testify/suite" @@ -39,11 +38,26 @@ func (suite *NamestringSuite) TestExtractWebfingerParts() { {in: "stonerkitty.monster@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"}, {in: "stonerkitty.monster@stonerkitty.monster:8080", username: "stonerkitty.monster", domain: "stonerkitty.monster:8080"}, {in: "@stonerkitty.monster@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"}, - {in: "acct:@@stonerkitty.monster@stonerkitty.monster", err: "failed to extract user and domain from: acct:@@stonerkitty.monster@stonerkitty.monster"}, - {in: "acct:@stonerkitty.monster@@stonerkitty.monster", err: "failed to extract user and domain from: acct:@stonerkitty.monster@@stonerkitty.monster"}, - {in: "@@stonerkitty.monster@stonerkitty.monster", err: "failed to extract user and domain from: @@stonerkitty.monster@stonerkitty.monster"}, - {in: "@stonerkitty.monster@@stonerkitty.monster", err: "failed to extract user and domain from: @stonerkitty.monster@@stonerkitty.monster"}, - {in: "s3:stonerkitty.monster@stonerkitty.monster", err: "unsupported scheme: s3 for resource: s3:stonerkitty.monster@stonerkitty.monster"}, + {in: "acct:@@stonerkitty.monster@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"}, + {in: "acct:@stonerkitty.monster@@stonerkitty.monster", err: "couldn't match namestring @stonerkitty.monster@@stonerkitty.monster"}, + {in: "@@stonerkitty.monster@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"}, + {in: "@stonerkitty.monster@@stonerkitty.monster", err: "couldn't match namestring @stonerkitty.monster@@stonerkitty.monster"}, + {in: "s3:stonerkitty.monster@stonerkitty.monster", err: "unsupported scheme s3 for resource s3:stonerkitty.monster@stonerkitty.monster"}, + {in: "https://stonerkitty.monster/users/stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"}, + {in: "https://stonerkitty.monster/users/@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"}, + {in: "https://stonerkitty.monster/@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"}, + {in: "https://stonerkitty.monster/@@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"}, + {in: "https://stonerkitty.monster:8080/users/stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster:8080"}, + {in: "https://stonerkitty.monster/users/stonerkitty.monster/evil", username: "stonerkitty.monster", domain: "stonerkitty.monster"}, + {in: "https://stonerkitty.monster/@stonerkitty.monster/evil", username: "stonerkitty.monster", domain: "stonerkitty.monster"}, + {in: "/@stonerkitty.monster", err: "no scheme for resource /@stonerkitty.monster"}, + {in: "/users/stonerkitty.monster", err: "no scheme for resource /users/stonerkitty.monster"}, + {in: "@stonerkitty.monster", err: "failed to extract domain from: @stonerkitty.monster"}, + {in: "users/stonerkitty.monster", err: "couldn't match namestring @users/stonerkitty.monster"}, + {in: "https://stonerkitty.monster/users/", err: "failed to extract username from: https://stonerkitty.monster/users/"}, + {in: "https://stonerkitty.monster/users/@", err: "failed to extract username from: https://stonerkitty.monster/users/@"}, + {in: "https://stonerkitty.monster/@", err: "failed to extract username from: https://stonerkitty.monster/@"}, + {in: "https://stonerkitty.monster/", err: "failed to extract username from: https://stonerkitty.monster/"}, } for _, tt := range tests { @@ -56,45 +70,9 @@ func (suite *NamestringSuite) TestExtractWebfingerParts() { suite.Equal(tt.username, username) suite.Equal(tt.domain, domain) } else { - suite.EqualError(err, tt.err) - } - }) - } -} - -func (suite *NamestringSuite) TestExtractWebfingerPartsFromURI() { - tests := []struct { - in, username, domain, err string - }{ - {in: "https://stonerkitty.monster/users/stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"}, - {in: "https://stonerkitty.monster/users/@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"}, - {in: "https://stonerkitty.monster/@stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster"}, - {in: "https://stonerkitty.monster/@@stonerkitty.monster", username: "@stonerkitty.monster", domain: "stonerkitty.monster"}, - {in: "https://stonerkitty.monster:8080/users/stonerkitty.monster", username: "stonerkitty.monster", domain: "stonerkitty.monster:8080"}, - {in: "https://stonerkitty.monster/users/stonerkitty.monster/evil", username: "stonerkitty.monster", domain: "stonerkitty.monster"}, - {in: "https://stonerkitty.monster/@stonerkitty.monster/evil", username: "stonerkitty.monster", domain: "stonerkitty.monster"}, - {in: "/@stonerkitty.monster", err: "failed to extract domain from: /@stonerkitty.monster"}, - {in: "/users/stonerkitty.monster", err: "failed to extract domain from: /users/stonerkitty.monster"}, - {in: "@stonerkitty.monster", err: "failed to extract domain from: @stonerkitty.monster"}, - {in: "users/stonerkitty.monster", err: "failed to extract domain from: users/stonerkitty.monster"}, - {in: "https://stonerkitty.monster/users/", err: "failed to extract username from: https://stonerkitty.monster/users/"}, - {in: "https://stonerkitty.monster/users/@", err: "failed to extract username from: https://stonerkitty.monster/users/@"}, - {in: "https://stonerkitty.monster/@", err: "failed to extract username from: https://stonerkitty.monster/@"}, - {in: "https://stonerkitty.monster/", err: "failed to extract username from: https://stonerkitty.monster/"}, - } - - for _, tt := range tests { - tt := tt - suite.Run(tt.in, func() { - suite.T().Parallel() - uri, _ := url.Parse(tt.in) - username, domain, err := util.ExtractWebfingerPartsFromURI(uri) - if tt.err == "" { - suite.NoError(err) - suite.Equal(tt.username, username) - suite.Equal(tt.domain, domain) - } else { - suite.EqualError(err, tt.err) + if !suite.EqualError(err, tt.err) { + suite.T().Logf("expected error %s", tt.err) + } } }) } @@ -107,7 +85,7 @@ func (suite *NamestringSuite) TestExtractNamestring() { {in: "@stonerkitty.monster@stonerkitty.monster", username: "stonerkitty.monster", host: "stonerkitty.monster"}, {in: "@stonerkitty.monster", username: "stonerkitty.monster"}, {in: "@someone@somewhere", username: "someone", host: "somewhere"}, - {in: "", err: "couldn't match mention "}, + {in: "", err: "couldn't match namestring "}, } for _, tt := range tests {