2023-03-12 16:00:57 +01:00
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
2021-08-10 13:32:39 +02:00
package dereferencing
import (
"context"
"errors"
2023-05-12 11:15:54 +02:00
"io"
2021-08-10 13:32:39 +02:00
"net/url"
2023-05-12 11:15:54 +02:00
"time"
2021-08-10 13:32:39 +02:00
"github.com/superseriousbusiness/gotosocial/internal/ap"
2022-11-29 10:24:55 +01:00
"github.com/superseriousbusiness/gotosocial/internal/config"
2022-09-12 13:03:23 +02:00
"github.com/superseriousbusiness/gotosocial/internal/db"
2023-05-28 14:08:35 +02:00
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
2021-08-10 13:32:39 +02:00
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/id"
2022-07-19 10:47:55 +02:00
"github.com/superseriousbusiness/gotosocial/internal/log"
2022-01-09 18:41:22 +01:00
"github.com/superseriousbusiness/gotosocial/internal/media"
2023-03-01 18:52:44 +01:00
"github.com/superseriousbusiness/gotosocial/internal/transport"
2021-08-10 13:32:39 +02:00
)
2023-05-12 11:15:54 +02:00
// statusUpToDate returns whether the given status model is both updateable
// (i.e. remote status) and whether it needs an update based on `fetched_at`.
func statusUpToDate ( status * gtsmodel . Status ) bool {
if * status . Local {
// Can't update local statuses.
return true
2021-08-10 13:32:39 +02:00
}
2023-05-12 11:15:54 +02:00
// If this status was updated recently (last interval), we return as-is.
if next := status . FetchedAt . Add ( 2 * time . Hour ) ; time . Now ( ) . Before ( next ) {
return true
2022-11-15 19:45:15 +01:00
}
2023-05-12 11:15:54 +02:00
return false
2021-08-10 13:32:39 +02:00
}
2023-05-12 11:15:54 +02:00
// GetStatusByURI: implements Dereferencer{}.GetStatusByURI().
//
// It resolves the status for the given URI via getStatusByURI. When that call
// also hands back an ActivityPub representation (meaning the status was just
// dereferenced), a job is enqueued on the federator worker pool to dereference
// the whole thread the status belongs to.
func (d *deref) GetStatusByURI(ctx context.Context, requestUser string, uri *url.URL) (*gtsmodel.Status, ap.Statusable, error) {
	// Look up (and dereference if necessary) the status itself.
	status, statusable, err := d.getStatusByURI(ctx, requestUser, uri)
	if err != nil {
		return nil, nil, err
	}

	if statusable == nil {
		// No fresh ActivityPub model came back,
		// so there is no thread work to schedule.
		return status, nil, nil
	}

	// The status was updated: re-dereference its thread asynchronously.
	d.state.Workers.Federator.MustEnqueueCtx(ctx, func(ctx context.Context) {
		d.dereferenceThread(ctx, requestUser, uri, status, statusable)
	})

	return status, statusable, nil
}
// getStatusByURI is a package internal form of .GetStatusByURI() that doesn't bother dereferencing the whole thread on update.
func ( d * deref ) getStatusByURI ( ctx context . Context , requestUser string , uri * url . URL ) ( * gtsmodel . Status , ap . Statusable , error ) {
var (
status * gtsmodel . Status
uriStr = uri . String ( )
err error
)
// Search the database for existing status with ID URI.
status , err = d . state . DB . GetStatusByURI ( ctx , uriStr )
if err != nil && ! errors . Is ( err , db . ErrNoEntries ) {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "error checking database for status %s by uri: %w" , uriStr , err )
2021-08-10 13:32:39 +02:00
}
2022-11-29 10:24:55 +01:00
if status == nil {
2023-05-12 11:15:54 +02:00
// Else, search the database for existing by ID URL.
status , err = d . state . DB . GetStatusByURL ( ctx , uriStr )
if err != nil && ! errors . Is ( err , db . ErrNoEntries ) {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "error checking database for status %s by url: %w" , uriStr , err )
2022-11-29 10:24:55 +01:00
}
2022-05-23 17:40:03 +02:00
}
2023-05-12 11:15:54 +02:00
if status == nil {
// Ensure that this is isn't a search for a local status.
if uri . Host == config . GetHost ( ) || uri . Host == config . GetAccountDomain ( ) {
return nil , nil , NewErrNotRetrievable ( err ) // this will be db.ErrNoEntries
2022-11-29 10:24:55 +01:00
}
2023-05-12 11:15:54 +02:00
// Create and pass-through a new bare-bones model for deref.
return d . enrichStatus ( ctx , requestUser , uri , & gtsmodel . Status {
Local : func ( ) * bool { var false bool ; return & false } ( ) ,
URI : uriStr ,
} , nil )
2022-11-29 10:24:55 +01:00
}
2023-05-12 11:15:54 +02:00
// Try to update + deref existing status model.
latest , apubStatus , err := d . enrichStatus ( ctx ,
requestUser ,
uri ,
status ,
nil ,
)
2023-03-01 18:52:44 +01:00
if err != nil {
2023-05-12 11:15:54 +02:00
log . Errorf ( ctx , "error enriching remote status: %v" , err )
2023-03-01 18:52:44 +01:00
2023-05-12 11:15:54 +02:00
// Update fetch-at to slow re-attempts.
status . FetchedAt = time . Now ( )
_ = d . state . DB . UpdateStatus ( ctx , status , "fetched_at" )
2022-11-29 10:24:55 +01:00
2023-05-12 11:15:54 +02:00
// Fallback to existing.
return status , nil , nil
2021-08-10 13:32:39 +02:00
}
2023-05-12 11:15:54 +02:00
return latest , apubStatus , nil
}
2021-08-10 13:32:39 +02:00
2023-05-12 11:15:54 +02:00
// RefreshStatus: implements Dereferencer{}.RefreshStatus().
func ( d * deref ) RefreshStatus ( ctx context . Context , requestUser string , status * gtsmodel . Status , apubStatus ap . Statusable , force bool ) ( * gtsmodel . Status , ap . Statusable , error ) {
// Check whether needs update.
if statusUpToDate ( status ) {
return status , nil , nil
2021-08-10 13:32:39 +02:00
}
2023-05-12 11:15:54 +02:00
// Parse the URI from status.
uri , err := url . Parse ( status . URI )
2021-08-10 13:32:39 +02:00
if err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "invalid status uri %q: %w" , status . URI , err )
2021-08-10 13:32:39 +02:00
}
2023-05-12 11:15:54 +02:00
// Try to update + deref existing status model.
latest , apubStatus , err := d . enrichStatus ( ctx ,
requestUser ,
uri ,
status ,
apubStatus ,
)
2022-05-23 17:40:03 +02:00
if err != nil {
2023-05-12 11:15:54 +02:00
return nil , nil , err
2022-05-23 17:40:03 +02:00
}
2021-08-10 13:32:39 +02:00
2023-05-12 11:15:54 +02:00
// This status was updated, enqueue re-dereferencing the whole thread.
d . state . Workers . Federator . MustEnqueueCtx ( ctx , func ( ctx context . Context ) {
d . dereferenceThread ( ctx , requestUser , uri , latest , apubStatus )
} )
2021-08-10 13:32:39 +02:00
2023-05-12 11:15:54 +02:00
return latest , apubStatus , nil
2021-08-10 13:32:39 +02:00
}
2023-05-12 11:15:54 +02:00
// RefreshStatusAsync: implements Dereferencer{}.RefreshStatusAsync().
func ( d * deref ) RefreshStatusAsync ( ctx context . Context , requestUser string , status * gtsmodel . Status , apubStatus ap . Statusable , force bool ) {
// Check whether needs update.
if statusUpToDate ( status ) {
return
2021-08-10 13:32:39 +02:00
}
2023-05-12 11:15:54 +02:00
// Parse the URI from status.
uri , err := url . Parse ( status . URI )
2021-08-10 13:32:39 +02:00
if err != nil {
2023-05-28 14:08:35 +02:00
log . Errorf ( ctx , "invalid status uri %q: %v" , status . URI , err )
2023-05-12 11:15:54 +02:00
return
2021-08-10 13:32:39 +02:00
}
2023-05-12 11:15:54 +02:00
// Enqueue a worker function to re-fetch this status async.
d . state . Workers . Federator . MustEnqueueCtx ( ctx , func ( ctx context . Context ) {
latest , apubStatus , err := d . enrichStatus ( ctx , requestUser , uri , status , apubStatus )
if err != nil {
log . Errorf ( ctx , "error enriching remote status: %v" , err )
return
}
// This status was updated, re-dereference the whole thread.
d . dereferenceThread ( ctx , requestUser , uri , latest , apubStatus )
} )
2021-08-10 13:32:39 +02:00
}
2023-05-12 11:15:54 +02:00
// enrichStatus will enrich the given status, whether a new barebones model, or existing model from the database. It handles necessary dereferencing etc.
func ( d * deref ) enrichStatus ( ctx context . Context , requestUser string , uri * url . URL , status * gtsmodel . Status , apubStatus ap . Statusable ) ( * gtsmodel . Status , ap . Statusable , error ) {
// Pre-fetch a transport for requesting username, used by later dereferencing.
tsport , err := d . transportController . NewTransportForUsername ( ctx , requestUser )
2021-08-10 13:32:39 +02:00
if err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "couldn't create transport: %w" , err )
2021-08-10 13:32:39 +02:00
}
2023-05-12 11:15:54 +02:00
// Check whether this account URI is a blocked domain / subdomain.
if blocked , err := d . state . DB . IsDomainBlocked ( ctx , uri . Host ) ; err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "error checking blocked domain: %w" , err )
2023-05-12 11:15:54 +02:00
} else if blocked {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "%s is blocked" , uri . Host )
2021-08-10 13:32:39 +02:00
}
2023-05-12 11:15:54 +02:00
var derefd bool
if apubStatus == nil {
// Dereference latest version of the status.
b , err := tsport . Dereference ( ctx , uri )
2021-08-10 13:32:39 +02:00
if err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , & ErrNotRetrievable { gtserror . Newf ( "error deferencing %s: %w" , uri , err ) }
2021-08-10 13:32:39 +02:00
}
2023-05-12 11:15:54 +02:00
// Attempt to resolve ActivityPub status from data.
apubStatus , err = ap . ResolveStatusable ( ctx , b )
if err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "error resolving statusable from data for account %s: %w" , uri , err )
2023-05-12 11:15:54 +02:00
}
2021-08-10 13:32:39 +02:00
2023-05-12 11:15:54 +02:00
// Mark as deref'd.
derefd = true
}
2021-08-10 13:32:39 +02:00
2023-05-12 11:15:54 +02:00
// Get the attributed-to status in order to fetch profile.
attributedTo , err := ap . ExtractAttributedTo ( apubStatus )
if err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . New ( "attributedTo was empty" )
2022-09-12 13:03:23 +02:00
}
2021-08-10 13:32:39 +02:00
2023-05-12 11:15:54 +02:00
// Ensure we have the author account of the status dereferenced (+ up-to-date).
if author , _ , err := d . getAccountByURI ( ctx , requestUser , attributedTo ) ; err != nil {
if status . AccountID == "" {
// Provided status account is nil, i.e. this is a new status / author, so a deref fail is unrecoverable.
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "failed to dereference status author %s: %w" , uri , err )
2023-05-12 11:15:54 +02:00
}
} else if status . AccountID != "" && status . AccountID != author . ID {
// There already existed an account for this status author, but account ID changed. This shouldn't happen!
log . Warnf ( ctx , "status author account ID changed: old=%s new=%s" , status . AccountID , author . ID )
2021-08-29 12:03:08 +02:00
}
2023-05-12 11:15:54 +02:00
// By default we assume that apubStatus has been passed,
// indicating that the given status is already latest.
latestStatus := status
if derefd {
// ActivityPub model was recently dereferenced, so assume that passed status
// may contain out-of-date information, convert AP model to our GTS model.
latestStatus , err = d . typeConverter . ASStatusToStatus ( ctx , apubStatus )
if err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "error converting statusable to gts model for status %s: %w" , uri , err )
2021-09-01 11:08:21 +02:00
}
2021-08-29 12:03:08 +02:00
}
2023-05-12 11:15:54 +02:00
// Use existing status ID.
latestStatus . ID = status . ID
2021-08-29 12:03:08 +02:00
2023-05-12 11:15:54 +02:00
if latestStatus . ID == "" {
// Generate new status ID from the provided creation date.
latestStatus . ID , err = id . NewULIDFromTime ( latestStatus . CreatedAt )
if err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "invalid created at date: %w" , err )
2021-08-10 13:32:39 +02:00
}
2023-05-12 11:15:54 +02:00
}
2021-08-10 13:32:39 +02:00
2023-05-12 11:15:54 +02:00
// Carry-over values and set fetch time.
latestStatus . FetchedAt = time . Now ( )
latestStatus . Local = status . Local
// Ensure the status' mentions are populated, and pass in existing to check for changes.
if err := d . fetchStatusMentions ( ctx , requestUser , status , latestStatus ) ; err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "error populating mentions for status %s: %w" , uri , err )
2023-05-12 11:15:54 +02:00
}
// TODO: populateStatusTags()
// Ensure the status' media attachments are populated, passing in existing to check for changes.
if err := d . fetchStatusAttachments ( ctx , tsport , status , latestStatus ) ; err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "error populating attachments for status %s: %w" , uri , err )
2023-05-12 11:15:54 +02:00
}
// Ensure the status' emoji attachments are populated, passing in existing to check for changes.
if err := d . fetchStatusEmojis ( ctx , requestUser , status , latestStatus ) ; err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "error populating emojis for status %s: %w" , uri , err )
2023-05-12 11:15:54 +02:00
}
if status . CreatedAt . IsZero ( ) {
// CreatedAt will be zero if no local copy was
// found in one of the GetStatusBy___() functions.
//
// This is new, put the status in the database.
err := d . state . DB . PutStatus ( ctx , latestStatus )
if errors . Is ( err , db . ErrAlreadyExists ) {
// TODO: replace this quick fix with per-URI deref locks.
latestStatus , err = d . state . DB . GetStatusByURI ( ctx , latestStatus . URI )
return latestStatus , nil , err
2021-08-10 13:32:39 +02:00
}
if err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "error putting in database: %w" , err )
2021-08-10 13:32:39 +02:00
}
2023-05-12 11:15:54 +02:00
} else {
// This is an existing status, update the model in the database.
if err := d . state . DB . UpdateStatus ( ctx , latestStatus ) ; err != nil {
2023-05-28 14:08:35 +02:00
return nil , nil , gtserror . Newf ( "error updating database: %w" , err )
2023-05-12 11:15:54 +02:00
}
}
return latestStatus , apubStatus , nil
}
2021-08-10 13:32:39 +02:00
2023-05-12 11:15:54 +02:00
func ( d * deref ) fetchStatusMentions ( ctx context . Context , requestUser string , existing * gtsmodel . Status , status * gtsmodel . Status ) error {
// Allocate new slice to take the yet-to-be created mention IDs.
status . MentionIDs = make ( [ ] string , len ( status . Mentions ) )
2021-08-29 12:03:08 +02:00
2023-05-12 11:15:54 +02:00
for i := range status . Mentions {
mention := status . Mentions [ i ]
2021-08-29 12:03:08 +02:00
2023-05-12 11:15:54 +02:00
// Look for existing mention with target account URI first.
existing , ok := existing . GetMentionByTargetURI ( mention . TargetAccountURI )
if ok && existing . ID != "" {
status . Mentions [ i ] = existing
status . MentionIDs [ i ] = existing . ID
continue
2021-08-29 12:03:08 +02:00
}
2023-05-12 11:15:54 +02:00
// Ensure that mention account URI is parseable.
accountURI , err := url . Parse ( mention . TargetAccountURI )
if err != nil {
log . Errorf ( ctx , "invalid account uri %q: %v" , mention . TargetAccountURI , err )
2021-08-20 12:26:56 +02:00
continue
}
2021-08-10 13:32:39 +02:00
2023-05-12 11:15:54 +02:00
// Ensure we have the account of the mention target dereferenced.
mention . TargetAccount , _ , err = d . getAccountByURI ( ctx , requestUser , accountURI )
2021-08-10 13:32:39 +02:00
if err != nil {
2023-05-12 11:15:54 +02:00
log . Errorf ( ctx , "failed to dereference account %s: %v" , accountURI , err )
continue
2021-08-20 12:26:56 +02:00
}
2023-05-12 11:15:54 +02:00
// Generate new ID according to status creation.
mention . ID , err = id . NewULIDFromTime ( status . CreatedAt )
if err != nil {
log . Errorf ( ctx , "invalid created at date: %v" , err )
mention . ID = id . NewULID ( ) // just use "now"
2021-08-10 13:32:39 +02:00
}
2023-05-12 11:15:54 +02:00
// Set known further mention details.
mention . CreatedAt = status . CreatedAt
mention . UpdatedAt = status . UpdatedAt
mention . OriginAccount = status . Account
mention . OriginAccountID = status . AccountID
mention . OriginAccountURI = status . AccountURI
mention . TargetAccountID = mention . TargetAccount . ID
mention . TargetAccountURI = mention . TargetAccount . URI
mention . TargetAccountURL = mention . TargetAccount . URL
mention . StatusID = status . ID
mention . Status = status
// Place the new mention into the database.
if err := d . state . DB . PutMention ( ctx , mention ) ; err != nil {
2023-05-28 14:08:35 +02:00
return gtserror . Newf ( "error putting mention in database: %w" , err )
2021-08-10 13:32:39 +02:00
}
2021-08-29 12:03:08 +02:00
2023-05-12 11:15:54 +02:00
// Set the *new* mention and ID.
status . Mentions [ i ] = mention
status . MentionIDs [ i ] = mention . ID
2021-08-10 13:32:39 +02:00
}
2021-08-29 12:03:08 +02:00
2023-05-12 11:15:54 +02:00
for i := 0 ; i < len ( status . MentionIDs ) ; i ++ {
if status . MentionIDs [ i ] == "" {
// This is a failed mention population, likely due
// to invalid incoming data / now-deleted accounts.
copy ( status . Mentions [ i : ] , status . Mentions [ i + 1 : ] )
copy ( status . MentionIDs [ i : ] , status . MentionIDs [ i + 1 : ] )
status . Mentions = status . Mentions [ : len ( status . Mentions ) - 1 ]
status . MentionIDs = status . MentionIDs [ : len ( status . MentionIDs ) - 1 ]
}
}
2021-08-10 13:32:39 +02:00
2021-08-29 12:03:08 +02:00
return nil
}
2023-05-12 11:15:54 +02:00
func ( d * deref ) fetchStatusAttachments ( ctx context . Context , tsport transport . Transport , existing * gtsmodel . Status , status * gtsmodel . Status ) error {
// Allocate new slice to take the yet-to-be fetched attachment IDs.
status . AttachmentIDs = make ( [ ] string , len ( status . Attachments ) )
for i := range status . Attachments {
placeholder := status . Attachments [ i ]
// Look for existing media attachment with remoet URL first.
existing , ok := existing . GetAttachmentByRemoteURL ( placeholder . RemoteURL )
if ok && existing . ID != "" {
status . Attachments [ i ] = existing
status . AttachmentIDs [ i ] = existing . ID
continue
}
// Ensure a valid media attachment remote URL.
remoteURL , err := url . Parse ( placeholder . RemoteURL )
if err != nil {
log . Errorf ( ctx , "invalid remote media url %q: %v" , placeholder . RemoteURL , err )
continue
}
// Start pre-processing remote media at remote URL.
processing , err := d . mediaManager . PreProcessMedia ( ctx , func ( ctx context . Context ) ( io . ReadCloser , int64 , error ) {
return tsport . DereferenceMedia ( ctx , remoteURL )
2023-05-28 14:08:35 +02:00
} , status . AccountID , & media . AdditionalMediaInfo {
2023-05-12 11:15:54 +02:00
StatusID : & status . ID ,
RemoteURL : & placeholder . RemoteURL ,
Description : & placeholder . Description ,
Blurhash : & placeholder . Blurhash ,
2022-01-09 18:41:22 +01:00
} )
2021-08-29 12:03:08 +02:00
if err != nil {
2023-05-12 11:15:54 +02:00
log . Errorf ( ctx , "error processing attachment: %v" , err )
2022-01-08 17:17:01 +01:00
continue
}
2023-05-12 11:15:54 +02:00
// Force attachment loading *right now*.
media , err := processing . LoadAttachment ( ctx )
2022-01-08 17:17:01 +01:00
if err != nil {
2023-05-12 11:15:54 +02:00
log . Errorf ( ctx , "error loading attachment: %v" , err )
2021-09-01 11:08:21 +02:00
continue
2021-08-29 12:03:08 +02:00
}
2023-05-12 11:15:54 +02:00
// Set the *new* attachment and ID.
status . Attachments [ i ] = media
status . AttachmentIDs [ i ] = media . ID
2021-08-29 12:03:08 +02:00
}
2023-05-12 11:15:54 +02:00
for i := 0 ; i < len ( status . AttachmentIDs ) ; i ++ {
if status . AttachmentIDs [ i ] == "" {
// This is a failed attachment population, this may
// be due to us not currently supporting a media type.
copy ( status . Attachments [ i : ] , status . Attachments [ i + 1 : ] )
copy ( status . AttachmentIDs [ i : ] , status . AttachmentIDs [ i + 1 : ] )
status . Attachments = status . Attachments [ : len ( status . Attachments ) - 1 ]
status . AttachmentIDs = status . AttachmentIDs [ : len ( status . AttachmentIDs ) - 1 ]
}
}
2021-08-29 12:03:08 +02:00
return nil
}
2023-05-12 11:15:54 +02:00
func ( d * deref ) fetchStatusEmojis ( ctx context . Context , requestUser string , existing * gtsmodel . Status , status * gtsmodel . Status ) error {
// Fetch the full-fleshed-out emoji objects for our status.
emojis , err := d . populateEmojis ( ctx , status . Emojis , requestUser )
2022-09-26 11:56:01 +02:00
if err != nil {
2023-05-28 14:08:35 +02:00
return gtserror . Newf ( "failed to populate emojis: %w" , err )
2022-09-26 11:56:01 +02:00
}
2022-09-12 13:03:23 +02:00
2023-05-12 11:15:54 +02:00
// Iterate over and get their IDs.
2022-09-26 11:56:01 +02:00
emojiIDs := make ( [ ] string , 0 , len ( emojis ) )
for _ , e := range emojis {
emojiIDs = append ( emojiIDs , e . ID )
2022-09-12 13:03:23 +02:00
}
2023-05-12 11:15:54 +02:00
// Set known emoji details.
2022-09-26 11:56:01 +02:00
status . Emojis = emojis
2022-09-12 13:03:23 +02:00
status . EmojiIDs = emojiIDs
2021-08-29 12:03:08 +02:00
2021-08-10 13:32:39 +02:00
return nil
}