2023-03-12 16:00:57 +01:00
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
2021-08-10 13:32:39 +02:00
package dereferencing
import (
2021-08-25 15:34:33 +02:00
"context"
2021-08-10 13:32:39 +02:00
"net/url"
2022-07-19 10:47:55 +02:00
"codeberg.org/gruf/go-kv"
2023-06-03 11:35:15 +02:00
"github.com/superseriousbusiness/activity/pub"
2022-09-25 13:09:41 +02:00
"github.com/superseriousbusiness/activity/streams/vocab"
2021-08-10 13:32:39 +02:00
"github.com/superseriousbusiness/gotosocial/internal/ap"
2021-12-07 13:31:39 +01:00
"github.com/superseriousbusiness/gotosocial/internal/config"
2023-05-28 14:08:35 +02:00
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
2022-09-25 13:09:41 +02:00
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
2022-07-19 10:47:55 +02:00
"github.com/superseriousbusiness/gotosocial/internal/log"
2021-12-20 15:19:53 +01:00
"github.com/superseriousbusiness/gotosocial/internal/uris"
2021-08-10 13:32:39 +02:00
)
2022-09-25 13:09:41 +02:00
// maxIter defines how many iterations of descendants or
// ancestors we are willing to follow before returning error.
const maxIter = 1000
2023-05-12 11:15:54 +02:00
// dereferenceThread will dereference statuses both above and below the given status in a thread, it returns no error and is intended to be called asychronously.
func ( d * deref ) dereferenceThread ( ctx context . Context , username string , statusIRI * url . URL , status * gtsmodel . Status , statusable ap . Statusable ) {
2022-09-25 13:09:41 +02:00
// Ensure that ancestors have been fully dereferenced
if err := d . dereferenceStatusAncestors ( ctx , username , status ) ; err != nil {
2023-05-28 14:08:35 +02:00
log . Error ( ctx , err ) // log entry and error will include caller prefixes
2021-08-10 13:32:39 +02:00
}
2022-09-25 13:09:41 +02:00
// Ensure that descendants have been fully dereferenced
if err := d . dereferenceStatusDescendants ( ctx , username , statusIRI , statusable ) ; err != nil {
2023-05-28 14:08:35 +02:00
log . Error ( ctx , err ) // log entry and error will include caller prefixes
2021-08-10 13:32:39 +02:00
}
}
2022-09-25 13:09:41 +02:00
// dereferenceAncestors has the goal of reaching the oldest ancestor of a given status, and stashing all statuses along the way.
func ( d * deref ) dereferenceStatusAncestors ( ctx context . Context , username string , status * gtsmodel . Status ) error {
// Take ref to original
ogIRI := status . URI
2022-07-19 10:47:55 +02:00
2022-09-25 13:09:41 +02:00
// Start log entry with fields
2023-02-17 12:02:29 +01:00
l := log . WithContext ( ctx ) .
WithFields ( kv . Fields {
{ "username" , username } ,
{ "statusIRI" , ogIRI } ,
} ... )
2021-08-10 13:32:39 +02:00
2022-09-25 13:09:41 +02:00
// Log function start
l . Trace ( "beginning" )
2021-08-10 13:32:39 +02:00
2022-09-25 13:09:41 +02:00
for i := 0 ; i < maxIter ; i ++ {
2021-08-10 13:32:39 +02:00
if status . InReplyToURI == "" {
// status doesn't reply to anything
return nil
}
2022-06-11 16:25:41 +02:00
2022-09-25 13:09:41 +02:00
// Parse this status's replied IRI
replyIRI , err := url . Parse ( status . InReplyToURI )
2021-08-10 13:32:39 +02:00
if err != nil {
2023-05-28 14:08:35 +02:00
return gtserror . Newf ( "invalid status InReplyToURI %q: %w" , status . InReplyToURI , err )
2021-08-10 13:32:39 +02:00
}
2022-06-11 16:25:41 +02:00
2022-09-25 13:09:41 +02:00
if replyIRI . Host == config . GetHost ( ) {
l . Tracef ( "following local status ancestors: %s" , status . InReplyToURI )
2021-08-10 13:32:39 +02:00
2022-09-25 13:09:41 +02:00
// This is our status, extract ID from path
_ , id , err := uris . ParseStatusesPath ( replyIRI )
if err != nil {
2023-05-28 14:08:35 +02:00
return gtserror . Newf ( "invalid local status IRI %q: %w" , status . InReplyToURI , err )
2022-09-25 13:09:41 +02:00
}
// Fetch this status from the database
2023-05-12 11:15:54 +02:00
localStatus , err := d . state . DB . GetStatusByID ( ctx , id )
2022-09-25 13:09:41 +02:00
if err != nil {
2023-05-28 14:08:35 +02:00
return gtserror . Newf ( "error fetching local status %q: %w" , id , err )
2022-09-25 13:09:41 +02:00
}
// Set the fetched status
status = localStatus
} else {
l . Tracef ( "following remote status ancestors: %s" , status . InReplyToURI )
// Fetch the remote status found at this IRI
2023-06-03 11:35:15 +02:00
remoteStatus , _ , err := d . getStatusByURI (
ctx ,
2023-05-12 11:15:54 +02:00
username ,
replyIRI ,
)
2022-09-25 13:09:41 +02:00
if err != nil {
2023-05-28 14:08:35 +02:00
return gtserror . Newf ( "error fetching remote status %q: %w" , status . InReplyToURI , err )
2022-09-25 13:09:41 +02:00
}
2021-08-10 13:32:39 +02:00
2022-09-25 13:09:41 +02:00
// Set the fetched status
status = remoteStatus
}
2021-08-10 13:32:39 +02:00
}
2023-05-28 14:08:35 +02:00
return gtserror . Newf ( "reached %d ancestor iterations for %q" , maxIter , ogIRI )
2021-08-10 13:32:39 +02:00
}
2022-09-25 13:09:41 +02:00
func ( d * deref ) dereferenceStatusDescendants ( ctx context . Context , username string , statusIRI * url . URL , parent ap . Statusable ) error {
// Take ref to original
ogIRI := statusIRI
2022-07-19 10:47:55 +02:00
2022-09-25 13:09:41 +02:00
// Start log entry with fields
2023-02-17 12:02:29 +01:00
l := log . WithContext ( ctx ) .
WithFields ( kv . Fields {
{ "username" , username } ,
{ "statusIRI" , ogIRI } ,
} ... )
2021-08-10 13:32:39 +02:00
2022-09-25 13:09:41 +02:00
// Log function start
l . Trace ( "beginning" )
// frame represents a single stack frame when iteratively
// dereferencing status descendants. where statusIRI and
// statusable are of the status whose children we are to
// descend, page is the current activity streams collection
// page of entities we are on (as we often push a frame to
// stack mid-paging), and item___ are entity iterators for
// this activity streams collection page.
type frame struct {
statusIRI * url . URL
statusable ap . Statusable
page ap . CollectionPageable
itemIter vocab . ActivityStreamsItemsPropertyIterator
2021-08-10 13:32:39 +02:00
}
2022-09-25 13:09:41 +02:00
var (
// current is the current stack frame
current * frame
// stack is a list of "shelved" descendand iterator
// frames. this is pushed to when a child status frame
// is found that we need to further iterate down, and
// popped from into 'current' when that child's tree
// of further descendants is exhausted.
stack = [ ] * frame {
{
// Starting input is first frame
statusIRI : statusIRI ,
statusable : parent ,
} ,
}
2021-08-10 13:32:39 +02:00
2022-09-25 13:09:41 +02:00
// popStack will remove and return the top frame
// from the stack, or nil if currently empty.
popStack = func ( ) * frame {
if len ( stack ) == 0 {
return nil
}
2021-08-10 13:32:39 +02:00
2022-09-25 13:09:41 +02:00
// Get frame index
idx := len ( stack ) - 1
2021-08-10 13:32:39 +02:00
2022-09-25 13:09:41 +02:00
// Pop last frame
frame := stack [ idx ]
stack = stack [ : idx ]
2021-08-10 13:32:39 +02:00
2022-09-25 13:09:41 +02:00
return frame
}
)
2021-08-10 13:32:39 +02:00
2022-09-25 13:09:41 +02:00
stackLoop :
for i := 0 ; i < maxIter ; i ++ {
// Pop next frame, nil means we are at end
if current = popStack ( ) ; current == nil {
return nil
2021-08-10 13:32:39 +02:00
}
2022-09-25 13:09:41 +02:00
if current . page == nil {
if current . statusIRI . Host == config . GetHost ( ) {
2023-06-03 11:35:15 +02:00
// This is a local status, no looping to do
2022-09-25 13:09:41 +02:00
continue stackLoop
}
l . Tracef ( "following remote status descendants: %s" , current . statusIRI )
// Look for an attached status replies (as collection)
replies := current . statusable . GetActivityStreamsReplies ( )
2022-09-26 10:14:36 +02:00
if replies == nil {
2022-09-25 13:09:41 +02:00
continue stackLoop
}
// Get the status replies collection
collection := replies . GetActivityStreamsCollection ( )
2022-09-26 10:14:36 +02:00
if collection == nil {
continue stackLoop
}
2022-09-25 13:09:41 +02:00
// Get the "first" property of the replies collection
first := collection . GetActivityStreamsFirst ( )
2022-09-26 10:14:36 +02:00
if first == nil {
2022-09-25 13:09:41 +02:00
continue stackLoop
}
// Set the first activity stream collection page
current . page = first . GetActivityStreamsCollectionPage ( )
2022-09-26 10:14:36 +02:00
if current . page == nil {
continue stackLoop
}
2021-08-10 13:32:39 +02:00
}
2022-09-26 10:14:36 +02:00
pageLoop :
for {
2022-09-25 13:09:41 +02:00
if current . itemIter == nil {
2022-09-26 10:14:36 +02:00
// Get the items associated with this page
2022-09-25 13:09:41 +02:00
items := current . page . GetActivityStreamsItems ( )
2022-09-26 10:14:36 +02:00
if items == nil {
continue stackLoop
}
2022-09-25 13:09:41 +02:00
// Start off the item iterator
current . itemIter = items . Begin ( )
2021-08-10 13:32:39 +02:00
}
2022-09-25 13:09:41 +02:00
itemLoop :
2022-09-26 10:50:14 +02:00
for {
2023-06-03 11:35:15 +02:00
// Check for remaining iter
2022-09-26 10:50:14 +02:00
if current . itemIter == nil {
break itemLoop
}
2021-08-10 13:32:39 +02:00
2023-06-03 11:35:15 +02:00
// Get current item iterator
itemIter := current . itemIter
// Set the next available iterator
current . itemIter = itemIter . Next ( )
2022-09-25 13:09:41 +02:00
2023-06-03 11:35:15 +02:00
// Check for available IRI on item
itemIRI , _ := pub . ToId ( itemIter )
2022-09-25 13:09:41 +02:00
if itemIRI == nil {
continue itemLoop
}
if itemIRI . Host == config . GetHost ( ) {
// This child is one of ours,
continue itemLoop
}
2023-05-12 11:15:54 +02:00
// Dereference the remote status and store in the database.
_ , statusable , err := d . getStatusByURI ( ctx , username , itemIRI )
2022-09-25 13:09:41 +02:00
if err != nil {
2023-05-12 11:15:54 +02:00
l . Errorf ( "error dereferencing remote status %s: %v" , itemIRI , err )
continue itemLoop
}
if statusable == nil {
// Already up-to-date.
2022-09-25 13:09:41 +02:00
continue itemLoop
2021-08-10 13:32:39 +02:00
}
2022-09-25 13:09:41 +02:00
// Put current and next frame at top of stack
stack = append ( stack , current , & frame {
statusIRI : itemIRI ,
statusable : statusable ,
} )
2022-09-26 09:39:59 +02:00
// Now start at top of loop
continue stackLoop
2021-08-10 13:32:39 +02:00
}
2022-09-25 13:09:41 +02:00
// Get the current page's "next" property
pageNext := current . page . GetActivityStreamsNext ( )
2023-06-22 21:46:36 +02:00
if pageNext == nil || ! pageNext . IsIRI ( ) {
2022-09-25 13:09:41 +02:00
continue stackLoop
}
2023-06-22 21:46:36 +02:00
// Get the IRI of the "next" property.
2022-09-25 13:09:41 +02:00
pageNextIRI := pageNext . GetIRI ( )
2023-06-22 21:46:36 +02:00
// Ensure this isn't a self-referencing page...
// We don't need to store / check against a map of IRIs
// as our getStatusByIRI() function above prevents iter'ing
// over statuses that have been dereferenced recently, due to
// the `fetched_at` field preventing frequent refetches.
if id := current . page . GetJSONLDId ( ) ; id != nil &&
pageNextIRI . String ( ) == id . Get ( ) . String ( ) {
log . Warnf ( ctx , "self referencing collection page: %s" , pageNextIRI )
2022-09-26 10:14:36 +02:00
continue stackLoop
}
2022-09-25 13:09:41 +02:00
// Dereference this next collection page by its IRI
2023-05-12 11:15:54 +02:00
collectionPage , err := d . dereferenceCollectionPage ( ctx ,
username ,
pageNextIRI ,
)
2022-09-25 13:09:41 +02:00
if err != nil {
l . Errorf ( "error dereferencing remote collection page %q: %s" , pageNextIRI . String ( ) , err )
continue stackLoop
}
// Set the updated collection page
current . page = collectionPage
2022-09-26 10:14:36 +02:00
continue pageLoop
2021-08-10 13:32:39 +02:00
}
}
2023-05-28 14:08:35 +02:00
return gtserror . Newf ( "reached %d descendant iterations for %q" , maxIter , ogIRI . String ( ) )
2021-08-10 13:32:39 +02:00
}