From 638f023a1c03a17881be76e8a3de3ceda417393d Mon Sep 17 00:00:00 2001 From: tobi <31960611+tsmethurst@users.noreply.github.com> Date: Mon, 21 Aug 2023 15:03:04 +0200 Subject: [PATCH] [performance] Tweak media attachment cleanup; replace stale index (#2143) --- internal/cleaner/media.go | 70 ++++++++------ internal/db/bundb/media.go | 91 ------------------- internal/db/bundb/media_test.go | 15 +-- ...230821075342_attachment_cleanup_updates.go | 61 +++++++++++++ internal/db/media.go | 20 ---- 5 files changed, 104 insertions(+), 153 deletions(-) create mode 100644 internal/db/bundb/migrations/20230821075342_attachment_cleanup_updates.go diff --git a/internal/cleaner/media.go b/internal/cleaner/media.go index 8b11a30bf..9aca7aa20 100644 --- a/internal/cleaner/media.go +++ b/internal/cleaner/media.go @@ -323,8 +323,8 @@ func (m *Media) pruneUnused(ctx context.Context, media *gtsmodel.MediaAttachment l := log.WithContext(ctx). WithField("media", media.ID) - // Check whether we have the required account for media. - account, missing, err := m.getRelatedAccount(ctx, media) + // Check whether we have the account that owns the media. + account, missing, err := m.getOwningAccount(ctx, media) if err != nil { return false, err } else if missing { @@ -371,8 +371,8 @@ func (m *Media) fixCacheState(ctx context.Context, media *gtsmodel.MediaAttachme l := log.WithContext(ctx). WithField("media", media.ID) - // Check whether we have the required account for media. - _, missingAccount, err := m.getRelatedAccount(ctx, media) + // Check whether we have the account that owns the media. + _, missingAccount, err := m.getOwningAccount(ctx, media) if err != nil { return false, err } else if missingAccount { @@ -428,32 +428,42 @@ func (m *Media) uncacheRemote(ctx context.Context, after time.Time, media *gtsmo l := log.WithContext(ctx). WithField("media", media.ID) - // Check whether we have the required account for media. - account, missing, err := m.getRelatedAccount(ctx, media) - if err != nil { - return false, err - } else if missing { - l.Debug("skipping due to missing account") - return false, nil - } + // There are two possibilities here: + // + // 1. Media is an avatar or header; we should uncache + // it if we haven't seen the account recently. + // 2. Media is attached to a status; we should uncache + // it if we haven't seen the status recently. + if *media.Avatar || *media.Header { + // Check whether we have the account that owns the media. + account, missing, err := m.getOwningAccount(ctx, media) + if err != nil { + return false, err + } else if missing { + // PruneUnused will take care of this case. + l.Debug("skipping due to missing account") + return false, nil + } - if account != nil && account.FetchedAt.After(after) { - l.Debug("skipping due to recently fetched account") - return false, nil - } + if account != nil && account.FetchedAt.After(after) { + l.Debug("skipping due to recently fetched account") + return false, nil + } + } else { + // Check whether we have the status that media is attached to. + status, missing, err := m.getRelatedStatus(ctx, media) + if err != nil { + return false, err + } else if missing { + // PruneUnused will take care of this case. + l.Debug("skipping due to missing status") + return false, nil + } - // Check whether we have the required status for media. - status, missing, err := m.getRelatedStatus(ctx, media) - if err != nil { - return false, err - } else if missing { - l.Debug("skipping due to missing status") - return false, nil - } - - if status != nil && status.FetchedAt.After(after) { - l.Debug("skipping due to recently fetched status") - return false, nil + if status != nil && status.FetchedAt.After(after) { + l.Debug("skipping due to recently fetched status") + return false, nil + } } // This media is too old, uncache it. @@ -461,13 +471,13 @@ func (m *Media) uncacheRemote(ctx context.Context, after time.Time, media *gtsmo return true, m.uncache(ctx, media) } -func (m *Media) getRelatedAccount(ctx context.Context, media *gtsmodel.MediaAttachment) (*gtsmodel.Account, bool, error) { +func (m *Media) getOwningAccount(ctx context.Context, media *gtsmodel.MediaAttachment) (*gtsmodel.Account, bool, error) { if media.AccountID == "" { // no related account. return nil, false, nil } - // Load the account related to this media. + // Load the account that owns this media. account, err := m.state.DB.GetAccountByID( gtscontext.SetBarebones(ctx), media.AccountID, diff --git a/internal/db/bundb/media.go b/internal/db/bundb/media.go index 7f079cb34..fe6aefa90 100644 --- a/internal/db/bundb/media.go +++ b/internal/db/bundb/media.go @@ -200,23 +200,6 @@ func (m *mediaDB) DeleteAttachment(ctx context.Context, id string) error { return err } -func (m *mediaDB) CountRemoteOlderThan(ctx context.Context, olderThan time.Time) (int, error) { - q := m.db. - NewSelect(). - TableExpr("? AS ?", bun.Ident("media_attachments"), bun.Ident("media_attachment")). - Column("media_attachment.id"). - Where("? = ?", bun.Ident("media_attachment.cached"), true). - Where("? IS NOT NULL", bun.Ident("media_attachment.remote_url")). - Where("? < ?", bun.Ident("media_attachment.created_at"), olderThan) - - count, err := q.Count(ctx) - if err != nil { - return 0, err - } - - return count, nil -} - func (m *mediaDB) GetAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) { attachmentIDs := make([]string, 0, limit) @@ -286,77 +269,3 @@ func (m *mediaDB) GetCachedAttachmentsOlderThan(ctx context.Context, olderThan t return m.GetAttachmentsByIDs(ctx, attachmentIDs) } - -func (m *mediaDB) GetAvatarsAndHeaders(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) { - attachmentIDs := make([]string, 0, limit) - - q := m.db.NewSelect(). - TableExpr("? AS ?", bun.Ident("media_attachments"), bun.Ident("media_attachment")). - Column("media_attachment.id"). - WhereGroup(" AND ", func(innerQ *bun.SelectQuery) *bun.SelectQuery { - return innerQ. - WhereOr("? = ?", bun.Ident("media_attachment.avatar"), true). - WhereOr("? = ?", bun.Ident("media_attachment.header"), true) - }). - Order("media_attachment.id DESC") - - if maxID != "" { - q = q.Where("? < ?", bun.Ident("media_attachment.id"), maxID) - } - - if limit != 0 { - q = q.Limit(limit) - } - - if err := q.Scan(ctx, &attachmentIDs); err != nil { - return nil, err - } - - return m.GetAttachmentsByIDs(ctx, attachmentIDs) -} - -func (m *mediaDB) GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, error) { - attachmentIDs := make([]string, 0, limit) - - q := m.db. - NewSelect(). - TableExpr("? AS ?", bun.Ident("media_attachments"), bun.Ident("media_attachment")). - Column("media_attachment.id"). - Where("? = ?", bun.Ident("media_attachment.cached"), true). - Where("? = ?", bun.Ident("media_attachment.avatar"), false). - Where("? = ?", bun.Ident("media_attachment.header"), false). - Where("? < ?", bun.Ident("media_attachment.created_at"), olderThan). - Where("? IS NULL", bun.Ident("media_attachment.remote_url")). - Where("? IS NULL", bun.Ident("media_attachment.status_id")). - Order("media_attachment.created_at DESC") - - if limit != 0 { - q = q.Limit(limit) - } - - if err := q.Scan(ctx, &attachmentIDs); err != nil { - return nil, err - } - - return m.GetAttachmentsByIDs(ctx, attachmentIDs) -} - -func (m *mediaDB) CountLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time) (int, error) { - q := m.db. - NewSelect(). - TableExpr("? AS ?", bun.Ident("media_attachments"), bun.Ident("media_attachment")). - Column("media_attachment.id"). - Where("? = ?", bun.Ident("media_attachment.cached"), true). - Where("? = ?", bun.Ident("media_attachment.avatar"), false). - Where("? = ?", bun.Ident("media_attachment.header"), false). - Where("? < ?", bun.Ident("media_attachment.created_at"), olderThan). - Where("? IS NULL", bun.Ident("media_attachment.remote_url")). - Where("? IS NULL", bun.Ident("media_attachment.status_id")) - - count, err := q.Count(ctx) - if err != nil { - return 0, err - } - - return count, nil -} diff --git a/internal/db/bundb/media_test.go b/internal/db/bundb/media_test.go index 59b927119..691c81729 100644 --- a/internal/db/bundb/media_test.go +++ b/internal/db/bundb/media_test.go @@ -23,7 +23,6 @@ import ( "time" "github.com/stretchr/testify/suite" - "github.com/superseriousbusiness/gotosocial/testrig" ) type MediaTestSuite struct { @@ -43,20 +42,12 @@ func (suite *MediaTestSuite) TestGetOlder() { suite.Len(attachments, 2) } -func (suite *MediaTestSuite) TestGetAvisAndHeaders() { +func (suite *MediaTestSuite) TestGetCachedAttachmentsOlderThan() { ctx := context.Background() - attachments, err := suite.db.GetAvatarsAndHeaders(ctx, "", 20) + attachments, err := suite.db.GetCachedAttachmentsOlderThan(ctx, time.Now(), 20) suite.NoError(err) - suite.Len(attachments, 3) -} - -func (suite *MediaTestSuite) TestGetLocalUnattachedOlderThan() { - ctx := context.Background() - - attachments, err := suite.db.GetLocalUnattachedOlderThan(ctx, testrig.TimeMustParse("2090-06-04T13:12:00Z"), 10) - suite.NoError(err) - suite.Len(attachments, 1) + suite.Len(attachments, 2) } func TestMediaTestSuite(t *testing.T) { diff --git a/internal/db/bundb/migrations/20230821075342_attachment_cleanup_updates.go b/internal/db/bundb/migrations/20230821075342_attachment_cleanup_updates.go new file mode 100644 index 000000000..2ee0a0c7d --- /dev/null +++ b/internal/db/bundb/migrations/20230821075342_attachment_cleanup_updates.go @@ -0,0 +1,61 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package migrations + +import ( + "context" + + "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/uptrace/bun" +) + +func init() { + up := func(ctx context.Context, db *bun.DB) error { + return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + log.Info(ctx, "dropping previous media attachment cleanup index, please wait and don't interrupt it (this may take a while)") + if _, err := tx. + NewDropIndex(). + Index("media_attachments_cleanup_idx"). + Exec(ctx); err != nil { + return err + } + + log.Info(ctx, "creating new media attachment cleanup index, please wait and don't interrupt it (this may take a while)") + if _, err := tx. + NewCreateIndex(). + Table("media_attachments"). + Index("media_attachments_cleanup_idx"). + Column("cached", "created_at"). + Exec(ctx); err != nil { + return err + } + + return nil + }) + } + + down := func(ctx context.Context, db *bun.DB) error { + return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + return nil + }) + } + + if err := Migrations.Register(up, down); err != nil { + panic(err) + } +} diff --git a/internal/db/media.go b/internal/db/media.go index 94a365c26..0ef03226b 100644 --- a/internal/db/media.go +++ b/internal/db/media.go @@ -50,24 +50,4 @@ type Media interface { // GetCachedAttachmentsOlderThan gets limit n remote attachments (including avatars and headers) older than // the given time. These will be returned in order of attachment.created_at descending (i.e. newest to oldest). GetCachedAttachmentsOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, error) - - // CountRemoteOlderThan is like GetRemoteOlderThan, except instead of getting limit n attachments, - // it just counts how many remote attachments in the database (including avatars and headers) meet - // the olderThan criteria. - CountRemoteOlderThan(ctx context.Context, olderThan time.Time) (int, error) - - // GetAvatarsAndHeaders fetches limit n avatars and headers with an id < maxID. These headers - // and avis may be in use or not; the caller should check this if it's important. - GetAvatarsAndHeaders(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) - - // GetLocalUnattachedOlderThan fetches limit n local media attachments (including avatars and headers), older than - // the given time, which aren't header or avatars, and aren't attached to a status. In other words, attachments which were - // uploaded but never used for whatever reason, or attachments that were attached to a status which was subsequently deleted. - // - // These will be returned in order of attachment.created_at descending (newest to oldest in other words). - GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, error) - - // CountLocalUnattachedOlderThan is like GetLocalUnattachedOlderThan, except instead of getting limit n attachments, - // it just counts how many local attachments in the database meet the olderThan criteria. - CountLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time) (int, error) }