From 9eff0d46e49b947dc2642207ee49ed657eb6b565 Mon Sep 17 00:00:00 2001 From: kim <89579420+NyaaaWhatsUpDoc@users.noreply.github.com> Date: Mon, 24 Jul 2023 13:14:13 +0100 Subject: [PATCH] [feature/performance] support uncaching remote emoji + scheduled cleanup functions (#1987) --- .../action/admin/media/prune/all.go | 2 +- internal/cleaner/cleaner.go | 12 +- internal/cleaner/cleaner_test.go | 80 ++++ internal/cleaner/emoji.go | 321 ++++++++++++-- internal/cleaner/emoji_test.go | 402 ++++++++++++++++++ internal/cleaner/media.go | 54 ++- internal/db/account.go | 3 + internal/db/bundb/account.go | 49 +++ internal/db/bundb/emoji.go | 86 +++- internal/db/bundb/media.go | 78 ++-- internal/db/bundb/media_test.go | 2 +- .../20230724100000_emoji_cleanup.go | 55 +++ internal/db/bundb/report.go | 2 +- internal/db/bundb/search.go | 39 +- internal/db/bundb/status.go | 34 +- internal/db/bundb/status_test.go | 4 +- internal/db/bundb/util.go | 36 ++ internal/db/emoji.go | 11 +- internal/db/media.go | 12 +- internal/db/status.go | 9 +- internal/gtsmodel/emoji.go | 1 + internal/media/manager.go | 59 ++- internal/media/processingemoji.go | 42 +- internal/processing/admin/media.go | 2 +- internal/processing/fromclientapi.go | 2 +- internal/processing/media/getfile.go | 54 ++- internal/processing/report/create.go | 2 +- internal/regexes/regexes.go | 2 +- internal/storage/storage.go | 3 + internal/typeutils/internaltofrontend.go | 2 +- test/run-postgres.sh | 37 ++ test/run-sqlite.sh | 7 + testrig/testmodels.go | 2 + 33 files changed, 1287 insertions(+), 219 deletions(-) create mode 100644 internal/cleaner/cleaner_test.go create mode 100644 internal/cleaner/emoji_test.go create mode 100644 internal/db/bundb/migrations/20230724100000_emoji_cleanup.go create mode 100755 test/run-postgres.sh create mode 100755 test/run-sqlite.sh diff --git a/cmd/gotosocial/action/admin/media/prune/all.go b/cmd/gotosocial/action/admin/media/prune/all.go index 7642fe928..90c08c7db 100644 --- 
a/cmd/gotosocial/action/admin/media/prune/all.go +++ b/cmd/gotosocial/action/admin/media/prune/all.go @@ -50,7 +50,7 @@ var All action.GTSAction = func(ctx context.Context) error { // Perform the actual pruning with logging. prune.cleaner.Media().All(ctx, days) - prune.cleaner.Emoji().All(ctx) + prune.cleaner.Emoji().All(ctx, days) // Perform a cleanup of storage (for removed local dirs). if err := prune.storage.Storage.Clean(ctx); err != nil { diff --git a/internal/cleaner/cleaner.go b/internal/cleaner/cleaner.go index ee1e4785f..70497c10e 100644 --- a/internal/cleaner/cleaner.go +++ b/internal/cleaner/cleaner.go @@ -61,19 +61,19 @@ func (c *Cleaner) Media() *Media { return &c.media } -// checkFiles checks for each of the provided files, and calls onMissing() if any of them are missing. Returns true if missing. -func (c *Cleaner) checkFiles(ctx context.Context, onMissing func() error, files ...string) (bool, error) { +// haveFiles returns whether all of the provided files exist within current storage. +func (c *Cleaner) haveFiles(ctx context.Context, files ...string) (bool, error) { for _, file := range files { // Check whether each file exists in storage. have, err := c.state.Storage.Has(ctx, file) if err != nil { return false, gtserror.Newf("error checking storage for %s: %w", file, err) } else if !have { - // Missing files, perform hook. - return true, onMissing() + // Missing file(s). + return false, nil } } - return false, nil + return true, nil } // removeFiles removes the provided files, returning the number of them returned. 
@@ -129,7 +129,7 @@ func scheduleJobs(c *Cleaner) { c.state.Workers.Scheduler.Schedule(sched.NewJob(func(start time.Time) { log.Info(nil, "starting media clean") c.Media().All(doneCtx, config.GetMediaRemoteCacheDays()) - c.Emoji().All(doneCtx) + c.Emoji().All(doneCtx, config.GetMediaRemoteCacheDays()) log.Infof(nil, "finished media clean after %s", time.Since(start)) }).EveryAt(midnight, day)) } diff --git a/internal/cleaner/cleaner_test.go b/internal/cleaner/cleaner_test.go new file mode 100644 index 000000000..d23dac504 --- /dev/null +++ b/internal/cleaner/cleaner_test.go @@ -0,0 +1,80 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +package cleaner_test + +import ( + "testing" + + "github.com/stretchr/testify/suite" + "github.com/superseriousbusiness/gotosocial/internal/cleaner" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" + "github.com/superseriousbusiness/gotosocial/internal/state" + "github.com/superseriousbusiness/gotosocial/testrig" +) + +type CleanerTestSuite struct { + state state.State + cleaner *cleaner.Cleaner + emojis map[string]*gtsmodel.Emoji + suite.Suite +} + +func TestCleanerTestSuite(t *testing.T) { + suite.Run(t, &CleanerTestSuite{}) +} + +func (suite *CleanerTestSuite) SetupSuite() { + testrig.InitTestConfig() + testrig.InitTestLog() +} + +func (suite *CleanerTestSuite) SetupTest() { + // Initialize gts caches. + suite.state.Caches.Init() + + // Ensure scheduler started (even if unused). + suite.state.Workers.Scheduler.Start(nil) + + // Initialize test database. + _ = testrig.NewTestDB(&suite.state) + testrig.StandardDBSetup(suite.state.DB, nil) + + // Initialize test storage (in-memory). + suite.state.Storage = testrig.NewInMemoryStorage() + + // Initialize test cleaner instance. + suite.cleaner = cleaner.New(&suite.state) + + // Allocate new test model emojis. + suite.emojis = testrig.NewTestEmojis() +} + +func (suite *CleanerTestSuite) TearDownTest() { + testrig.StandardDBTeardown(suite.state.DB) +} + +// mapvals extracts a slice of values from the values contained within the map. 
func mapvals[Key comparable, Val any](m map[Key]Val) []Val {
	// Reserve room for every value up front; the result is
	// always non-nil, even for an empty or nil input map.
	out := make([]Val, 0, len(m))

	// Map iteration order is unspecified, so the
	// order of the returned values is as well.
	for _, v := range m {
		out = append(out, v)
	}

	return out
}
func (e *Emoji) LogFixBroken(ctx context.Context) { log.Info(ctx, "start") if n, err := e.FixBroken(ctx); err != nil { @@ -61,20 +66,43 @@ func (e *Emoji) LogFixBroken(ctx context.Context) { } } -// PruneMissing will delete emoji with missing files from the database and storage driver. -// Context will be checked for `gtscontext.DryRun()` to perform the action. NOTE: this function -// should be updated to match media.FixCacheStat() if we ever support emoji uncaching. -func (e *Emoji) PruneMissing(ctx context.Context) (int, error) { - var ( - total int - maxID string - ) +// LogPruneUnused performs Emoji.PruneUnused(...), logging the start and outcome. +func (e *Emoji) LogPruneUnused(ctx context.Context) { + log.Info(ctx, "start") + if n, err := e.PruneUnused(ctx); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "pruned: %d", n) + } +} + +// LogFixCacheStates performs Emoji.FixCacheStates(...), logging the start and outcome. +func (e *Emoji) LogFixCacheStates(ctx context.Context) { + log.Info(ctx, "start") + if n, err := e.FixCacheStates(ctx); err != nil { + log.Error(ctx, err) + } else { + log.Infof(ctx, "fixed: %d", n) + } +} + +// UncacheRemote will uncache all remote emoji older than given input time. Context +// will be checked for `gtscontext.DryRun()` in order to actually perform the action. +func (e *Emoji) UncacheRemote(ctx context.Context, olderThan time.Time) (int, error) { + var total int + + // Drop time by a minute to improve search, + // (i.e. make it olderThan inclusive search). + olderThan = olderThan.Add(-time.Minute) + + // Store recent time. + mostRecent := olderThan for { - // Fetch the next batch of emoji media up to next ID. - emojis, err := e.state.DB.GetEmojis(ctx, maxID, selectLimit) + // Fetch the next batch of cached emojis older than last-set time. 
+ emojis, err := e.state.DB.GetCachedEmojisOlderThan(ctx, olderThan, selectLimit) if err != nil && !errors.Is(err, db.ErrNoEntries) { - return total, gtserror.Newf("error getting emojis: %w", err) + return total, gtserror.Newf("error getting remote emoji: %w", err) } if len(emojis) == 0 { @@ -82,17 +110,20 @@ func (e *Emoji) PruneMissing(ctx context.Context) (int, error) { break } - // Use last as the next 'maxID' value. - maxID = emojis[len(emojis)-1].ID + // Use last created-at as the next 'olderThan' value. + olderThan = emojis[len(emojis)-1].CreatedAt for _, emoji := range emojis { - // Check / fix missing emoji media. - fixed, err := e.pruneMissing(ctx, emoji) + // Check / uncache each remote emoji. + uncached, err := e.uncacheRemote(ctx, + mostRecent, + emoji, + ) if err != nil { return total, err } - if fixed { + if uncached { // Update // count. total++ @@ -145,22 +176,197 @@ func (e *Emoji) FixBroken(ctx context.Context) (int, error) { return total, nil } -func (e *Emoji) pruneMissing(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { - return e.checkFiles(ctx, func() error { - // Emoji missing files, delete it. - // NOTE: if we ever support uncaching - // of emojis, change to e.uncache(). - // In that case we should also rename - // this function to match the media - // equivalent -> fixCacheState(). - log.WithContext(ctx). - WithField("emoji", emoji.ID). - Debug("deleting due to missing emoji") - return e.delete(ctx, emoji) - }, +// PruneUnused will delete all unused emoji media from the database and storage driver. +// Context will be checked for `gtscontext.DryRun()` to perform the action. NOTE: this function +// should be updated to match media.FixCacheStat() if we ever support emoji uncaching. +func (e *Emoji) PruneUnused(ctx context.Context) (int, error) { + var ( + total int + maxID string + ) + + for { + // Fetch the next batch of emoji media up to next ID. 
+ emojis, err := e.state.DB.GetRemoteEmojis(ctx, maxID, selectLimit) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return total, gtserror.Newf("error getting remote emojis: %w", err) + } + + if len(emojis) == 0 { + // reached end. + break + } + + // Use last as the next 'maxID' value. + maxID = emojis[len(emojis)-1].ID + + for _, emoji := range emojis { + // Check / prune unused emoji media. + fixed, err := e.pruneUnused(ctx, emoji) + if err != nil { + return total, err + } + + if fixed { + // Update + // count. + total++ + } + } + } + + return total, nil +} + +// FixCacheStatus will check all emoji for up-to-date cache status (i.e. in storage driver). +// Context will be checked for `gtscontext.DryRun()` to perform the action. NOTE: this function +// should be updated to match media.FixCacheStat() if we ever support emoji uncaching. +func (e *Emoji) FixCacheStates(ctx context.Context) (int, error) { + var ( + total int + maxID string + ) + + for { + // Fetch the next batch of emoji media up to next ID. + emojis, err := e.state.DB.GetRemoteEmojis(ctx, maxID, selectLimit) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return total, gtserror.Newf("error getting remote emojis: %w", err) + } + + if len(emojis) == 0 { + // reached end. + break + } + + // Use last as the next 'maxID' value. + maxID = emojis[len(emojis)-1].ID + + for _, emoji := range emojis { + // Check / fix required emoji cache states. + fixed, err := e.fixCacheState(ctx, emoji) + if err != nil { + return total, err + } + + if fixed { + // Update + // count. + total++ + } + } + } + + return total, nil +} + +func (e *Emoji) pruneUnused(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { + // Start a log entry for emoji. + l := log.WithContext(ctx). + WithField("emoji", emoji.ID) + + // Load any related accounts using this emoji. 
+ accounts, err := e.getRelatedAccounts(ctx, emoji) + if err != nil { + return false, err + } else if len(accounts) > 0 { + l.Debug("skipping as account emoji in use") + return false, nil + } + + // Load any related statuses using this emoji. + statuses, err := e.getRelatedStatuses(ctx, emoji) + if err != nil { + return false, err + } else if len(statuses) > 0 { + l.Debug("skipping as status emoji in use") + return false, nil + } + + // Check not recently created, give it some time to be "used" again. + if time.Now().Add(-24 * time.Hour * 7).Before(emoji.CreatedAt) { + l.Debug("skipping due to recently created") + return false, nil + } + + // Emoji totally unused, delete it. + l.Debug("deleting unused emoji") + return true, e.delete(ctx, emoji) +} + +func (e *Emoji) fixCacheState(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { + // Start a log entry for emoji. + l := log.WithContext(ctx). + WithField("emoji", emoji.ID) + + // Check whether files exist. + exist, err := e.haveFiles(ctx, emoji.ImageStaticPath, emoji.ImagePath, ) + if err != nil { + return false, err + } + + switch { + case *emoji.Cached && !exist: + // Mark as uncached if expected files don't exist. + l.Debug("cached=true exists=false => marking uncached") + return true, e.uncache(ctx, emoji) + + case !*emoji.Cached && exist: + // Remove files if we don't expect them to exist. + l.Debug("cached=false exists=true => removing files") + _, err := e.removeFiles(ctx, + emoji.ImageStaticPath, + emoji.ImagePath, + ) + return true, err + + default: + return false, nil + } +} + +func (e *Emoji) uncacheRemote(ctx context.Context, after time.Time, emoji *gtsmodel.Emoji) (bool, error) { + if !*emoji.Cached { + // Already uncached. + return false, nil + } + + // Start a log entry for emoji. + l := log.WithContext(ctx). + WithField("emoji", emoji.ID) + + // Load any related accounts using this emoji. 
+ accounts, err := e.getRelatedAccounts(ctx, emoji) + if err != nil { + return false, err + } + + for _, account := range accounts { + if account.FetchedAt.After(after) { + l.Debug("skipping due to recently fetched account") + return false, nil + } + } + + // Load any related statuses using this emoji. + statuses, err := e.getRelatedStatuses(ctx, emoji) + if err != nil { + return false, err + } + + for _, status := range statuses { + if status.FetchedAt.After(after) { + l.Debug("skipping due to recently fetched status") + return false, nil + } + } + + // This emoji is too old, uncache it. + l.Debug("uncaching old remote emoji") + return true, e.uncache(ctx, emoji) } func (e *Emoji) fixBroken(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { @@ -214,6 +420,47 @@ func (e *Emoji) getRelatedCategory(ctx context.Context, emoji *gtsmodel.Emoji) ( return category, false, nil } +func (e *Emoji) getRelatedAccounts(ctx context.Context, emoji *gtsmodel.Emoji) ([]*gtsmodel.Account, error) { + accounts, err := e.state.DB.GetAccountsUsingEmoji(ctx, emoji.ID) + if err != nil { + return nil, gtserror.Newf("error fetching accounts using emoji %s: %w", emoji.ID, err) + } + return accounts, nil +} + +func (e *Emoji) getRelatedStatuses(ctx context.Context, emoji *gtsmodel.Emoji) ([]*gtsmodel.Status, error) { + statuses, err := e.state.DB.GetStatusesUsingEmoji(ctx, emoji.ID) + if err != nil { + return nil, gtserror.Newf("error fetching statuses using emoji %s: %w", emoji.ID, err) + } + return statuses, nil +} + +func (e *Emoji) uncache(ctx context.Context, emoji *gtsmodel.Emoji) error { + if gtscontext.DryRun(ctx) { + // Dry run, do nothing. + return nil + } + + // Remove emoji and static. + _, err := e.removeFiles(ctx, + emoji.ImagePath, + emoji.ImageStaticPath, + ) + if err != nil { + return gtserror.Newf("error removing emoji files: %w", err) + } + + // Update emoji to reflect that we no longer have it cached. 
+ log.Debugf(ctx, "marking emoji as uncached: %s", emoji.ID) + emoji.Cached = func() *bool { i := false; return &i }() + if err := e.state.DB.UpdateEmoji(ctx, emoji, "cached"); err != nil { + return gtserror.Newf("error updating emoji: %w", err) + } + + return nil +} + func (e *Emoji) delete(ctx context.Context, emoji *gtsmodel.Emoji) error { if gtscontext.DryRun(ctx) { // Dry run, do nothing. diff --git a/internal/cleaner/emoji_test.go b/internal/cleaner/emoji_test.go new file mode 100644 index 000000000..81fde6e48 --- /dev/null +++ b/internal/cleaner/emoji_test.go @@ -0,0 +1,402 @@ +package cleaner_test + +import ( + "context" + "errors" + "time" + + "github.com/superseriousbusiness/gotosocial/internal/config" + "github.com/superseriousbusiness/gotosocial/internal/db" + "github.com/superseriousbusiness/gotosocial/internal/gtscontext" + "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" +) + +func (suite *CleanerTestSuite) TestEmojiUncacheRemote() { + suite.testEmojiUncacheRemote( + context.Background(), + mapvals(suite.emojis), + ) +} + +func (suite *CleanerTestSuite) TestEmojiUncacheRemoteDryRun() { + suite.testEmojiUncacheRemote( + gtscontext.SetDryRun(context.Background()), + mapvals(suite.emojis), + ) +} + +func (suite *CleanerTestSuite) TestEmojiFixBroken() { + suite.testEmojiFixBroken( + context.Background(), + mapvals(suite.emojis), + ) +} + +func (suite *CleanerTestSuite) TestEmojiFixBrokenDryRun() { + suite.testEmojiFixBroken( + gtscontext.SetDryRun(context.Background()), + mapvals(suite.emojis), + ) +} + +func (suite *CleanerTestSuite) TestEmojiPruneUnused() { + suite.testEmojiPruneUnused( + context.Background(), + mapvals(suite.emojis), + ) +} + +func (suite *CleanerTestSuite) TestEmojiPruneUnusedDryRun() { + suite.testEmojiPruneUnused( + gtscontext.SetDryRun(context.Background()), + mapvals(suite.emojis), + ) +} + +func (suite *CleanerTestSuite) TestEmojiFixCacheStates() { + suite.testEmojiFixCacheStates( + context.Background(), + 
mapvals(suite.emojis), + ) +} + +func (suite *CleanerTestSuite) TestEmojiFixCacheStatesDryRun() { + suite.testEmojiFixCacheStates( + gtscontext.SetDryRun(context.Background()), + mapvals(suite.emojis), + ) +} + +func (suite *CleanerTestSuite) testEmojiUncacheRemote(ctx context.Context, emojis []*gtsmodel.Emoji) { + var uncacheIDs []string + + // Test state. + t := suite.T() + + // Get max remote cache days to keep. + days := config.GetMediaRemoteCacheDays() + olderThan := time.Now().Add(-24 * time.Hour * time.Duration(days)) + + for _, emoji := range emojis { + // Check whether this emoji should be uncached. + ok, err := suite.shouldUncacheEmoji(ctx, emoji, olderThan) + if err != nil { + t.Fatalf("error checking whether emoji should be uncached: %v", err) + } + + if ok { + // Mark this emoji ID as to be uncached. + uncacheIDs = append(uncacheIDs, emoji.ID) + } + } + + // Attempt to uncache remote emojis. + found, err := suite.cleaner.Emoji().UncacheRemote(ctx, olderThan) + if err != nil { + t.Errorf("error uncaching remote emojis: %v", err) + return + } + + // Check expected were uncached. + if found != len(uncacheIDs) { + t.Errorf("expected %d emojis to be uncached, %d were", len(uncacheIDs), found) + return + } + + if gtscontext.DryRun(ctx) { + // nothing else to test. + return + } + + for _, id := range uncacheIDs { + // Fetch the emoji by ID that should now be uncached. + emoji, err := suite.state.DB.GetEmojiByID(ctx, id) + if err != nil { + t.Fatalf("error fetching emoji from database: %v", err) + } + + // Check cache state. + if *emoji.Cached { + t.Errorf("emoji %s@%s should have been uncached", emoji.Shortcode, emoji.Domain) + } + + // Check that the emoji files in storage have been deleted. 
+ if ok, err := suite.state.Storage.Has(ctx, emoji.ImagePath); err != nil { + t.Fatalf("error checking storage for emoji: %v", err) + } else if ok { + t.Errorf("emoji %s@%s image path should not exist", emoji.Shortcode, emoji.Domain) + } else if ok, err := suite.state.Storage.Has(ctx, emoji.ImageStaticPath); err != nil { + t.Fatalf("error checking storage for emoji: %v", err) + } else if ok { + t.Errorf("emoji %s@%s image static path should not exist", emoji.Shortcode, emoji.Domain) + } + } +} + +func (suite *CleanerTestSuite) shouldUncacheEmoji(ctx context.Context, emoji *gtsmodel.Emoji, after time.Time) (bool, error) { + if emoji.ImageRemoteURL == "" { + // Local emojis are never uncached. + return false, nil + } + + if emoji.Cached == nil || !*emoji.Cached { + // Emoji is already uncached. + return false, nil + } + + // Get related accounts using this emoji (if any). + accounts, err := suite.state.DB.GetAccountsUsingEmoji(ctx, emoji.ID) + if err != nil { + return false, err + } + + // Check if accounts are recently updated. + for _, account := range accounts { + if account.FetchedAt.After(after) { + return false, nil + } + } + + // Get related statuses using this emoji (if any). + statuses, err := suite.state.DB.GetStatusesUsingEmoji(ctx, emoji.ID) + if err != nil { + return false, err + } + + // Check if statuses are recently updated. + for _, status := range statuses { + if status.FetchedAt.After(after) { + return false, nil + } + } + + return true, nil +} + +func (suite *CleanerTestSuite) testEmojiFixBroken(ctx context.Context, emojis []*gtsmodel.Emoji) { + var fixIDs []string + + // Test state. + t := suite.T() + + for _, emoji := range emojis { + // Check whether this emoji should be fixed. + ok, err := suite.shouldFixBrokenEmoji(ctx, emoji) + if err != nil { + t.Fatalf("error checking whether emoji should be fixed: %v", err) + } + + if ok { + // Mark this emoji ID as to be fixed. 
+ fixIDs = append(fixIDs, emoji.ID) + } + } + + // Attempt to fix broken emojis. + found, err := suite.cleaner.Emoji().FixBroken(ctx) + if err != nil { + t.Errorf("error fixing broken emojis: %v", err) + return + } + + // Check expected were fixed. + if found != len(fixIDs) { + t.Errorf("expected %d emojis to be fixed, %d were", len(fixIDs), found) + return + } + + if gtscontext.DryRun(ctx) { + // nothing else to test. + return + } + + for _, id := range fixIDs { + // Fetch the emoji by ID that should now be fixed. + emoji, err := suite.state.DB.GetEmojiByID(ctx, id) + if err != nil { + t.Fatalf("error fetching emoji from database: %v", err) + } + + // Ensure category was cleared. + if emoji.CategoryID != "" { + t.Errorf("emoji %s@%s should have empty category", emoji.Shortcode, emoji.Domain) + } + } +} + +func (suite *CleanerTestSuite) shouldFixBrokenEmoji(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { + if emoji.CategoryID == "" { + // no category issue. + return false, nil + } + + // Get the related category for this emoji. + category, err := suite.state.DB.GetEmojiCategory(ctx, emoji.CategoryID) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + return false, nil + } + + return (category == nil), nil +} + +func (suite *CleanerTestSuite) testEmojiPruneUnused(ctx context.Context, emojis []*gtsmodel.Emoji) { + var pruneIDs []string + + // Test state. + t := suite.T() + + for _, emoji := range emojis { + // Check whether this emoji should be pruned. + ok, err := suite.shouldPruneEmoji(ctx, emoji) + if err != nil { + t.Fatalf("error checking whether emoji should be pruned: %v", err) + } + + if ok { + // Mark this emoji ID as to be pruned. + pruneIDs = append(pruneIDs, emoji.ID) + } + } + + // Attempt to prune emojis. + found, err := suite.cleaner.Emoji().PruneUnused(ctx) + if err != nil { + t.Errorf("error fixing broken emojis: %v", err) + return + } + + // Check expected were pruned. 
+ if found != len(pruneIDs) { + t.Errorf("expected %d emojis to be pruned, %d were", len(pruneIDs), found) + return + } + + if gtscontext.DryRun(ctx) { + // nothing else to test. + return + } + + for _, id := range pruneIDs { + // Fetch the emoji by ID that should now be pruned. + emoji, err := suite.state.DB.GetEmojiByID(ctx, id) + if err != nil && !errors.Is(err, db.ErrNoEntries) { + t.Fatalf("error fetching emoji from database: %v", err) + } + + // Ensure gone. + if emoji != nil { + t.Errorf("emoji %s@%s should have been pruned", emoji.Shortcode, emoji.Domain) + } + } +} + +func (suite *CleanerTestSuite) shouldPruneEmoji(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { + if emoji.ImageRemoteURL == "" { + // Local emojis are never pruned. + return false, nil + } + + // Get related accounts using this emoji (if any). + accounts, err := suite.state.DB.GetAccountsUsingEmoji(ctx, emoji.ID) + if err != nil { + return false, err + } else if len(accounts) > 0 { + return false, nil + } + + // Get related statuses using this emoji (if any). + statuses, err := suite.state.DB.GetStatusesUsingEmoji(ctx, emoji.ID) + if err != nil { + return false, err + } else if len(statuses) > 0 { + return false, nil + } + + return true, nil +} + +func (suite *CleanerTestSuite) testEmojiFixCacheStates(ctx context.Context, emojis []*gtsmodel.Emoji) { + var fixIDs []string + + // Test state. + t := suite.T() + + for _, emoji := range emojis { + // Check whether this emoji should be fixed. + ok, err := suite.shouldFixEmojiCacheState(ctx, emoji) + if err != nil { + t.Fatalf("error checking whether emoji should be fixed: %v", err) + } + + if ok { + // Mark this emoji ID as to be fixed. + fixIDs = append(fixIDs, emoji.ID) + } + } + + // Attempt to fix broken emoji cache states. + found, err := suite.cleaner.Emoji().FixCacheStates(ctx) + if err != nil { + t.Errorf("error fixing broken emojis: %v", err) + return + } + + // Check expected were fixed. 
+ if found != len(fixIDs) { + t.Errorf("expected %d emojis to be fixed, %d were", len(fixIDs), found) + return + } + + if gtscontext.DryRun(ctx) { + // nothing else to test. + return + } + + for _, id := range fixIDs { + // Fetch the emoji by ID that should now be fixed. + emoji, err := suite.state.DB.GetEmojiByID(ctx, id) + if err != nil { + t.Fatalf("error fetching emoji from database: %v", err) + } + + // Ensure emoji cache state has been fixed. + ok, err := suite.shouldFixEmojiCacheState(ctx, emoji) + if err != nil { + t.Fatalf("error checking whether emoji should be fixed: %v", err) + } else if ok { + t.Errorf("emoji %s@%s cache state should have been fixed", emoji.Shortcode, emoji.Domain) + } + } +} + +func (suite *CleanerTestSuite) shouldFixEmojiCacheState(ctx context.Context, emoji *gtsmodel.Emoji) (bool, error) { + // Check whether emoji image path exists. + haveImage, err := suite.state.Storage.Has(ctx, emoji.ImagePath) + if err != nil { + return false, err + } + + // Check whether emoji static path exists. + haveStatic, err := suite.state.Storage.Has(ctx, emoji.ImageStaticPath) + if err != nil { + return false, err + } + + switch exists := (haveImage && haveStatic); { + case emoji.Cached != nil && + *emoji.Cached && !exists: + // (cached can be nil in tests) + // Cached but missing files. + return true, nil + + case emoji.Cached != nil && + !*emoji.Cached && exists: + // (cached can be nil in tests) + // Uncached but unexpected files. + return true, nil + + default: + // No cache state issue. 
+ return false, nil + } +} diff --git a/internal/cleaner/media.go b/internal/cleaner/media.go index 51a0aea6d..8b11a30bf 100644 --- a/internal/cleaner/media.go +++ b/internal/cleaner/media.go @@ -96,9 +96,9 @@ func (m *Media) PruneOrphaned(ctx context.Context) (int, error) { // All media files in storage will have path fitting: {$account}/{$type}/{$size}/{$id}.{$ext} if err := m.state.Storage.WalkKeys(ctx, func(ctx context.Context, path string) error { + // Check for our expected fileserver path format. if !regexes.FilePath.MatchString(path) { - // This is not our expected media - // path format, skip this one. + log.Warn(ctx, "unexpected storage item: %s", path) return nil } @@ -177,10 +177,10 @@ func (m *Media) UncacheRemote(ctx context.Context, olderThan time.Time) (int, er mostRecent := olderThan for { - // Fetch the next batch of attachments older than last-set time. - attachments, err := m.state.DB.GetRemoteOlderThan(ctx, olderThan, selectLimit) + // Fetch the next batch of cached attachments older than last-set time. + attachments, err := m.state.DB.GetCachedAttachmentsOlderThan(ctx, olderThan, selectLimit) if err != nil && !errors.Is(err, db.ErrNoEntries) { - return total, gtserror.Newf("error getting remote media: %w", err) + return total, gtserror.Newf("error getting remote attachments: %w", err) } if len(attachments) == 0 { @@ -220,9 +220,9 @@ func (m *Media) FixCacheStates(ctx context.Context) (int, error) { for { // Fetch the next batch of media attachments up to next max ID. 
- attachments, err := m.state.DB.GetAttachments(ctx, maxID, selectLimit) + attachments, err := m.state.DB.GetRemoteAttachments(ctx, maxID, selectLimit) if err != nil && !errors.Is(err, db.ErrNoEntries) { - return total, gtserror.Newf("error getting avatars / headers: %w", err) + return total, gtserror.Newf("error getting remote attachments: %w", err) } if len(attachments) == 0 { @@ -323,7 +323,7 @@ func (m *Media) pruneUnused(ctx context.Context, media *gtsmodel.MediaAttachment l := log.WithContext(ctx). WithField("media", media.ID) - // Check whether we have the required account for media. + // Check whether we have the required account for media. account, missing, err := m.getRelatedAccount(ctx, media) if err != nil { return false, err @@ -367,14 +367,6 @@ func (m *Media) pruneUnused(ctx context.Context, media *gtsmodel.MediaAttachment } func (m *Media) fixCacheState(ctx context.Context, media *gtsmodel.MediaAttachment) (bool, error) { - if !*media.Cached { - // We ignore uncached media, a - // false negative is a much better - // situation than a false positive, - // re-cache will just overwrite it. - return false, nil - } - // Start a log entry for media. l := log.WithContext(ctx). WithField("media", media.ID) @@ -397,15 +389,33 @@ func (m *Media) fixCacheState(ctx context.Context, media *gtsmodel.MediaAttachme return false, nil } - // So we know this a valid cached media entry. - // Check that we have the files on disk required.... - return m.checkFiles(ctx, func() error { - l.Debug("uncaching due to missing media") - return m.uncache(ctx, media) - }, + // Check whether files exist. + exist, err := m.haveFiles(ctx, media.Thumbnail.Path, media.File.Path, ) + if err != nil { + return false, err + } + + switch { + case *media.Cached && !exist: + // Mark as uncached if expected files don't exist. 
+ l.Debug("cached=true exists=false => uncaching") + return true, m.uncache(ctx, media) + + case !*media.Cached && exist: + // Remove files if we don't expect them to exist. + l.Debug("cached=false exists=true => deleting") + _, err := m.removeFiles(ctx, + media.Thumbnail.Path, + media.File.Path, + ) + return true, err + + default: + return false, nil + } } func (m *Media) uncacheRemote(ctx context.Context, after time.Time, media *gtsmodel.MediaAttachment) (bool, error) { diff --git a/internal/db/account.go b/internal/db/account.go index 4a08918b0..2e113c35e 100644 --- a/internal/db/account.go +++ b/internal/db/account.go @@ -73,6 +73,9 @@ type Account interface { // GetAccountFaves fetches faves/likes created by the target accountID. GetAccountFaves(ctx context.Context, accountID string) ([]*gtsmodel.StatusFave, Error) + // GetAccountsUsingEmoji fetches all account models using emoji with given ID stored in their 'emojis' column. + GetAccountsUsingEmoji(ctx context.Context, emojiID string) ([]*gtsmodel.Account, error) + // GetAccountStatusesCount is a shortcut for the common action of counting statuses produced by accountID. CountAccountStatuses(ctx context.Context, accountID string) (int, Error) diff --git a/internal/db/bundb/account.go b/internal/db/bundb/account.go index 17339732e..179db6bb3 100644 --- a/internal/db/bundb/account.go +++ b/internal/db/bundb/account.go @@ -56,6 +56,27 @@ func (a *accountDB) GetAccountByID(ctx context.Context, id string) (*gtsmodel.Ac ) } +func (a *accountDB) GetAccountsByIDs(ctx context.Context, ids []string) ([]*gtsmodel.Account, error) { + accounts := make([]*gtsmodel.Account, 0, len(ids)) + + for _, id := range ids { + // Attempt to fetch account from DB. + account, err := a.GetAccountByID( + gtscontext.SetBarebones(ctx), + id, + ) + if err != nil { + log.Errorf(ctx, "error getting account %q: %v", id, err) + continue + } + + // Append account to return slice. 
+ accounts = append(accounts, account) + } + + return accounts, nil +} + func (a *accountDB) GetAccountByURI(ctx context.Context, uri string) (*gtsmodel.Account, db.Error) { return a.getAccount( ctx, @@ -444,6 +465,34 @@ func (a *accountDB) GetAccountCustomCSSByUsername(ctx context.Context, username return account.CustomCSS, nil } +func (a *accountDB) GetAccountsUsingEmoji(ctx context.Context, emojiID string) ([]*gtsmodel.Account, error) { + var accountIDs []string + + // Create SELECT account query. + q := a.conn.NewSelect(). + Table("accounts"). + Column("id") + + // Append a WHERE LIKE clause to the query + // that checks the `emoji` column for any + // text containing this specific emoji ID. + // + // The reason we do this instead of doing a + // `WHERE ? IN (emojis)` is that the latter + // ends up being much MUCH slower, and the + // database stores this ID-array-column as + // text anyways, allowing a simple LIKE query. + q = whereLike(q, "emojis", emojiID) + + // Execute the query, scanning destination into accountIDs. + if _, err := q.Exec(ctx, &accountIDs); err != nil { + return nil, a.conn.ProcessError(err) + } + + // Convert account IDs into account objects. + return a.GetAccountsByIDs(ctx, accountIDs) +} + func (a *accountDB) GetAccountFaves(ctx context.Context, accountID string) ([]*gtsmodel.StatusFave, db.Error) { faves := new([]*gtsmodel.StatusFave) diff --git a/internal/db/bundb/emoji.go b/internal/db/bundb/emoji.go index 60c140264..321b5c0e7 100644 --- a/internal/db/bundb/emoji.go +++ b/internal/db/bundb/emoji.go @@ -126,12 +126,20 @@ func (e *emojiDB) DeleteEmojiByID(ctx context.Context, id string) db.Error { return err } - // Select all accounts using this emoji. - if _, err := tx.NewSelect(). + // Prepare SELECT accounts query. + aq := tx.NewSelect(). Table("accounts"). - Column("id"). - Where("? IN (emojis)", id). 
- Exec(ctx, &accountIDs); err != nil { + Column("id") + + // Append a WHERE LIKE clause to the query + // that checks the `emoji` column for any + // text containing this specific emoji ID. + // + // (see GetStatusesUsingEmoji() for details.) + aq = whereLike(aq, "emojis", id) + + // Select all accounts using this emoji into accountIDss. + if _, err := aq.Exec(ctx, &accountIDs); err != nil { return err } @@ -162,12 +170,20 @@ func (e *emojiDB) DeleteEmojiByID(ctx context.Context, id string) db.Error { } } - // Select all statuses using this emoji. - if _, err := tx.NewSelect(). + // Prepare SELECT statuses query. + sq := tx.NewSelect(). Table("statuses"). - Column("id"). - Where("? IN (emojis)", id). - Exec(ctx, &statusIDs); err != nil { + Column("id") + + // Append a WHERE LIKE clause to the query + // that checks the `emoji` column for any + // text containing this specific emoji ID. + // + // (see GetStatusesUsingEmoji() for details.) + sq = whereLike(sq, "emojis", id) + + // Select all statuses using this emoji into statusIDs. + if _, err := sq.Exec(ctx, &statusIDs); err != nil { return err } @@ -328,7 +344,7 @@ func (e *emojiDB) GetEmojisBy(ctx context.Context, domain string, includeDisable } func (e *emojiDB) GetEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error) { - emojiIDs := []string{} + var emojiIDs []string q := e.conn.NewSelect(). Table("emojis"). @@ -336,7 +352,7 @@ func (e *emojiDB) GetEmojis(ctx context.Context, maxID string, limit int) ([]*gt Order("id DESC") if maxID != "" { - q = q.Where("? < ?", bun.Ident("id"), maxID) + q = q.Where("id < ?", maxID) } if limit != 0 { @@ -350,6 +366,52 @@ func (e *emojiDB) GetEmojis(ctx context.Context, maxID string, limit int) ([]*gt return e.GetEmojisByIDs(ctx, emojiIDs) } +func (e *emojiDB) GetRemoteEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error) { + var emojiIDs []string + + q := e.conn.NewSelect(). + Table("emojis"). + Column("id"). 
+ Where("domain IS NOT NULL"). + Order("id DESC") + + if maxID != "" { + q = q.Where("id < ?", maxID) + } + + if limit != 0 { + q = q.Limit(limit) + } + + if err := q.Scan(ctx, &emojiIDs); err != nil { + return nil, e.conn.ProcessError(err) + } + + return e.GetEmojisByIDs(ctx, emojiIDs) +} + +func (e *emojiDB) GetCachedEmojisOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.Emoji, error) { + var emojiIDs []string + + q := e.conn.NewSelect(). + Table("emojis"). + Column("id"). + Where("cached = true"). + Where("domain IS NOT NULL"). + Where("created_at < ?", olderThan). + Order("created_at DESC") + + if limit != 0 { + q = q.Limit(limit) + } + + if err := q.Scan(ctx, &emojiIDs); err != nil { + return nil, e.conn.ProcessError(err) + } + + return e.GetEmojisByIDs(ctx, emojiIDs) +} + func (e *emojiDB) GetUseableEmojis(ctx context.Context) ([]*gtsmodel.Emoji, db.Error) { emojiIDs := []string{} diff --git a/internal/db/bundb/media.go b/internal/db/bundb/media.go index 80a4f8bbe..c190df44a 100644 --- a/internal/db/bundb/media.go +++ b/internal/db/bundb/media.go @@ -232,29 +232,6 @@ func (m *mediaDB) DeleteAttachment(ctx context.Context, id string) error { return m.conn.ProcessError(err) } -func (m *mediaDB) GetRemoteOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, db.Error) { - attachmentIDs := []string{} - - q := m.conn. - NewSelect(). - TableExpr("? AS ?", bun.Ident("media_attachments"), bun.Ident("media_attachment")). - Column("media_attachment.id"). - Where("? = ?", bun.Ident("media_attachment.cached"), true). - Where("? < ?", bun.Ident("media_attachment.created_at"), olderThan). - Where("? IS NOT NULL", bun.Ident("media_attachment.remote_url")). 
- Order("media_attachment.created_at DESC") - - if limit != 0 { - q = q.Limit(limit) - } - - if err := q.Scan(ctx, &attachmentIDs); err != nil { - return nil, m.conn.ProcessError(err) - } - - return m.GetAttachmentsByIDs(ctx, attachmentIDs) -} - func (m *mediaDB) CountRemoteOlderThan(ctx context.Context, olderThan time.Time) (int, db.Error) { q := m.conn. NewSelect(). @@ -273,7 +250,7 @@ func (m *mediaDB) CountRemoteOlderThan(ctx context.Context, olderThan time.Time) } func (m *mediaDB) GetAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) { - attachmentIDs := []string{} + attachmentIDs := make([]string, 0, limit) q := m.conn.NewSelect(). Table("media_attachments"). @@ -281,7 +258,7 @@ func (m *mediaDB) GetAttachments(ctx context.Context, maxID string, limit int) ( Order("id DESC") if maxID != "" { - q = q.Where("? < ?", bun.Ident("id"), maxID) + q = q.Where("id < ?", maxID) } if limit != 0 { @@ -295,8 +272,55 @@ func (m *mediaDB) GetAttachments(ctx context.Context, maxID string, limit int) ( return m.GetAttachmentsByIDs(ctx, attachmentIDs) } +func (m *mediaDB) GetRemoteAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) { + attachmentIDs := make([]string, 0, limit) + + q := m.conn.NewSelect(). + Table("media_attachments"). + Column("id"). + Where("remote_url IS NOT NULL"). + Order("id DESC") + + if maxID != "" { + q = q.Where("id < ?", maxID) + } + + if limit != 0 { + q = q.Limit(limit) + } + + if err := q.Scan(ctx, &attachmentIDs); err != nil { + return nil, m.conn.ProcessError(err) + } + + return m.GetAttachmentsByIDs(ctx, attachmentIDs) +} + +func (m *mediaDB) GetCachedAttachmentsOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, db.Error) { + attachmentIDs := make([]string, 0, limit) + + q := m.conn. + NewSelect(). + Table("media_attachments"). + Column("id"). + Where("cached = true"). + Where("remote_url IS NOT NULL"). 
+ Where("created_at < ?", olderThan). + Order("created_at DESC") + + if limit != 0 { + q = q.Limit(limit) + } + + if err := q.Scan(ctx, &attachmentIDs); err != nil { + return nil, m.conn.ProcessError(err) + } + + return m.GetAttachmentsByIDs(ctx, attachmentIDs) +} + func (m *mediaDB) GetAvatarsAndHeaders(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, db.Error) { - attachmentIDs := []string{} + attachmentIDs := make([]string, 0, limit) q := m.conn.NewSelect(). TableExpr("? AS ?", bun.Ident("media_attachments"), bun.Ident("media_attachment")). @@ -324,7 +348,7 @@ func (m *mediaDB) GetAvatarsAndHeaders(ctx context.Context, maxID string, limit } func (m *mediaDB) GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, db.Error) { - attachmentIDs := []string{} + attachmentIDs := make([]string, 0, limit) q := m.conn. NewSelect(). diff --git a/internal/db/bundb/media_test.go b/internal/db/bundb/media_test.go index 6b419022a..59b927119 100644 --- a/internal/db/bundb/media_test.go +++ b/internal/db/bundb/media_test.go @@ -38,7 +38,7 @@ func (suite *MediaTestSuite) TestGetAttachmentByID() { } func (suite *MediaTestSuite) TestGetOlder() { - attachments, err := suite.db.GetRemoteOlderThan(context.Background(), time.Now(), 20) + attachments, err := suite.db.GetCachedAttachmentsOlderThan(context.Background(), time.Now(), 20) suite.NoError(err) suite.Len(attachments, 2) } diff --git a/internal/db/bundb/migrations/20230724100000_emoji_cleanup.go b/internal/db/bundb/migrations/20230724100000_emoji_cleanup.go new file mode 100644 index 000000000..0ee501395 --- /dev/null +++ b/internal/db/bundb/migrations/20230724100000_emoji_cleanup.go @@ -0,0 +1,55 @@ +// GoToSocial +// Copyright (C) GoToSocial Authors admin@gotosocial.org +// SPDX-License-Identifier: AGPL-3.0-or-later +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public 
License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package migrations + +import ( + "context" + "strings" + + "github.com/uptrace/bun" +) + +func init() { + up := func(ctx context.Context, db *bun.DB) error { + _, err := db.ExecContext(ctx, "ALTER TABLE emojis ADD COLUMN cached BOOLEAN DEFAULT false") + + if err != nil && !(strings.Contains(err.Error(), "already exists") || strings.Contains(err.Error(), "duplicate column name") || strings.Contains(err.Error(), "SQLSTATE 42701")) { + return err + } + + if _, err := db.NewUpdate(). + Table("emojis"). + Where("disabled = false"). + Set("cached = true"). 
+ Exec(ctx); err != nil { + return err + } + + return nil + } + + down := func(ctx context.Context, db *bun.DB) error { + return db.RunInTx(ctx, nil, func(ctx context.Context, tx bun.Tx) error { + return nil + }) + } + + if err := Migrations.Register(up, down); err != nil { + panic(err) + } +} diff --git a/internal/db/bundb/report.go b/internal/db/bundb/report.go index e017a8906..ee8aa1cb3 100644 --- a/internal/db/bundb/report.go +++ b/internal/db/bundb/report.go @@ -149,7 +149,7 @@ func (r *reportDB) getReport(ctx context.Context, lookup string, dbQuery func(*g if len(report.StatusIDs) > 0 { // Fetch reported statuses - report.Statuses, err = r.state.DB.GetStatuses(ctx, report.StatusIDs) + report.Statuses, err = r.state.DB.GetStatusesByIDs(ctx, report.StatusIDs) if err != nil { return nil, fmt.Errorf("error getting status mentions: %w", err) } diff --git a/internal/db/bundb/search.go b/internal/db/bundb/search.go index c05ebb8b1..1d7eefd48 100644 --- a/internal/db/bundb/search.go +++ b/internal/db/bundb/search.go @@ -19,7 +19,6 @@ package bundb import ( "context" - "strings" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/id" @@ -61,40 +60,6 @@ type searchDB struct { state *state.State } -// replacer is a thread-safe string replacer which escapes -// common SQLite + Postgres `LIKE` wildcard chars using the -// escape character `\`. Initialized as a var in this package -// so it can be reused. -var replacer = strings.NewReplacer( - `\`, `\\`, // Escape char. - `%`, `\%`, // Zero or more char. - `_`, `\_`, // Exactly one char. -) - -// whereSubqueryLike appends a WHERE clause to the -// given SelectQuery q, which searches for matches -// of searchQuery in the given subQuery using LIKE. -func whereSubqueryLike( - q *bun.SelectQuery, - subQuery *bun.SelectQuery, - searchQuery string, -) *bun.SelectQuery { - // Escape existing wildcard + escape - // chars in the search query string. 
- searchQuery = replacer.Replace(searchQuery) - - // Add our own wildcards back in; search - // zero or more chars around the query. - searchQuery = `%` + searchQuery + `%` - - // Append resulting WHERE - // clause to the main query. - return q.Where( - "(?) LIKE ? ESCAPE ?", - subQuery, searchQuery, `\`, - ) -} - // Query example (SQLite): // // SELECT "account"."id" FROM "accounts" AS "account" @@ -167,7 +132,7 @@ func (s *searchDB) SearchForAccounts( // Search using LIKE for matches of query // string within accountText subquery. - q = whereSubqueryLike(q, accountTextSubq, query) + q = whereLike(q, accountTextSubq, query) if limit > 0 { // Limit amount of accounts returned. @@ -345,7 +310,7 @@ func (s *searchDB) SearchForStatuses( // Search using LIKE for matches of query // string within statusText subquery. - q = whereSubqueryLike(q, statusTextSubq, query) + q = whereLike(q, statusTextSubq, query) if limit > 0 { // Limit amount of statuses returned. diff --git a/internal/db/bundb/status.go b/internal/db/bundb/status.go index 0dffbabcc..ccfc9fd4b 100644 --- a/internal/db/bundb/status.go +++ b/internal/db/bundb/status.go @@ -58,18 +58,18 @@ func (s *statusDB) GetStatusByID(ctx context.Context, id string) (*gtsmodel.Stat ) } -func (s *statusDB) GetStatuses(ctx context.Context, ids []string) ([]*gtsmodel.Status, db.Error) { +func (s *statusDB) GetStatusesByIDs(ctx context.Context, ids []string) ([]*gtsmodel.Status, error) { statuses := make([]*gtsmodel.Status, 0, len(ids)) for _, id := range ids { - // Attempt fetch from DB + // Attempt to fetch status from DB. status, err := s.GetStatusByID(ctx, id) if err != nil { log.Errorf(ctx, "error getting status %q: %v", id, err) continue } - // Append status + // Append status to return slice. 
statuses = append(statuses, status) } @@ -429,6 +429,34 @@ func (s *statusDB) DeleteStatusByID(ctx context.Context, id string) db.Error { }) } +func (s *statusDB) GetStatusesUsingEmoji(ctx context.Context, emojiID string) ([]*gtsmodel.Status, error) { + var statusIDs []string + + // Create SELECT status query. + q := s.conn.NewSelect(). + Table("statuses"). + Column("id") + + // Append a WHERE LIKE clause to the query + // that checks the `emoji` column for any + // text containing this specific emoji ID. + // + // The reason we do this instead of doing a + // `WHERE ? IN (emojis)` is that the latter + // ends up being much MUCH slower, and the + // database stores this ID-array-column as + // text anyways, allowing a simple LIKE query. + q = whereLike(q, "emojis", emojiID) + + // Execute the query, scanning destination into statusIDs. + if _, err := q.Exec(ctx, &statusIDs); err != nil { + return nil, s.conn.ProcessError(err) + } + + // Convert status IDs into status objects. + return s.GetStatusesByIDs(ctx, statusIDs) +} + func (s *statusDB) GetStatusParents(ctx context.Context, status *gtsmodel.Status, onlyDirect bool) ([]*gtsmodel.Status, db.Error) { if onlyDirect { // Only want the direct parent, no further than first level diff --git a/internal/db/bundb/status_test.go b/internal/db/bundb/status_test.go index cab7501f9..a69608796 100644 --- a/internal/db/bundb/status_test.go +++ b/internal/db/bundb/status_test.go @@ -50,13 +50,13 @@ func (suite *StatusTestSuite) TestGetStatusByID() { suite.True(*status.Likeable) } -func (suite *StatusTestSuite) TestGetStatusesByID() { +func (suite *StatusTestSuite) TestGetStatusesByIDs() { ids := []string{ suite.testStatuses["local_account_1_status_1"].ID, suite.testStatuses["local_account_2_status_3"].ID, } - statuses, err := suite.db.GetStatuses(context.Background(), ids) + statuses, err := suite.db.GetStatusesByIDs(context.Background(), ids) if err != nil { suite.FailNow(err.Error()) } diff --git a/internal/db/bundb/util.go 
b/internal/db/bundb/util.go index 06bb289d3..bdd45d1e7 100644 --- a/internal/db/bundb/util.go +++ b/internal/db/bundb/util.go @@ -18,10 +18,46 @@ package bundb import ( + "strings" + "github.com/superseriousbusiness/gotosocial/internal/db" "github.com/uptrace/bun" ) +// likeEscaper is a thread-safe string replacer which escapes +// common SQLite + Postgres `LIKE` wildcard chars using the +// escape character `\`. Initialized as a var in this package +// so it can be reused. +var likeEscaper = strings.NewReplacer( + `\`, `\\`, // Escape char. + `%`, `\%`, // Zero or more char. + `_`, `\_`, // Exactly one char. +) + +// whereSubqueryLike appends a WHERE clause to the +// given SelectQuery, which searches for matches +// of `search` in the given subQuery using LIKE. +func whereLike( + query *bun.SelectQuery, + subject interface{}, + search string, +) *bun.SelectQuery { + // Escape existing wildcard + escape + // chars in the search query string. + search = likeEscaper.Replace(search) + + // Add our own wildcards back in; search + // zero or more chars around the query. + search = `%` + search + `%` + + // Append resulting WHERE + // clause to the main query. + return query.Where( + "(?) LIKE ? ESCAPE ?", + subject, search, `\`, + ) +} + // updateWhere parses []db.Where and adds it to the given update query. func updateWhere(q *bun.UpdateQuery, where []db.Where) { for _, w := range where { diff --git a/internal/db/emoji.go b/internal/db/emoji.go index 5dcad9ece..67d7f7232 100644 --- a/internal/db/emoji.go +++ b/internal/db/emoji.go @@ -19,6 +19,7 @@ package db import ( "context" + "time" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" ) @@ -40,8 +41,16 @@ type Emoji interface { GetEmojisByIDs(ctx context.Context, ids []string) ([]*gtsmodel.Emoji, Error) // GetUseableEmojis gets all emojis which are useable by accounts on this instance. GetUseableEmojis(ctx context.Context) ([]*gtsmodel.Emoji, Error) - // GetEmojis ... 
+ + // GetEmojis fetches all emojis with IDs less than 'maxID', up to a maximum of 'limit' emojis. GetEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error) + + // GetRemoteEmojis fetches all remote emojis with IDs less than 'maxID', up to a maximum of 'limit' emojis. + GetRemoteEmojis(ctx context.Context, maxID string, limit int) ([]*gtsmodel.Emoji, error) + + // GetCachedEmojisOlderThan fetches all cached remote emojis with 'updated_at' greater than 'olderThan', up to a maximum of 'limit' emojis. + GetCachedEmojisOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.Emoji, error) + // GetEmojisBy gets emojis based on given parameters. Useful for admin actions. GetEmojisBy(ctx context.Context, domain string, includeDisabled bool, includeEnabled bool, shortcode string, maxShortcodeDomain string, minShortcodeDomain string, limit int) ([]*gtsmodel.Emoji, error) // GetEmojiByID gets a specific emoji by its database ID. diff --git a/internal/db/media.go b/internal/db/media.go index 01bca1748..5fb18a8fe 100644 --- a/internal/db/media.go +++ b/internal/db/media.go @@ -44,12 +44,12 @@ type Media interface { // GetAttachments ... GetAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) - // GetRemoteOlderThan gets limit n remote media attachments (including avatars and headers) older than the given - // olderThan time. These will be returned in order of attachment.created_at descending (newest to oldest in other words). - // - // The selected media attachments will be those with both a URL and a RemoteURL filled in. - // In other words, media attachments that originated remotely, and that we currently have cached locally. - GetRemoteOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, Error) + // GetRemoteAttachments ... 
+ GetRemoteAttachments(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, error) + + // GetCachedAttachmentsOlderThan gets limit n remote attachments (including avatars and headers) older than + // the given time. These will be returned in order of attachment.created_at descending (i.e. newest to oldest). + GetCachedAttachmentsOlderThan(ctx context.Context, olderThan time.Time, limit int) ([]*gtsmodel.MediaAttachment, Error) // CountRemoteOlderThan is like GetRemoteOlderThan, except instead of getting limit n attachments, // it just counts how many remote attachments in the database (including avatars and headers) meet diff --git a/internal/db/status.go b/internal/db/status.go index fdce19094..c0e330260 100644 --- a/internal/db/status.go +++ b/internal/db/status.go @@ -28,9 +28,6 @@ type Status interface { // GetStatusByID returns one status from the database, with no rel fields populated, only their linking ID / URIs GetStatusByID(ctx context.Context, id string) (*gtsmodel.Status, Error) - // GetStatuses gets a slice of statuses corresponding to the given status IDs. - GetStatuses(ctx context.Context, ids []string) ([]*gtsmodel.Status, Error) - // GetStatusByURI returns one status from the database, with no rel fields populated, only their linking ID / URIs GetStatusByURI(ctx context.Context, uri string) (*gtsmodel.Status, Error) @@ -58,6 +55,12 @@ type Status interface { // CountStatusFaves returns the amount of faves/likes recorded for a status, or an error if something goes wrong CountStatusFaves(ctx context.Context, status *gtsmodel.Status) (int, Error) + // GetStatuses gets a slice of statuses corresponding to the given status IDs. + GetStatusesByIDs(ctx context.Context, ids []string) ([]*gtsmodel.Status, error) + + // GetStatusesUsingEmoji fetches all status models using emoji with given ID stored in their 'emojis' column. 
+ GetStatusesUsingEmoji(ctx context.Context, emojiID string) ([]*gtsmodel.Status, error) + // GetStatusParents gets the parent statuses of a given status. // // If onlyDirect is true, only the immediate parent will be returned. diff --git a/internal/gtsmodel/emoji.go b/internal/gtsmodel/emoji.go index 1e21c7d1e..0fcc3247b 100644 --- a/internal/gtsmodel/emoji.go +++ b/internal/gtsmodel/emoji.go @@ -42,4 +42,5 @@ type Emoji struct { VisibleInPicker *bool `validate:"-" bun:",nullzero,notnull,default:true"` // Is this emoji visible in the admin emoji picker? Category *EmojiCategory `validate:"-" bun:"rel:belongs-to"` // In which emoji category is this emoji visible? CategoryID string `validate:"omitempty,ulid" bun:"type:CHAR(26),nullzero"` // ID of the category this emoji belongs to. + Cached *bool `validate:"-" bun:",nullzero,notnull,default:false"` } diff --git a/internal/media/manager.go b/internal/media/manager.go index 1d673128a..afe686cb9 100644 --- a/internal/media/manager.go +++ b/internal/media/manager.go @@ -51,12 +51,7 @@ type Manager struct { state *state.State } -// NewManager returns a media manager with the given db and underlying storage. -// -// A worker pool will also be initialized for the manager, to ensure that only -// a limited number of media will be processed in parallel. The numbers of workers -// is determined from the $GOMAXPROCS environment variable (usually no. CPU cores). -// See internal/concurrency.NewWorkerPool() documentation for further information. +// NewManager returns a media manager with given state. func NewManager(state *state.State) *Manager { m := &Manager{state: state} return m @@ -159,7 +154,7 @@ func (m *Manager) PreProcessMedia(ctx context.Context, data DataFunc, accountID return processingMedia, nil } -// PreProcessMediaRecache refetches, reprocesses, and recaches an existing attachment that has been uncached via pruneRemote. 
+// PreProcessMediaRecache refetches, reprocesses, and recaches an existing attachment that has been uncached via cleaner pruning. // // Note: unlike ProcessMedia, this will NOT queue the media to be asychronously processed. func (m *Manager) PreProcessMediaRecache(ctx context.Context, data DataFunc, attachmentID string) (*ProcessingMedia, error) { @@ -209,17 +204,18 @@ func (m *Manager) ProcessMedia(ctx context.Context, data DataFunc, accountID str // // Note: unlike ProcessEmoji, this will NOT queue the emoji to be asynchronously processed. func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode string, emojiID string, uri string, ai *AdditionalEmojiInfo, refresh bool) (*ProcessingEmoji, error) { - instanceAccount, err := m.state.DB.GetInstanceAccount(ctx, "") - if err != nil { - return nil, gtserror.Newf("error fetching this instance account from the db: %s", err) - } - var ( newPathID string emoji *gtsmodel.Emoji now = time.Now() ) + // Fetch the local instance account for emoji path generation. + instanceAcc, err := m.state.DB.GetInstanceAccount(ctx, "") + if err != nil { + return nil, gtserror.Newf("error fetching instance account: %w", err) + } + if refresh { // Look for existing emoji by given ID. 
emoji, err = m.state.DB.GetEmojiByID(ctx, emojiID) @@ -261,8 +257,8 @@ func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode } // store + serve static image at new path ID - emoji.ImageStaticURL = uris.GenerateURIForAttachment(instanceAccount.ID, string(TypeEmoji), string(SizeStatic), newPathID, mimePng) - emoji.ImageStaticPath = fmt.Sprintf("%s/%s/%s/%s.%s", instanceAccount.ID, TypeEmoji, SizeStatic, newPathID, mimePng) + emoji.ImageStaticURL = uris.GenerateURIForAttachment(instanceAcc.ID, string(TypeEmoji), string(SizeStatic), newPathID, mimePng) + emoji.ImageStaticPath = fmt.Sprintf("%s/%s/%s/%s.%s", instanceAcc.ID, TypeEmoji, SizeStatic, newPathID, mimePng) emoji.Shortcode = shortcode emoji.URI = uri @@ -278,12 +274,12 @@ func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode Domain: "", // assume our own domain unless told otherwise ImageRemoteURL: "", ImageStaticRemoteURL: "", - ImageURL: "", // we don't know yet - ImageStaticURL: uris.GenerateURIForAttachment(instanceAccount.ID, string(TypeEmoji), string(SizeStatic), emojiID, mimePng), // all static emojis are encoded as png - ImagePath: "", // we don't know yet - ImageStaticPath: fmt.Sprintf("%s/%s/%s/%s.%s", instanceAccount.ID, TypeEmoji, SizeStatic, emojiID, mimePng), // all static emojis are encoded as png - ImageContentType: "", // we don't know yet - ImageStaticContentType: mimeImagePng, // all static emojis are encoded as png + ImageURL: "", // we don't know yet + ImageStaticURL: uris.GenerateURIForAttachment(instanceAcc.ID, string(TypeEmoji), string(SizeStatic), emojiID, mimePng), // all static emojis are encoded as png + ImagePath: "", // we don't know yet + ImageStaticPath: fmt.Sprintf("%s/%s/%s/%s.%s", instanceAcc.ID, TypeEmoji, SizeStatic, emojiID, mimePng), // all static emojis are encoded as png + ImageContentType: "", // we don't know yet + ImageStaticContentType: mimeImagePng, // all static emojis are encoded as png ImageFileSize: 0, 
ImageStaticFileSize: 0, Disabled: &disabled, @@ -329,9 +325,8 @@ func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode } processingEmoji := &ProcessingEmoji{ - instAccID: instanceAccount.ID, emoji: emoji, - refresh: refresh, + existing: refresh, newPathID: newPathID, dataFn: data, mgr: m, @@ -340,6 +335,26 @@ func (m *Manager) PreProcessEmoji(ctx context.Context, data DataFunc, shortcode return processingEmoji, nil } +// PreProcessEmojiRecache refetches, reprocesses, and recaches an existing emoji that has been uncached via cleaner pruning. +// +// Note: unlike ProcessEmoji, this will NOT queue the emoji to be asychronously processed. +func (m *Manager) PreProcessEmojiRecache(ctx context.Context, data DataFunc, emojiID string) (*ProcessingEmoji, error) { + // get the existing emoji from the database. + emoji, err := m.state.DB.GetEmojiByID(ctx, emojiID) + if err != nil { + return nil, err + } + + processingEmoji := &ProcessingEmoji{ + emoji: emoji, + dataFn: data, + existing: true, // inidcate recache + mgr: m, + } + + return processingEmoji, nil +} + // ProcessEmoji will call PreProcessEmoji, followed by queuing the emoji to be processing in the emoji worker queue. func (m *Manager) ProcessEmoji(ctx context.Context, data DataFunc, shortcode string, id string, uri string, ai *AdditionalEmojiInfo, refresh bool) (*ProcessingEmoji, error) { // Create a new processing emoji object for this emoji request. 
diff --git a/internal/media/processingemoji.go b/internal/media/processingemoji.go index d3a1edbf8..1c7e60144 100644 --- a/internal/media/processingemoji.go +++ b/internal/media/processingemoji.go @@ -31,16 +31,16 @@ import ( "github.com/superseriousbusiness/gotosocial/internal/gtserror" "github.com/superseriousbusiness/gotosocial/internal/gtsmodel" "github.com/superseriousbusiness/gotosocial/internal/log" + "github.com/superseriousbusiness/gotosocial/internal/regexes" "github.com/superseriousbusiness/gotosocial/internal/uris" ) // ProcessingEmoji represents an emoji currently processing. It exposes // various functions for retrieving data from the process. type ProcessingEmoji struct { - instAccID string // instance account ID emoji *gtsmodel.Emoji // processing emoji details - refresh bool // whether this is an existing emoji being refreshed - newPathID string // new emoji path ID to use if refreshed + existing bool // indicates whether this is an existing emoji ID being refreshed / recached + newPathID string // new emoji path ID to use when being refreshed dataFn DataFunc // load-data function, returns media stream done bool // done is set when process finishes with non ctx canceled type error proc runners.Processor // proc helps synchronize only a singular running processing instance @@ -121,24 +121,9 @@ func (p *ProcessingEmoji) load(ctx context.Context) (*gtsmodel.Emoji, bool, erro return err } - if p.refresh { - columns := []string{ - "image_remote_url", - "image_static_remote_url", - "image_url", - "image_static_url", - "image_path", - "image_static_path", - "image_content_type", - "image_file_size", - "image_static_file_size", - "image_updated_at", - "shortcode", - "uri", - } - - // Existing emoji we're refreshing, so only need to update. - err = p.mgr.state.DB.UpdateEmoji(ctx, p.emoji, columns...) + if p.existing { + // Existing emoji we're updating, so only update. 
+ err = p.mgr.state.DB.UpdateEmoji(ctx, p.emoji) return err } @@ -217,7 +202,7 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { var pathID string - if p.refresh { + if p.newPathID != "" { // This is a refreshed emoji with a new // path ID that this will be stored under. pathID = p.newPathID @@ -226,10 +211,13 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { pathID = p.emoji.ID } + // Determine instance account ID from already generated image static path. + instanceAccID := regexes.FilePath.FindStringSubmatch(p.emoji.ImageStaticPath)[1] + // Calculate emoji file path. p.emoji.ImagePath = fmt.Sprintf( "%s/%s/%s/%s.%s", - p.instAccID, + instanceAccID, TypeEmoji, SizeOriginal, pathID, @@ -258,12 +246,13 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { if err := p.mgr.state.Storage.Delete(ctx, p.emoji.ImagePath); err != nil { log.Errorf(ctx, "error removing too-large-emoji from storage: %v", err) } + return gtserror.Newf("calculated emoji size %s greater than max allowed %s", size, maxSize) } // Fill in remaining attachment data now it's stored. p.emoji.ImageURL = uris.GenerateURIForAttachment( - p.instAccID, + instanceAccID, string(TypeEmoji), string(SizeOriginal), pathID, @@ -271,6 +260,10 @@ func (p *ProcessingEmoji) store(ctx context.Context) error { ) p.emoji.ImageContentType = info.MIME.Value p.emoji.ImageFileSize = int(sz) + p.emoji.Cached = func() *bool { + ok := true + return &ok + }() return nil } @@ -297,6 +290,7 @@ func (p *ProcessingEmoji) finish(ctx context.Context) error { // This shouldn't already exist, but we do a check as it's worth logging. 
if have, _ := p.mgr.state.Storage.Has(ctx, p.emoji.ImageStaticPath); have { log.Warnf(ctx, "static emoji already exists at storage path: %s", p.emoji.ImagePath) + // Attempt to remove static existing emoji at storage path (might be broken / out-of-date) if err := p.mgr.state.Storage.Delete(ctx, p.emoji.ImageStaticPath); err != nil { return gtserror.Newf("error removing static emoji from storage: %v", err) diff --git a/internal/processing/admin/media.go b/internal/processing/admin/media.go index a457487b8..13dcb7d28 100644 --- a/internal/processing/admin/media.go +++ b/internal/processing/admin/media.go @@ -58,7 +58,7 @@ func (p *Processor) MediaPrune(ctx context.Context, mediaRemoteCacheDays int) gt go func() { ctx := context.Background() p.cleaner.Media().All(ctx, mediaRemoteCacheDays) - p.cleaner.Emoji().All(ctx) + p.cleaner.Emoji().All(ctx, mediaRemoteCacheDays) }() return nil diff --git a/internal/processing/fromclientapi.go b/internal/processing/fromclientapi.go index 159f09d1b..412403c44 100644 --- a/internal/processing/fromclientapi.go +++ b/internal/processing/fromclientapi.go @@ -982,7 +982,7 @@ func (p *Processor) federateReport(ctx context.Context, report *gtsmodel.Report) } if len(report.StatusIDs) > 0 && len(report.Statuses) == 0 { - statuses, err := p.state.DB.GetStatuses(ctx, report.StatusIDs) + statuses, err := p.state.DB.GetStatusesByIDs(ctx, report.StatusIDs) if err != nil { return fmt.Errorf("federateReport: error getting report statuses from database: %w", err) } diff --git a/internal/processing/media/getfile.go b/internal/processing/media/getfile.go index 27b08600e..386c3a9a2 100644 --- a/internal/processing/media/getfile.go +++ b/internal/processing/media/getfile.go @@ -118,7 +118,7 @@ func (p *Processor) getAttachmentContent(ctx context.Context, requestingAccount // retrieve attachment from the database and do basic checks on it a, err := p.state.DB.GetAttachmentByID(ctx, wantedMediaID) if err != nil { - return nil, 
gtserror.NewErrorNotFound(fmt.Errorf("attachment %s could not be taken from the db: %s", wantedMediaID, err)) + return nil, gtserror.NewErrorNotFound(fmt.Errorf("attachment %s could not be taken from the db: %w", wantedMediaID, err)) } if a.AccountID != owningAccountID { @@ -131,7 +131,7 @@ func (p *Processor) getAttachmentContent(ctx context.Context, requestingAccount // 2. we need to fetch it again using a transport and the media manager remoteMediaIRI, err := url.Parse(a.RemoteURL) if err != nil { - return nil, gtserror.NewErrorNotFound(fmt.Errorf("error parsing remote media iri %s: %s", a.RemoteURL, err)) + return nil, gtserror.NewErrorNotFound(fmt.Errorf("error parsing remote media iri %s: %w", a.RemoteURL, err)) } // use an empty string as requestingUsername to use the instance account, unless the request for this @@ -151,24 +151,24 @@ func (p *Processor) getAttachmentContent(ctx context.Context, requestingAccount // recache operation -> holding open a media worker. // ] - dataFn := func(innerCtx context.Context) (io.ReadCloser, int64, error) { - t, err := p.transportController.NewTransportForUsername(innerCtx, requestingUsername) + dataFn := func(ctx context.Context) (io.ReadCloser, int64, error) { + t, err := p.transportController.NewTransportForUsername(ctx, requestingUsername) if err != nil { return nil, 0, err } - return t.DereferenceMedia(gtscontext.SetFastFail(innerCtx), remoteMediaIRI) + return t.DereferenceMedia(gtscontext.SetFastFail(ctx), remoteMediaIRI) } // Start recaching this media with the prepared data function. 
processingMedia, err := p.mediaManager.PreProcessMediaRecache(ctx, dataFn, wantedMediaID) if err != nil { - return nil, gtserror.NewErrorNotFound(fmt.Errorf("error recaching media: %s", err)) + return nil, gtserror.NewErrorNotFound(fmt.Errorf("error recaching media: %w", err)) } // Load attachment and block until complete a, err = processingMedia.LoadAttachment(ctx) if err != nil { - return nil, gtserror.NewErrorNotFound(fmt.Errorf("error loading recached attachment: %s", err)) + return nil, gtserror.NewErrorNotFound(fmt.Errorf("error loading recached attachment: %w", err)) } } @@ -205,17 +205,53 @@ func (p *Processor) getEmojiContent(ctx context.Context, fileName string, owning // for using the static URL rather than full size url // is that static emojis are always encoded as png, // so this is more reliable than using full size url - imageStaticURL := uris.GenerateURIForAttachment(owningAccountID, string(media.TypeEmoji), string(media.SizeStatic), fileName, "png") + imageStaticURL := uris.GenerateURIForAttachment( + owningAccountID, + string(media.TypeEmoji), + string(media.SizeStatic), + fileName, + "png", + ) e, err := p.state.DB.GetEmojiByStaticURL(ctx, imageStaticURL) if err != nil { - return nil, gtserror.NewErrorNotFound(fmt.Errorf("emoji %s could not be taken from the db: %s", fileName, err)) + return nil, gtserror.NewErrorNotFound(fmt.Errorf("emoji %s could not be taken from the db: %w", fileName, err)) } if *e.Disabled { return nil, gtserror.NewErrorNotFound(fmt.Errorf("emoji %s has been disabled", fileName)) } + if !*e.Cached { + // if we don't have it cached, then we can assume two things: + // 1. this is remote emoji, since local emoji should never be uncached + // 2. 
we need to fetch it again using a transport and the media manager + remoteURL, err := url.Parse(e.ImageRemoteURL) + if err != nil { + return nil, gtserror.NewErrorNotFound(fmt.Errorf("error parsing remote emoji iri %s: %w", e.ImageRemoteURL, err)) + } + + dataFn := func(ctx context.Context) (io.ReadCloser, int64, error) { + t, err := p.transportController.NewTransportForUsername(ctx, "") + if err != nil { + return nil, 0, err + } + return t.DereferenceMedia(gtscontext.SetFastFail(ctx), remoteURL) + } + + // Start recaching this emoji with the prepared data function. + processingEmoji, err := p.mediaManager.PreProcessEmojiRecache(ctx, dataFn, e.ID) + if err != nil { + return nil, gtserror.NewErrorNotFound(fmt.Errorf("error recaching emoji: %w", err)) + } + + // Load emoji and block until complete + e, err = processingEmoji.LoadEmoji(ctx) + if err != nil { + return nil, gtserror.NewErrorNotFound(fmt.Errorf("error loading recached emoji: %w", err)) + } + } + switch emojiSize { case media.SizeOriginal: emojiContent.ContentType = e.ImageContentType diff --git a/internal/processing/report/create.go b/internal/processing/report/create.go index 9faffd2d4..a6cce8e80 100644 --- a/internal/processing/report/create.go +++ b/internal/processing/report/create.go @@ -51,7 +51,7 @@ func (p *Processor) Create(ctx context.Context, account *gtsmodel.Account, form } // fetch statuses by IDs given in the report form (noop if no statuses given) - statuses, err := p.state.DB.GetStatuses(ctx, form.StatusIDs) + statuses, err := p.state.DB.GetStatusesByIDs(ctx, form.StatusIDs) if err != nil { err = fmt.Errorf("db error fetching report target statuses: %w", err) return nil, gtserror.NewErrorInternalError(err) diff --git a/internal/regexes/regexes.go b/internal/regexes/regexes.go index 88c832508..347a8a98b 100644 --- a/internal/regexes/regexes.go +++ b/internal/regexes/regexes.go @@ -70,7 +70,7 @@ const ( statusesPath = userPathPrefix + `/` + statuses + `/(` + ulid + `)$` blockPath = 
userPathPrefix + `/` + blocks + `/(` + ulid + `)$` reportPath = `^/?` + reports + `/(` + ulid + `)$` - filePath = `^/?(` + ulid + `)/([a-z]+)/([a-z]+)/(` + ulid + `)\.([a-z]+)$` + filePath = `^/?(` + ulid + `)/([a-z]+)/([a-z]+)/(` + ulid + `)\.([a-z0-9]+)$` ) var ( diff --git a/internal/storage/storage.go b/internal/storage/storage.go index ea8184881..588c586d8 100644 --- a/internal/storage/storage.go +++ b/internal/storage/storage.go @@ -97,6 +97,9 @@ func (d *Driver) Has(ctx context.Context, key string) (bool, error) { func (d *Driver) WalkKeys(ctx context.Context, walk func(context.Context, string) error) error { return d.Storage.WalkKeys(ctx, storage.WalkKeysOptions{ WalkFn: func(ctx context.Context, entry storage.Entry) error { + if entry.Key == "store.lock" { + return nil // skip this. + } return walk(ctx, entry.Key) }, }) diff --git a/internal/typeutils/internaltofrontend.go b/internal/typeutils/internaltofrontend.go index 17b8047e9..03d0bfcab 100644 --- a/internal/typeutils/internaltofrontend.go +++ b/internal/typeutils/internaltofrontend.go @@ -1122,7 +1122,7 @@ func (c *converter) ReportToAdminAPIReport(ctx context.Context, r *gtsmodel.Repo statuses := make([]*apimodel.Status, 0, len(r.StatusIDs)) if len(r.StatusIDs) != 0 && len(r.Statuses) == 0 { - r.Statuses, err = c.db.GetStatuses(ctx, r.StatusIDs) + r.Statuses, err = c.db.GetStatusesByIDs(ctx, r.StatusIDs) if err != nil { return nil, fmt.Errorf("ReportToAdminAPIReport: error getting statuses from the db: %w", err) } diff --git a/test/run-postgres.sh b/test/run-postgres.sh new file mode 100755 index 000000000..54e4970ed --- /dev/null +++ b/test/run-postgres.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +set -e + +DB_NAME='postgres' +DB_USER='postgres' +DB_PASS='postgres' +DB_PORT=5432 + +# Start postgres container +CID=$(docker run --detach \ + --env "POSTGRES_DB=${DB_NAME}" \ + --env "POSTGRES_USER=${DB_USER}" \ + --env "POSTGRES_PASSWORD=${DB_PASS}" \ + --env "POSTGRES_HOST_AUTH_METHOD=trust" \ + --env 
"PGHOST=0.0.0.0" \ + --env "PGPORT=${DB_PORT}" \ + 'postgres:latest') + +# On exit kill the container +trap "docker kill ${CID}" exit + +sleep 5 +#docker exec "$CID" psql --user "$DB_USER" --password "$DB_PASS" -c "CREATE DATABASE \"${DB_NAME}\" WITH LOCALE \"C.UTF-8\" TEMPLATE \"template0\";" +docker exec "$CID" psql --user "$DB_USER" --password "$DB_PASS" -c "GRANT ALL PRIVILEGES ON DATABASE \"${DB_NAME}\" TO \"${DB_USER}\";" + +# Get running container IP +IP=$(docker container inspect "${CID}" \ + --format '{{ .NetworkSettings.IPAddress }}') + +GTS_DB_TYPE=postgres \ +GTS_DB_ADDRESS=${IP} \ +GTS_DB_PORT=${DB_PORT} \ +GTS_DB_USER=${DB_USER} \ +GTS_DB_PASSWORD=${DB_PASS} \ +GTS_DB_DATABASE=${DB_NAME} \ +go test ./... -p 1 ${@} \ No newline at end of file diff --git a/test/run-sqlite.sh b/test/run-sqlite.sh new file mode 100755 index 000000000..fb5502432 --- /dev/null +++ b/test/run-sqlite.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +set -e + +GTS_DB_TYPE=sqlite \ +GTS_DB_ADDRESS=':memory:' \ +go test ./... ${@} \ No newline at end of file diff --git a/testrig/testmodels.go b/testrig/testmodels.go index c55b80e60..837869c83 100644 --- a/testrig/testmodels.go +++ b/testrig/testmodels.go @@ -1161,6 +1161,7 @@ func NewTestEmojis() map[string]*gtsmodel.Emoji { URI: "http://localhost:8080/emoji/01F8MH9H8E4VG3KDYJR9EGPXCQ", VisibleInPicker: TrueBool(), CategoryID: "01GGQ8V4993XK67B2JB396YFB7", + Cached: FalseBool(), }, "yell": { ID: "01GD5KP5CQEE1R3X43Y1EHS2CW", @@ -1183,6 +1184,7 @@ func NewTestEmojis() map[string]*gtsmodel.Emoji { URI: "http://fossbros-anonymous.io/emoji/01GD5KP5CQEE1R3X43Y1EHS2CW", VisibleInPicker: FalseBool(), CategoryID: "", + Cached: FalseBool(), }, } }