From 9e7d022a06779a03e3eaaadad6cc33423f46892b Mon Sep 17 00:00:00 2001
From: tobi <31960611+tsmethurst@users.noreply.github.com>
Date: Thu, 30 Jun 2022 12:22:10 +0200
Subject: [PATCH] [feature] Cleanup unattached local media (#680)
* add localUnattached db function
* add parseOlderThan util function
* add pruneunusedlocalattachments to media manager
* add unusedlocal pruning to schedule + admin call
* set number of days to keep as a const
* fix test
---
internal/db/bundb/media.go | 26 ++++++++
internal/db/bundb/media_test.go | 9 +++
internal/db/media.go | 5 ++
internal/media/manager.go | 26 +++++++-
internal/media/pruneremote.go | 10 +--
internal/media/pruneunusedlocal.go | 86 +++++++++++++++++++++++++
internal/media/pruneunusedlocal_test.go | 75 +++++++++++++++++++++
internal/media/util.go | 17 +++++
internal/processing/admin/mediaprune.go | 9 +++
9 files changed, 253 insertions(+), 10 deletions(-)
create mode 100644 internal/media/pruneunusedlocal.go
create mode 100644 internal/media/pruneunusedlocal_test.go
diff --git a/internal/db/bundb/media.go b/internal/db/bundb/media.go
index fc3280ddf..71433b901 100644
--- a/internal/db/bundb/media.go
+++ b/internal/db/bundb/media.go
@@ -98,3 +98,29 @@ func (m *mediaDB) GetAvatarsAndHeaders(ctx context.Context, maxID string, limit
return attachments, nil
}
+
+// GetLocalUnattachedOlderThan returns cached local media attachments created
+// before olderThan that are neither avatars nor headers, have no remote URL,
+// and are not attached to a status.
+//
+// If maxID is not empty, only attachments with an ID lower than maxID are
+// returned. If limit is not 0, at most limit attachments are returned.
+//
+// Results are ordered by ID descending, so a caller can page through them by
+// passing the ID of the last returned attachment as the next maxID.
+func (m *mediaDB) GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, maxID string, limit int) ([]*gtsmodel.MediaAttachment, db.Error) {
+	attachments := []*gtsmodel.MediaAttachment{}
+
+	// The explicit ordering matters: without it, "id < maxID" paging could end a
+	// page on an arbitrary ID and silently skip rows on the following pages.
+	q := m.newMediaQ(&attachments).
+		Where("media_attachment.cached = true").
+		Where("media_attachment.avatar = false").
+		Where("media_attachment.header = false").
+		Where("media_attachment.created_at < ?", olderThan).
+		Where("media_attachment.remote_url IS NULL").
+		Where("media_attachment.status_id IS NULL").
+		Order("media_attachment.id DESC")
+
+	if maxID != "" {
+		q = q.Where("media_attachment.id < ?", maxID)
+	}
+
+	if limit != 0 {
+		q = q.Limit(limit)
+	}
+
+	if err := q.Scan(ctx); err != nil {
+		return nil, m.conn.ProcessError(err)
+	}
+
+	return attachments, nil
+}
diff --git a/internal/db/bundb/media_test.go b/internal/db/bundb/media_test.go
index f1809b3fb..d6a4981f8 100644
--- a/internal/db/bundb/media_test.go
+++ b/internal/db/bundb/media_test.go
@@ -24,6 +24,7 @@ import (
"time"
"github.com/stretchr/testify/suite"
+ "github.com/superseriousbusiness/gotosocial/testrig"
)
type MediaTestSuite struct {
@@ -51,6 +52,14 @@ func (suite *MediaTestSuite) TestGetAvisAndHeaders() {
suite.Len(attachments, 2)
}
+// TestGetLocalUnattachedOlderThan checks that exactly one unattached local
+// attachment is found when using a cutoff far in the future.
+func (suite *MediaTestSuite) TestGetLocalUnattachedOlderThan() {
+	olderThan := testrig.TimeMustParse("2090-06-04T13:12:00Z")
+
+	unattached, err := suite.db.GetLocalUnattachedOlderThan(context.Background(), olderThan, "", 10)
+	suite.NoError(err)
+	suite.Len(unattached, 1)
+}
+
func TestMediaTestSuite(t *testing.T) {
suite.Run(t, new(MediaTestSuite))
}
diff --git a/internal/db/media.go b/internal/db/media.go
index 636fc61f2..2f9ed79dc 100644
--- a/internal/db/media.go
+++ b/internal/db/media.go
@@ -38,4 +38,9 @@ type Media interface {
// GetAvatarsAndHeaders fetches limit n avatars and headers with an id < maxID. These headers
// and avis may be in use or not; the caller should check this if it's important.
GetAvatarsAndHeaders(ctx context.Context, maxID string, limit int) ([]*gtsmodel.MediaAttachment, Error)
+ // GetLocalUnattachedOlderThan fetches limit n local media attachments, older than the given time, which
+ // aren't header or avatars, and aren't attached to a status. In other words, attachments which were uploaded
+ // but never used for whatever reason, or attachments that were attached to a status which was subsequently
+ // deleted.
+ GetLocalUnattachedOlderThan(ctx context.Context, olderThan time.Time, maxID string, limit int) ([]*gtsmodel.MediaAttachment, Error)
}
diff --git a/internal/media/manager.go b/internal/media/manager.go
index 663f74123..aacf607cc 100644
--- a/internal/media/manager.go
+++ b/internal/media/manager.go
@@ -34,6 +34,10 @@ import (
// selectPruneLimit is the amount of media entries to select at a time from the db when pruning
const selectPruneLimit = 20
+// UnusedLocalAttachmentCacheDays is the number of days to keep local media in storage if it
+// is no longer attached to a status, or was never attached to a status.
+const UnusedLocalAttachmentCacheDays = 3
+
// Manager provides an interface for managing media: parsing, storing, and retrieving media objects like photos, videos, and gifs.
type Manager interface {
// ProcessMedia begins the process of decoding and storing the given data as an attachment.
@@ -75,11 +79,16 @@ type Manager interface {
//
// The returned int is the amount of media that was pruned by this function.
PruneAllRemote(ctx context.Context, olderThanDays int) (int, error)
- // PruneAllMeta prunes unused meta media -- currently, this means unused avatars + headers, but can also be extended
- // to include things like attachments that were uploaded on this server but left unused, etc.
+ // PruneAllMeta prunes unused/out of date headers and avatars cached on this instance.
//
// The returned int is the amount of media that was pruned by this function.
PruneAllMeta(ctx context.Context) (int, error)
+ // PruneUnusedLocalAttachments prunes unused media attachments that were uploaded by
+ // a user on this instance, but never actually attached to a status, or attached but
+ // later detached.
+ //
+ // The returned int is the amount of media that was pruned by this function.
+ PruneUnusedLocalAttachments(ctx context.Context) (int, error)
// Stop stops the underlying worker pool of the manager. It should be called
// when closing GoToSocial in order to cleanly finish any in-progress jobs.
@@ -210,6 +219,19 @@ func scheduleCleanupJobs(m *manager) error {
return fmt.Errorf("error starting media manager meta cleanup job: %s", err)
}
+ if _, err := c.AddFunc("@midnight", func() {
+ begin := time.Now()
+ pruned, err := m.PruneUnusedLocalAttachments(pruneCtx)
+ if err != nil {
+ logrus.Errorf("media manager: error pruning unused local attachments: %s", err)
+ return
+ }
+ logrus.Infof("media manager: pruned %d unused local attachments in %s", pruned, time.Since(begin))
+ }); err != nil {
+ pruneCancel()
+ return fmt.Errorf("error starting media manager unused local attachments cleanup job: %s", err)
+ }
+
// start remote cache cleanup cronjob if configured
if mediaRemoteCacheDays := config.GetMediaRemoteCacheDays(); mediaRemoteCacheDays > 0 {
if _, err := c.AddFunc("@midnight", func() {
diff --git a/internal/media/pruneremote.go b/internal/media/pruneremote.go
index f7b77d32e..a01995740 100644
--- a/internal/media/pruneremote.go
+++ b/internal/media/pruneremote.go
@@ -21,7 +21,6 @@ package media
import (
"context"
"fmt"
- "time"
"codeberg.org/gruf/go-store/storage"
"github.com/sirupsen/logrus"
@@ -32,15 +31,10 @@ import (
func (m *manager) PruneAllRemote(ctx context.Context, olderThanDays int) (int, error) {
var totalPruned int
- // convert days into a duration string
- olderThanHoursString := fmt.Sprintf("%dh", olderThanDays*24)
- // parse the duration string into a duration
- olderThanHours, err := time.ParseDuration(olderThanHoursString)
+ olderThan, err := parseOlderThan(olderThanDays)
if err != nil {
- return totalPruned, fmt.Errorf("PruneAllRemote: %d", err)
+ return totalPruned, fmt.Errorf("PruneAllRemote: error parsing olderThanDays %d: %s", olderThanDays, err)
}
- // 'subtract' that from the time now to give our threshold
- olderThan := time.Now().Add(-olderThanHours)
logrus.Infof("PruneAllRemote: pruning media older than %s", olderThan)
// select 20 attachments at a time and prune them
diff --git a/internal/media/pruneunusedlocal.go b/internal/media/pruneunusedlocal.go
new file mode 100644
index 000000000..0c464e857
--- /dev/null
+++ b/internal/media/pruneunusedlocal.go
@@ -0,0 +1,86 @@
+/*
+ GoToSocial
+ Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package media
+
+import (
+ "context"
+ "fmt"
+
+ "codeberg.org/gruf/go-store/storage"
+ "github.com/sirupsen/logrus"
+ "github.com/superseriousbusiness/gotosocial/internal/db"
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+)
+
+// PruneUnusedLocalAttachments removes local attachments older than
+// UnusedLocalAttachmentCacheDays that the database reports as unattached,
+// deleting both their stored files and their database entries.
+//
+// It returns the number of attachments pruned. Pruning stops at the first
+// error, in which case the count pruned so far is returned alongside it.
+func (m *manager) PruneUnusedLocalAttachments(ctx context.Context) (int, error) {
+	var (
+		pruned int
+		maxID  string
+	)
+
+	olderThan, err := parseOlderThan(UnusedLocalAttachmentCacheDays)
+	if err != nil {
+		return pruned, fmt.Errorf("PruneUnusedLocalAttachments: error parsing olderThanDays %d: %s", UnusedLocalAttachmentCacheDays, err)
+	}
+	logrus.Infof("PruneUnusedLocalAttachments: pruning unused local attachments older than %s", olderThan)
+
+	// work through the matching attachments one page at a time
+	for {
+		page, err := m.db.GetLocalUnattachedOlderThan(ctx, olderThan, maxID, selectPruneLimit)
+		if err != nil {
+			// running out of entries just means we're done; anything else is a real error
+			if err != db.ErrNoEntries {
+				return pruned, err
+			}
+			break
+		}
+
+		count := len(page)
+		if count == 0 {
+			break
+		}
+
+		// the id of the last attachment in this page becomes the next 'maxID' value
+		maxID = page[count-1].ID
+		logrus.Tracef("PruneUnusedLocalAttachments: got %d unused local attachments older than %s with maxID < %s", count, olderThan, maxID)
+
+		for _, attachment := range page {
+			if err := m.pruneOneLocal(ctx, attachment); err != nil {
+				return pruned, err
+			}
+			pruned++
+		}
+	}
+
+	logrus.Infof("PruneUnusedLocalAttachments: finished pruning: pruned %d entries", pruned)
+	return pruned, nil
+}
+
+// pruneOneLocal deletes the given attachment's full size file and thumbnail
+// from storage, then deletes the attachment's database entry. Empty paths are
+// skipped, and a file that is already missing from storage is not an error.
+func (m *manager) pruneOneLocal(ctx context.Context, attachment *gtsmodel.MediaAttachment) error {
+	// full size attachment first, then the thumbnail
+	paths := [...]string{attachment.File.Path, attachment.Thumbnail.Path}
+
+	for _, path := range paths {
+		if path == "" {
+			continue
+		}
+
+		logrus.Tracef("pruneOneLocal: deleting %s", path)
+		if err := m.storage.Delete(path); err != nil && err != storage.ErrNotFound {
+			return err
+		}
+	}
+
+	// delete the attachment completely
+	return m.db.DeleteByID(ctx, attachment.ID, attachment)
+}
diff --git a/internal/media/pruneunusedlocal_test.go b/internal/media/pruneunusedlocal_test.go
new file mode 100644
index 000000000..5f6bfbfba
--- /dev/null
+++ b/internal/media/pruneunusedlocal_test.go
@@ -0,0 +1,75 @@
+/*
+ GoToSocial
+ Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package media_test
+
+import (
+ "context"
+ "testing"
+
+ "github.com/stretchr/testify/suite"
+ "github.com/superseriousbusiness/gotosocial/internal/db"
+)
+
+// PruneUnusedLocalTestSuite holds the tests for the media manager's
+// PruneUnusedLocalAttachments function. It embeds MediaStandardTestSuite.
+type PruneUnusedLocalTestSuite struct {
+ MediaStandardTestSuite
+}
+
+// TestPruneUnusedLocal checks that a single prune removes exactly one
+// attachment, and that the unattached test attachment is gone from the
+// database afterwards.
+func (suite *PruneUnusedLocalTestSuite) TestPruneUnusedLocal() {
+	ctx := context.Background()
+
+	unattached := suite.testAttachments["local_account_1_unattached_1"]
+	suite.True(unattached.Cached)
+
+	pruned, err := suite.manager.PruneUnusedLocalAttachments(ctx)
+	suite.NoError(err)
+	suite.Equal(1, pruned)
+
+	// the db entry should no longer exist
+	_, err = suite.db.GetAttachmentByID(ctx, unattached.ID)
+	suite.ErrorIs(err, db.ErrNoEntries)
+}
+
+// TestPruneRemoteTwice runs PruneUnusedLocalAttachments twice in a row.
+// Despite its name it exercises unused *local* attachment pruning: the
+// first run should prune one attachment and the second run none.
+func (suite *PruneUnusedLocalTestSuite) TestPruneRemoteTwice() {
+	ctx := context.Background()
+
+	firstRun, err := suite.manager.PruneUnusedLocalAttachments(ctx)
+	suite.NoError(err)
+	suite.Equal(1, firstRun)
+
+	// final prune should prune nothing, since the first prune already happened
+	secondRun, err := suite.manager.PruneUnusedLocalAttachments(ctx)
+	suite.NoError(err)
+	suite.Equal(0, secondRun)
+}
+
+// TestPruneOneNonExistent checks that pruning still succeeds, and still counts
+// the attachment, when its file has already been removed from storage.
+func (suite *PruneUnusedLocalTestSuite) TestPruneOneNonExistent() {
+	ctx := context.Background()
+	unattached := suite.testAttachments["local_account_1_unattached_1"]
+
+	// fetch the db entry and make sure it's marked as cached
+	stored, err := suite.db.GetAttachmentByID(ctx, unattached.ID)
+	suite.NoError(err)
+	suite.True(stored.Cached)
+
+	// remove the file from storage, leaving the db entry in place
+	err = suite.storage.Delete(stored.File.Path)
+	suite.NoError(err)
+
+	// pruning an entry whose file is missing should not produce an error
+	pruned, err := suite.manager.PruneUnusedLocalAttachments(ctx)
+	suite.NoError(err)
+	suite.Equal(1, pruned)
+}
+
+// TestPruneUnusedLocalTestSuite runs every test in PruneUnusedLocalTestSuite.
+func TestPruneUnusedLocalTestSuite(t *testing.T) {
+	suite.Run(t, new(PruneUnusedLocalTestSuite))
+}
diff --git a/internal/media/util.go b/internal/media/util.go
index 6dfcede89..9d62619f5 100644
--- a/internal/media/util.go
+++ b/internal/media/util.go
@@ -21,6 +21,7 @@ package media
import (
"errors"
"fmt"
+ "time"
"github.com/h2non/filetype"
"github.com/sirupsen/logrus"
@@ -128,3 +129,19 @@ func (l *logrusWrapper) Info(msg string, keysAndValues ...interface{}) {
func (l *logrusWrapper) Error(err error, msg string, keysAndValues ...interface{}) {
logrus.Error("media manager cron logger: ", err, msg, keysAndValues)
}
+
+// parseOlderThan returns the point in time that lies olderThanDays days
+// (counted as 24-hour periods) before now, for use as an 'older than'
+// threshold when pruning.
+//
+// An error is returned if olderThanDays is too large in magnitude to be
+// expressed as a time.Duration.
+func parseOlderThan(olderThanDays int) (time.Time, error) {
+	// largest number of whole days that still fits in a time.Duration
+	const maxDays = int((1<<63 - 1) / (24 * time.Hour))
+
+	// guard explicitly, since Duration arithmetic would otherwise overflow silently
+	if olderThanDays > maxDays || olderThanDays < -maxDays {
+		return time.Time{}, fmt.Errorf("olderThanDays %d is out of range for a duration", olderThanDays)
+	}
+
+	// 'subtract' the age from the time now to give our threshold
+	age := time.Duration(olderThanDays) * 24 * time.Hour
+	return time.Now().Add(-age), nil
+}
diff --git a/internal/processing/admin/mediaprune.go b/internal/processing/admin/mediaprune.go
index 0e6abe028..1c3398b78 100644
--- a/internal/processing/admin/mediaprune.go
+++ b/internal/processing/admin/mediaprune.go
@@ -41,6 +41,15 @@ func (p *processor) MediaPrune(ctx context.Context, mediaRemoteCacheDays int) gt
}
}()
+ go func() {
+ pruned, err := p.mediaManager.PruneUnusedLocalAttachments(ctx)
+ if err != nil {
+ logrus.Errorf("MediaPrune: error pruning unused local cache: %s", err)
+ } else {
+ logrus.Infof("MediaPrune: pruned %d unused local cache entries", pruned)
+ }
+ }()
+
go func() {
pruned, err := p.mediaManager.PruneAllMeta(ctx)
if err != nil {