Merge pull request 'Refactor LFS GC functions' (#3056) from gusted/forgejo-lfs into forgejo
Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/3056
Reviewed-by: Otto <otto@codeberg.org>
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
This commit is contained in:
commit
668edc1948
|
@ -0,0 +1,7 @@
|
||||||
|
-
|
||||||
|
|
||||||
|
id: 1000
|
||||||
|
oid: 9d172e5c64b4f0024b9901ec6afe9ea052f3c9b6ff9f4b07956d8c48c86fca82
|
||||||
|
size: 25
|
||||||
|
repository_id: 1
|
||||||
|
created_unix: 1712309123
|
|
@ -337,32 +337,29 @@ func GetRepoLFSSize(ctx context.Context, repoID int64) (int64, error) {
|
||||||
func IterateRepositoryIDsWithLFSMetaObjects(ctx context.Context, f func(ctx context.Context, repoID, count int64) error) error {
|
func IterateRepositoryIDsWithLFSMetaObjects(ctx context.Context, f func(ctx context.Context, repoID, count int64) error) error {
|
||||||
batchSize := setting.Database.IterateBufferSize
|
batchSize := setting.Database.IterateBufferSize
|
||||||
sess := db.GetEngine(ctx)
|
sess := db.GetEngine(ctx)
|
||||||
id := int64(0)
|
var start int
|
||||||
type RepositoryCount struct {
|
type RepositoryCount struct {
|
||||||
RepositoryID int64
|
RepositoryID int64
|
||||||
Count int64
|
Count int64
|
||||||
}
|
}
|
||||||
for {
|
for {
|
||||||
counts := make([]*RepositoryCount, 0, batchSize)
|
counts := make([]*RepositoryCount, 0, batchSize)
|
||||||
sess.Select("repository_id, COUNT(id) AS count").
|
if err := sess.Select("repository_id, COUNT(id) AS count").
|
||||||
Table("lfs_meta_object").
|
Table("lfs_meta_object").
|
||||||
Where("repository_id > ?", id).
|
|
||||||
GroupBy("repository_id").
|
GroupBy("repository_id").
|
||||||
OrderBy("repository_id ASC")
|
OrderBy("repository_id ASC").Limit(batchSize, start).Find(&counts); err != nil {
|
||||||
|
|
||||||
if err := sess.Limit(batchSize, 0).Find(&counts); err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if len(counts) == 0 {
|
if len(counts) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
start += len(counts)
|
||||||
|
|
||||||
for _, count := range counts {
|
for _, count := range counts {
|
||||||
if err := f(ctx, count.RepositoryID, count.Count); err != nil {
|
if err := f(ctx, count.RepositoryID, count.Count); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
id = counts[len(counts)-1].RepositoryID
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -370,25 +367,17 @@ func IterateRepositoryIDsWithLFSMetaObjects(ctx context.Context, f func(ctx cont
|
||||||
type IterateLFSMetaObjectsForRepoOptions struct {
|
type IterateLFSMetaObjectsForRepoOptions struct {
|
||||||
OlderThan timeutil.TimeStamp
|
OlderThan timeutil.TimeStamp
|
||||||
UpdatedLessRecentlyThan timeutil.TimeStamp
|
UpdatedLessRecentlyThan timeutil.TimeStamp
|
||||||
OrderByUpdated bool
|
|
||||||
LoopFunctionAlwaysUpdates bool
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// IterateLFSMetaObjectsForRepo provides a iterator for LFSMetaObjects per Repo
|
// IterateLFSMetaObjectsForRepo provides a iterator for LFSMetaObjects per Repo
|
||||||
func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject, int64) error, opts *IterateLFSMetaObjectsForRepoOptions) error {
|
func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(context.Context, *LFSMetaObject) error, opts *IterateLFSMetaObjectsForRepoOptions) error {
|
||||||
var start int
|
|
||||||
batchSize := setting.Database.IterateBufferSize
|
batchSize := setting.Database.IterateBufferSize
|
||||||
engine := db.GetEngine(ctx)
|
engine := db.GetEngine(ctx)
|
||||||
type CountLFSMetaObject struct {
|
|
||||||
Count int64
|
|
||||||
LFSMetaObject `xorm:"extends"`
|
|
||||||
}
|
|
||||||
|
|
||||||
id := int64(0)
|
id := int64(0)
|
||||||
|
|
||||||
for {
|
for {
|
||||||
beans := make([]*CountLFSMetaObject, 0, batchSize)
|
beans := make([]*LFSMetaObject, 0, batchSize)
|
||||||
sess := engine.Table("lfs_meta_object").Select("`lfs_meta_object`.*, COUNT(`l1`.oid) AS `count`").
|
sess := engine.Table("lfs_meta_object").Select("`lfs_meta_object`.*").
|
||||||
Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid").
|
Join("INNER", "`lfs_meta_object` AS l1", "`lfs_meta_object`.oid = `l1`.oid").
|
||||||
Where("`lfs_meta_object`.repository_id = ?", repoID)
|
Where("`lfs_meta_object`.repository_id = ?", repoID)
|
||||||
if !opts.OlderThan.IsZero() {
|
if !opts.OlderThan.IsZero() {
|
||||||
|
@ -397,25 +386,19 @@ func IterateLFSMetaObjectsForRepo(ctx context.Context, repoID int64, f func(cont
|
||||||
if !opts.UpdatedLessRecentlyThan.IsZero() {
|
if !opts.UpdatedLessRecentlyThan.IsZero() {
|
||||||
sess.And("`lfs_meta_object`.updated_unix < ?", opts.UpdatedLessRecentlyThan)
|
sess.And("`lfs_meta_object`.updated_unix < ?", opts.UpdatedLessRecentlyThan)
|
||||||
}
|
}
|
||||||
sess.GroupBy("`lfs_meta_object`.id")
|
sess.GroupBy("`lfs_meta_object`.id").
|
||||||
if opts.OrderByUpdated {
|
And("`lfs_meta_object`.id > ?", id).
|
||||||
sess.OrderBy("`lfs_meta_object`.updated_unix ASC")
|
OrderBy("`lfs_meta_object`.id ASC")
|
||||||
} else {
|
|
||||||
sess.And("`lfs_meta_object`.id > ?", id)
|
if err := sess.Limit(batchSize, 0).Find(&beans); err != nil {
|
||||||
sess.OrderBy("`lfs_meta_object`.id ASC")
|
|
||||||
}
|
|
||||||
if err := sess.Limit(batchSize, start).Find(&beans); err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if len(beans) == 0 {
|
if len(beans) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if !opts.LoopFunctionAlwaysUpdates {
|
|
||||||
start += len(beans)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, bean := range beans {
|
for _, bean := range beans {
|
||||||
if err := f(ctx, &bean.LFSMetaObject, bean.Count); err != nil {
|
if err := f(ctx, bean); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,101 @@
|
||||||
|
// Copyright 2024 The Forgejo Authors. All rights reserved.
|
||||||
|
// SPDX-License-Identifier: MIT
|
||||||
|
|
||||||
|
package git
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"path/filepath"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"code.gitea.io/gitea/models/db"
|
||||||
|
"code.gitea.io/gitea/models/unittest"
|
||||||
|
"code.gitea.io/gitea/modules/setting"
|
||||||
|
"code.gitea.io/gitea/modules/test"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestIterateRepositoryIDsWithLFSMetaObjects(t *testing.T) {
|
||||||
|
defer unittest.OverrideFixtures(
|
||||||
|
unittest.FixturesOptions{
|
||||||
|
Dir: filepath.Join(setting.AppWorkPath, "models/fixtures/"),
|
||||||
|
Base: setting.AppWorkPath,
|
||||||
|
Dirs: []string{"models/git/TestIterateRepositoryIDsWithLFSMetaObjects/"},
|
||||||
|
},
|
||||||
|
)()
|
||||||
|
assert.NoError(t, unittest.PrepareTestDatabase())
|
||||||
|
|
||||||
|
type repocount struct {
|
||||||
|
repoid int64
|
||||||
|
count int64
|
||||||
|
}
|
||||||
|
expected := []repocount{{1, 1}, {54, 4}}
|
||||||
|
|
||||||
|
t.Run("Normal batch size", func(t *testing.T) {
|
||||||
|
defer test.MockVariableValue(&setting.Database.IterateBufferSize, 20)()
|
||||||
|
cases := []repocount{}
|
||||||
|
|
||||||
|
err := IterateRepositoryIDsWithLFSMetaObjects(db.DefaultContext, func(ctx context.Context, repoID, count int64) error {
|
||||||
|
cases = append(cases, repocount{repoID, count})
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.EqualValues(t, expected, cases)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Low batch size", func(t *testing.T) {
|
||||||
|
defer test.MockVariableValue(&setting.Database.IterateBufferSize, 1)()
|
||||||
|
cases := []repocount{}
|
||||||
|
|
||||||
|
err := IterateRepositoryIDsWithLFSMetaObjects(db.DefaultContext, func(ctx context.Context, repoID, count int64) error {
|
||||||
|
cases = append(cases, repocount{repoID, count})
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.EqualValues(t, expected, cases)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIterateLFSMetaObjectsForRepo(t *testing.T) {
|
||||||
|
assert.NoError(t, unittest.PrepareTestDatabase())
|
||||||
|
|
||||||
|
expectedIDs := []int64{1, 2, 3, 4}
|
||||||
|
|
||||||
|
t.Run("Normal batch size", func(t *testing.T) {
|
||||||
|
defer test.MockVariableValue(&setting.Database.IterateBufferSize, 20)()
|
||||||
|
actualIDs := []int64{}
|
||||||
|
|
||||||
|
err := IterateLFSMetaObjectsForRepo(db.DefaultContext, 54, func(ctx context.Context, lo *LFSMetaObject) error {
|
||||||
|
actualIDs = append(actualIDs, lo.ID)
|
||||||
|
return nil
|
||||||
|
}, &IterateLFSMetaObjectsForRepoOptions{})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.EqualValues(t, expectedIDs, actualIDs)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("Low batch size", func(t *testing.T) {
|
||||||
|
defer test.MockVariableValue(&setting.Database.IterateBufferSize, 1)()
|
||||||
|
actualIDs := []int64{}
|
||||||
|
|
||||||
|
err := IterateLFSMetaObjectsForRepo(db.DefaultContext, 54, func(ctx context.Context, lo *LFSMetaObject) error {
|
||||||
|
actualIDs = append(actualIDs, lo.ID)
|
||||||
|
return nil
|
||||||
|
}, &IterateLFSMetaObjectsForRepoOptions{})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.EqualValues(t, expectedIDs, actualIDs)
|
||||||
|
|
||||||
|
t.Run("Batch handles updates", func(t *testing.T) {
|
||||||
|
actualIDs := []int64{}
|
||||||
|
|
||||||
|
err := IterateLFSMetaObjectsForRepo(db.DefaultContext, 54, func(ctx context.Context, lo *LFSMetaObject) error {
|
||||||
|
actualIDs = append(actualIDs, lo.ID)
|
||||||
|
_, err := db.DeleteByID[LFSMetaObject](ctx, lo.ID)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
return nil
|
||||||
|
}, &IterateLFSMetaObjectsForRepoOptions{})
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.EqualValues(t, expectedIDs, actualIDs)
|
||||||
|
})
|
||||||
|
})
|
||||||
|
}
|
|
@ -44,6 +44,7 @@ func garbageCollectLFSCheck(ctx context.Context, logger log.Logger, autofix bool
|
||||||
OlderThan: time.Now().Add(-24 * time.Hour * 7),
|
OlderThan: time.Now().Add(-24 * time.Hour * 7),
|
||||||
// We don't set the UpdatedLessRecentlyThan because we want to do a full GC
|
// We don't set the UpdatedLessRecentlyThan because we want to do a full GC
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
|
logger.Error("Couldn't garabage collect LFS objects: %v", err)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,6 @@ package repository
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
@ -25,8 +24,6 @@ type GarbageCollectLFSMetaObjectsOptions struct {
|
||||||
AutoFix bool
|
AutoFix bool
|
||||||
OlderThan time.Time
|
OlderThan time.Time
|
||||||
UpdatedLessRecentlyThan time.Time
|
UpdatedLessRecentlyThan time.Time
|
||||||
NumberToCheckPerRepo int64
|
|
||||||
ProportionToCheckPerRepo float64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// GarbageCollectLFSMetaObjects garbage collects LFS objects for all repositories
|
// GarbageCollectLFSMetaObjects garbage collects LFS objects for all repositories
|
||||||
|
@ -49,9 +46,6 @@ func GarbageCollectLFSMetaObjects(ctx context.Context, opts GarbageCollectLFSMet
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
if newMinimum := int64(float64(count) * opts.ProportionToCheckPerRepo); newMinimum > opts.NumberToCheckPerRepo && opts.NumberToCheckPerRepo != 0 {
|
|
||||||
opts.NumberToCheckPerRepo = newMinimum
|
|
||||||
}
|
|
||||||
return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, opts)
|
return GarbageCollectLFSMetaObjectsForRepo(ctx, repo, opts)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -78,13 +72,9 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
|
||||||
defer gitRepo.Close()
|
defer gitRepo.Close()
|
||||||
|
|
||||||
store := lfs.NewContentStore()
|
store := lfs.NewContentStore()
|
||||||
errStop := errors.New("STOPERR")
|
|
||||||
objectFormat := git.ObjectFormatFromName(repo.ObjectFormatName)
|
objectFormat := git.ObjectFormatFromName(repo.ObjectFormatName)
|
||||||
|
|
||||||
err = git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject, count int64) error {
|
err = git_model.IterateLFSMetaObjectsForRepo(ctx, repo.ID, func(ctx context.Context, metaObject *git_model.LFSMetaObject) error {
|
||||||
if opts.NumberToCheckPerRepo > 0 && total > opts.NumberToCheckPerRepo {
|
|
||||||
return errStop
|
|
||||||
}
|
|
||||||
total++
|
total++
|
||||||
pointerSha := git.ComputeBlobHash(objectFormat, []byte(metaObject.Pointer.StringContent()))
|
pointerSha := git.ComputeBlobHash(objectFormat, []byte(metaObject.Pointer.StringContent()))
|
||||||
|
|
||||||
|
@ -125,14 +115,8 @@ func GarbageCollectLFSMetaObjectsForRepo(ctx context.Context, repo *repo_model.R
|
||||||
// unassociated LFS object is genuinely unassociated.
|
// unassociated LFS object is genuinely unassociated.
|
||||||
OlderThan: timeutil.TimeStamp(opts.OlderThan.Unix()),
|
OlderThan: timeutil.TimeStamp(opts.OlderThan.Unix()),
|
||||||
UpdatedLessRecentlyThan: timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()),
|
UpdatedLessRecentlyThan: timeutil.TimeStamp(opts.UpdatedLessRecentlyThan.Unix()),
|
||||||
OrderByUpdated: true,
|
|
||||||
LoopFunctionAlwaysUpdates: true,
|
|
||||||
})
|
})
|
||||||
|
if err != nil {
|
||||||
if err == errStop {
|
|
||||||
opts.LogDetail("Processing stopped at %d total LFSMetaObjects in %-v", total, repo)
|
|
||||||
return nil
|
|
||||||
} else if err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
|
Loading…
Reference in New Issue