stash/pkg/file/clean.go

412 lines
9.5 KiB
Go
Raw Normal View History

File storage rewrite (#2676) * Restructure data layer part 2 (#2599) * Refactor and separate image model * Refactor image query builder * Handle relationships in image query builder * Remove relationship management methods * Refactor gallery model/query builder * Add scenes to gallery model * Convert scene model * Refactor scene models * Remove unused methods * Add unit tests for gallery * Add image tests * Add scene tests * Convert unnecessary scene value pointers to values * Convert unnecessary pointer values to values * Refactor scene partial * Add scene partial tests * Refactor ImagePartial * Add image partial tests * Refactor gallery partial update * Add partial gallery update tests * Use zero/null package for null values * Add files and scan system * Add sqlite implementation for files/folders * Add unit tests for files/folders * Image refactors * Update image data layer * Refactor gallery model and creation * Refactor scene model * Refactor scenes * Don't set title from filename * Allow galleries to freely add/remove images * Add multiple scene file support to graphql and UI * Add multiple file support for images in graphql/UI * Add multiple file for galleries in graphql/UI * Remove use of some deprecated fields * Remove scene path usage * Remove gallery path usage * Remove path from image * Move funscript to video file * Refactor caption detection * Migrate existing data * Add post commit/rollback hook system * Lint. Comment out import/export tests * Add WithDatabase read only wrapper * Prepend tasks to list * Add 32 pre-migration * Add warnings in release and migration notes
2022-07-13 06:30:54 +00:00
package file
import (
"context"
"errors"
"fmt"
"io/fs"
"github.com/stashapp/stash/pkg/job"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/txn"
)
// Cleaner scans through stored file and folder instances and removes those that are no longer present on disk.
type Cleaner struct {
FS FS
Repository Repository
Handlers []CleanHandler
}
type cleanJob struct {
*Cleaner
progress *job.Progress
options CleanOptions
}
// ScanOptions provides options for scanning files.
type CleanOptions struct {
Paths []string
// Do a dry run. Don't delete any files
DryRun bool
// PathFilter are used to determine if a file should be included.
// Excluded files are marked for cleaning.
PathFilter PathFilter
}
// Clean starts the clean process.
func (s *Cleaner) Clean(ctx context.Context, options CleanOptions, progress *job.Progress) {
j := &cleanJob{
Cleaner: s,
progress: progress,
options: options,
}
if err := j.execute(ctx); err != nil {
logger.Errorf("error cleaning files: %w", err)
return
}
}
type fileOrFolder struct {
fileID ID
folderID FolderID
}
type deleteSet struct {
orderedList []fileOrFolder
fileIDSet map[ID]string
folderIDSet map[FolderID]string
}
func newDeleteSet() deleteSet {
return deleteSet{
fileIDSet: make(map[ID]string),
folderIDSet: make(map[FolderID]string),
}
}
func (s *deleteSet) add(id ID, path string) {
if _, ok := s.fileIDSet[id]; !ok {
s.orderedList = append(s.orderedList, fileOrFolder{fileID: id})
s.fileIDSet[id] = path
}
}
func (s *deleteSet) has(id ID) bool {
_, ok := s.fileIDSet[id]
return ok
}
func (s *deleteSet) addFolder(id FolderID, path string) {
if _, ok := s.folderIDSet[id]; !ok {
s.orderedList = append(s.orderedList, fileOrFolder{folderID: id})
s.folderIDSet[id] = path
}
}
func (s *deleteSet) hasFolder(id FolderID) bool {
_, ok := s.folderIDSet[id]
return ok
}
func (s *deleteSet) len() int {
return len(s.orderedList)
}
func (j *cleanJob) execute(ctx context.Context) error {
progress := j.progress
toDelete := newDeleteSet()
var (
fileCount int
folderCount int
)
if err := txn.WithTxn(ctx, j.Repository, func(ctx context.Context) error {
var err error
fileCount, err = j.Repository.CountAllInPaths(ctx, j.options.Paths)
if err != nil {
return err
}
folderCount, err = j.Repository.FolderStore.CountAllInPaths(ctx, j.options.Paths)
if err != nil {
return err
}
return nil
}); err != nil {
return err
}
progress.AddTotal(fileCount + folderCount)
progress.Definite()
if err := j.assessFiles(ctx, &toDelete); err != nil {
return err
}
if err := j.assessFolders(ctx, &toDelete); err != nil {
return err
}
if j.options.DryRun && toDelete.len() > 0 {
// add progress for files that would've been deleted
progress.AddProcessed(toDelete.len())
return nil
}
progress.ExecuteTask(fmt.Sprintf("Cleaning %d files and folders", toDelete.len()), func() {
for _, ff := range toDelete.orderedList {
if job.IsCancelled(ctx) {
return
}
if ff.fileID != 0 {
j.deleteFile(ctx, ff.fileID, toDelete.fileIDSet[ff.fileID])
}
if ff.folderID != 0 {
j.deleteFolder(ctx, ff.folderID, toDelete.folderIDSet[ff.folderID])
}
progress.Increment()
}
})
return nil
}
func (j *cleanJob) assessFiles(ctx context.Context, toDelete *deleteSet) error {
const batchSize = 1000
offset := 0
progress := j.progress
more := true
if err := txn.WithTxn(ctx, j.Repository, func(ctx context.Context) error {
for more {
if job.IsCancelled(ctx) {
return nil
}
files, err := j.Repository.FindAllInPaths(ctx, j.options.Paths, batchSize, offset)
if err != nil {
return fmt.Errorf("error querying for files: %w", err)
}
for _, f := range files {
path := f.Base().Path
err = nil
fileID := f.Base().ID
// short-cut, don't assess if already added
if toDelete.has(fileID) {
continue
}
progress.ExecuteTask(fmt.Sprintf("Assessing file %s for clean", path), func() {
if j.shouldClean(ctx, f) {
err = j.flagFileForDelete(ctx, toDelete, f)
} else {
// increment progress, no further processing
progress.Increment()
}
})
if err != nil {
return err
}
}
if len(files) != batchSize {
more = false
} else {
offset += batchSize
}
}
return nil
}); err != nil {
return err
}
return nil
}
// flagFolderForDelete adds folders to the toDelete set, with the leaf folders added first
func (j *cleanJob) flagFileForDelete(ctx context.Context, toDelete *deleteSet, f File) error {
// add contained files first
containedFiles, err := j.Repository.FindByZipFileID(ctx, f.Base().ID)
if err != nil {
return fmt.Errorf("error finding contained files for %q: %w", f.Base().Path, err)
}
for _, cf := range containedFiles {
logger.Infof("Marking contained file %q to clean", cf.Base().Path)
toDelete.add(cf.Base().ID, cf.Base().Path)
}
// add contained folders as well
containedFolders, err := j.Repository.FolderStore.FindByZipFileID(ctx, f.Base().ID)
if err != nil {
return fmt.Errorf("error finding contained folders for %q: %w", f.Base().Path, err)
}
for _, cf := range containedFolders {
logger.Infof("Marking contained folder %q to clean", cf.Path)
toDelete.addFolder(cf.ID, cf.Path)
}
toDelete.add(f.Base().ID, f.Base().Path)
return nil
}
func (j *cleanJob) assessFolders(ctx context.Context, toDelete *deleteSet) error {
const batchSize = 1000
offset := 0
progress := j.progress
more := true
if err := txn.WithTxn(ctx, j.Repository, func(ctx context.Context) error {
for more {
if job.IsCancelled(ctx) {
return nil
}
folders, err := j.Repository.FolderStore.FindAllInPaths(ctx, j.options.Paths, batchSize, offset)
if err != nil {
return fmt.Errorf("error querying for folders: %w", err)
}
for _, f := range folders {
path := f.Path
folderID := f.ID
// short-cut, don't assess if already added
if toDelete.hasFolder(folderID) {
continue
}
err = nil
progress.ExecuteTask(fmt.Sprintf("Assessing folder %s for clean", path), func() {
if j.shouldCleanFolder(ctx, f) {
if err = j.flagFolderForDelete(ctx, toDelete, f); err != nil {
return
}
} else {
// increment progress, no further processing
progress.Increment()
}
})
if err != nil {
return err
}
}
if len(folders) != batchSize {
more = false
} else {
offset += batchSize
}
}
return nil
}); err != nil {
return err
}
return nil
}
func (j *cleanJob) flagFolderForDelete(ctx context.Context, toDelete *deleteSet, folder *Folder) error {
// it is possible that child folders may be included while parent folders are not
// so we need to check child folders separately
toDelete.addFolder(folder.ID, folder.Path)
return nil
}
func (j *cleanJob) shouldClean(ctx context.Context, f File) bool {
path := f.Base().Path
info, err := f.Base().Info(j.FS)
if err != nil && !errors.Is(err, fs.ErrNotExist) {
logger.Errorf("error getting file info for %q, not cleaning: %v", path, err)
return false
}
if info == nil {
// info is nil - file not exist
logger.Infof("File not found. Marking to clean: \"%s\"", path)
return true
}
// run through path filter, if returns false then the file should be cleaned
filter := j.options.PathFilter
// don't log anything - assume filter will have logged the reason
return !filter.Accept(ctx, path, info)
}
func (j *cleanJob) shouldCleanFolder(ctx context.Context, f *Folder) bool {
path := f.Path
info, err := f.Info(j.FS)
if err != nil && !errors.Is(err, fs.ErrNotExist) {
logger.Errorf("error getting folder info for %q, not cleaning: %v", path, err)
return false
}
if info == nil {
// info is nil - file not exist
logger.Infof("Folder not found. Marking to clean: \"%s\"", path)
return true
}
// run through path filter, if returns false then the file should be cleaned
filter := j.options.PathFilter
// don't log anything - assume filter will have logged the reason
return !filter.Accept(ctx, path, info)
}
func (j *cleanJob) deleteFile(ctx context.Context, fileID ID, fn string) {
// delete associated objects
fileDeleter := NewDeleter()
if err := txn.WithTxn(ctx, j.Repository, func(ctx context.Context) error {
fileDeleter.RegisterHooks(ctx, j.Repository)
if err := j.fireHandlers(ctx, fileDeleter, fileID); err != nil {
return err
}
return j.Repository.Destroy(ctx, fileID)
}); err != nil {
logger.Errorf("Error deleting file %q from database: %s", fn, err.Error())
return
}
}
func (j *cleanJob) deleteFolder(ctx context.Context, folderID FolderID, fn string) {
// delete associated objects
fileDeleter := NewDeleter()
if err := txn.WithTxn(ctx, j.Repository, func(ctx context.Context) error {
fileDeleter.RegisterHooks(ctx, j.Repository)
if err := j.fireFolderHandlers(ctx, fileDeleter, folderID); err != nil {
return err
}
return j.Repository.FolderStore.Destroy(ctx, folderID)
}); err != nil {
logger.Errorf("Error deleting folder %q from database: %s", fn, err.Error())
return
}
}
func (j *cleanJob) fireHandlers(ctx context.Context, fileDeleter *Deleter, fileID ID) error {
for _, h := range j.Handlers {
if err := h.HandleFile(ctx, fileDeleter, fileID); err != nil {
return err
}
}
return nil
}
func (j *cleanJob) fireFolderHandlers(ctx context.Context, fileDeleter *Deleter, folderID FolderID) error {
for _, h := range j.Handlers {
if err := h.HandleFolder(ctx, fileDeleter, folderID); err != nil {
return err
}
}
return nil
}