mirror of https://github.com/stashapp/stash.git
Add folder rename detection (#3817)
This commit is contained in:
parent
5c38836ade
commit
93b41fb650
|
@ -154,10 +154,12 @@ type Getter interface {
|
|||
FindByFingerprint(ctx context.Context, fp Fingerprint) ([]File, error)
|
||||
FindByZipFileID(ctx context.Context, zipFileID ID) ([]File, error)
|
||||
FindAllInPaths(ctx context.Context, p []string, limit, offset int) ([]File, error)
|
||||
FindByFileInfo(ctx context.Context, info fs.FileInfo, size int64) ([]File, error)
|
||||
}
|
||||
|
||||
type Counter interface {
|
||||
CountAllInPaths(ctx context.Context, p []string) (int, error)
|
||||
CountByFolderID(ctx context.Context, folderID FolderID) (int, error)
|
||||
}
|
||||
|
||||
// Creator provides methods to create Files.
|
||||
|
|
|
@ -0,0 +1,195 @@
|
|||
package file
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
|
||||
"github.com/stashapp/stash/pkg/logger"
|
||||
)
|
||||
|
||||
type folderRenameCandidate struct {
|
||||
folder *Folder
|
||||
found int
|
||||
files int
|
||||
}
|
||||
|
||||
type folderRenameDetector struct {
|
||||
// candidates is a map of folder id to the number of files that match
|
||||
candidates map[FolderID]folderRenameCandidate
|
||||
// rejects is a set of folder ids which were found to still exist
|
||||
rejects map[FolderID]struct{}
|
||||
}
|
||||
|
||||
func (d *folderRenameDetector) isReject(id FolderID) bool {
|
||||
_, ok := d.rejects[id]
|
||||
return ok
|
||||
}
|
||||
|
||||
func (d *folderRenameDetector) getCandidate(id FolderID) *folderRenameCandidate {
|
||||
c, ok := d.candidates[id]
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (d *folderRenameDetector) setCandidate(c folderRenameCandidate) {
|
||||
d.candidates[c.folder.ID] = c
|
||||
}
|
||||
|
||||
func (d *folderRenameDetector) reject(id FolderID) {
|
||||
d.rejects[id] = struct{}{}
|
||||
}
|
||||
|
||||
// bestCandidate returns the folder that is the best candidate for a rename.
|
||||
// This is the folder that has the largest number of its original files that
|
||||
// are still present in the new location.
|
||||
func (d *folderRenameDetector) bestCandidate() *Folder {
|
||||
if len(d.candidates) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var best *folderRenameCandidate
|
||||
|
||||
for _, c := range d.candidates {
|
||||
// ignore folders that have less than 50% of their original files
|
||||
if c.found < c.files/2 {
|
||||
continue
|
||||
}
|
||||
|
||||
// prefer the folder with the most files if the ratio is the same
|
||||
if best == nil || c.found > best.found {
|
||||
cc := c
|
||||
best = &cc
|
||||
}
|
||||
}
|
||||
|
||||
if best == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return best.folder
|
||||
}
|
||||
|
||||
func (s *scanJob) detectFolderMove(ctx context.Context, file scanFile) (*Folder, error) {
|
||||
// in order for a folder to be considered moved, the existing folder must be
|
||||
// missing, and the majority of the old folder's files must be present, unchanged,
|
||||
// in the new folder.
|
||||
|
||||
detector := folderRenameDetector{
|
||||
candidates: make(map[FolderID]folderRenameCandidate),
|
||||
rejects: make(map[FolderID]struct{}),
|
||||
}
|
||||
// rejects is a set of folder ids which were found to still exist
|
||||
|
||||
if err := symWalk(file.fs, file.Path, func(path string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
// don't let errors prevent scanning
|
||||
logger.Errorf("error scanning %s: %v", path, err)
|
||||
return nil
|
||||
}
|
||||
|
||||
// ignore root
|
||||
if path == file.Path {
|
||||
return nil
|
||||
}
|
||||
|
||||
// ignore directories
|
||||
if d.IsDir() {
|
||||
return fs.SkipDir
|
||||
}
|
||||
|
||||
info, err := d.Info()
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading info for %q: %w", path, err)
|
||||
}
|
||||
|
||||
if !s.acceptEntry(ctx, path, info) {
|
||||
return nil
|
||||
}
|
||||
|
||||
size, err := getFileSize(file.fs, path, info)
|
||||
if err != nil {
|
||||
return fmt.Errorf("getting file size for %q: %w", path, err)
|
||||
}
|
||||
|
||||
// check if the file exists in the database based on basename, size and mod time
|
||||
existing, err := s.Repository.Store.FindByFileInfo(ctx, info, size)
|
||||
if err != nil {
|
||||
return fmt.Errorf("checking for existing file %q: %w", path, err)
|
||||
}
|
||||
|
||||
for _, e := range existing {
|
||||
// ignore files in zip files
|
||||
if e.Base().ZipFileID != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
parentFolderID := e.Base().ParentFolderID
|
||||
|
||||
if detector.isReject(parentFolderID) {
|
||||
// folder was found to still exist, not a candidate
|
||||
continue
|
||||
}
|
||||
|
||||
c := detector.getCandidate(parentFolderID)
|
||||
|
||||
if c == nil {
|
||||
// need to check if the folder exists in the filesystem
|
||||
pf, err := s.Repository.FolderStore.Find(ctx, e.Base().ParentFolderID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("getting parent folder %d: %w", e.Base().ParentFolderID, err)
|
||||
}
|
||||
|
||||
if pf == nil {
|
||||
// shouldn't happen, but just in case
|
||||
continue
|
||||
}
|
||||
|
||||
// parent folder must be missing
|
||||
_, err = file.fs.Lstat(pf.Path)
|
||||
if err == nil {
|
||||
// parent folder exists, not a candidate
|
||||
detector.reject(parentFolderID)
|
||||
continue
|
||||
}
|
||||
|
||||
if !errors.Is(err, fs.ErrNotExist) {
|
||||
return fmt.Errorf("checking for parent folder %q: %w", pf.Path, err)
|
||||
}
|
||||
|
||||
// parent folder is missing, possible candidate
|
||||
// count the total number of files in the existing folder
|
||||
count, err := s.Repository.Store.CountByFolderID(ctx, parentFolderID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("counting files in folder %d: %w", parentFolderID, err)
|
||||
}
|
||||
|
||||
if count == 0 {
|
||||
// no files in the folder, not a candidate
|
||||
detector.reject(parentFolderID)
|
||||
continue
|
||||
}
|
||||
|
||||
c = &folderRenameCandidate{
|
||||
folder: pf,
|
||||
found: 0,
|
||||
files: count,
|
||||
}
|
||||
}
|
||||
|
||||
// increment the count and set it in the map
|
||||
c.found++
|
||||
detector.setCandidate(*c)
|
||||
}
|
||||
|
||||
return nil
|
||||
}); err != nil {
|
||||
return nil, fmt.Errorf("walking filesystem for folder rename detection: %w", err)
|
||||
}
|
||||
|
||||
return detector.bestCandidate(), nil
|
||||
}
|
|
@ -215,19 +215,6 @@ func (s *scanJob) queueFileFunc(ctx context.Context, f FS, zipFile *scanFile) fs
|
|||
return fmt.Errorf("reading info for %q: %w", path, err)
|
||||
}
|
||||
|
||||
var size int64
|
||||
|
||||
// #2196/#3042 - replace size with target size if file is a symlink
|
||||
if info.Mode()&os.ModeSymlink == os.ModeSymlink {
|
||||
targetInfo, err := f.Stat(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading info for symlink %q: %w", path, err)
|
||||
}
|
||||
size = targetInfo.Size()
|
||||
} else {
|
||||
size = info.Size()
|
||||
}
|
||||
|
||||
if !s.acceptEntry(ctx, path, info) {
|
||||
if info.IsDir() {
|
||||
return fs.SkipDir
|
||||
|
@ -236,6 +223,11 @@ func (s *scanJob) queueFileFunc(ctx context.Context, f FS, zipFile *scanFile) fs
|
|||
return nil
|
||||
}
|
||||
|
||||
size, err := getFileSize(f, path, info)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
ff := scanFile{
|
||||
BaseFile: &BaseFile{
|
||||
DirEntry: DirEntry{
|
||||
|
@ -294,6 +286,19 @@ func (s *scanJob) queueFileFunc(ctx context.Context, f FS, zipFile *scanFile) fs
|
|||
}
|
||||
}
|
||||
|
||||
func getFileSize(f FS, path string, info fs.FileInfo) (int64, error) {
|
||||
// #2196/#3042 - replace size with target size if file is a symlink
|
||||
if info.Mode()&os.ModeSymlink == os.ModeSymlink {
|
||||
targetInfo, err := f.Stat(path)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("reading info for symlink %q: %w", path, err)
|
||||
}
|
||||
return targetInfo.Size(), nil
|
||||
}
|
||||
|
||||
return info.Size(), nil
|
||||
}
|
||||
|
||||
func (s *scanJob) acceptEntry(ctx context.Context, path string, info fs.FileInfo) bool {
|
||||
// always accept if there's no filters
|
||||
accept := len(s.options.ScanFilters) == 0
|
||||
|
@ -485,6 +490,15 @@ func (s *scanJob) handleFolder(ctx context.Context, file scanFile) error {
|
|||
}
|
||||
|
||||
func (s *scanJob) onNewFolder(ctx context.Context, file scanFile) (*Folder, error) {
|
||||
renamed, err := s.handleFolderRename(ctx, file)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if renamed != nil {
|
||||
return renamed, nil
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
|
||||
toCreate := &Folder{
|
||||
|
@ -522,6 +536,42 @@ func (s *scanJob) onNewFolder(ctx context.Context, file scanFile) (*Folder, erro
|
|||
return toCreate, nil
|
||||
}
|
||||
|
||||
func (s *scanJob) handleFolderRename(ctx context.Context, file scanFile) (*Folder, error) {
|
||||
// ignore folders in zip files
|
||||
if file.ZipFileID != nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// check if the folder was moved from elsewhere
|
||||
renamedFrom, err := s.detectFolderMove(ctx, file)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("detecting folder move: %w", err)
|
||||
}
|
||||
|
||||
if renamedFrom == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// if the folder was moved, update the existing folder
|
||||
logger.Infof("%s moved to %s. Updating path...", renamedFrom.Path, file.Path)
|
||||
renamedFrom.Path = file.Path
|
||||
|
||||
// update the parent folder ID
|
||||
// find the parent folder
|
||||
parentFolderID, err := s.getFolderID(ctx, filepath.Dir(file.Path))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("getting parent folder for %q: %w", file.Path, err)
|
||||
}
|
||||
|
||||
renamedFrom.ParentFolderID = parentFolderID
|
||||
|
||||
if err := s.Repository.FolderStore.Update(ctx, renamedFrom); err != nil {
|
||||
return nil, fmt.Errorf("updating folder for rename %q: %w", renamedFrom.Path, err)
|
||||
}
|
||||
|
||||
return renamedFrom, nil
|
||||
}
|
||||
|
||||
func (s *scanJob) onExistingFolder(ctx context.Context, f scanFile, existing *Folder) (*Folder, error) {
|
||||
update := false
|
||||
|
||||
|
|
|
@ -5,8 +5,10 @@ import (
|
|||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/doug-martin/goqu/v9"
|
||||
"github.com/doug-martin/goqu/v9/exp"
|
||||
|
@ -713,6 +715,31 @@ func (qb *FileStore) FindByZipFileID(ctx context.Context, zipFileID file.ID) ([]
|
|||
return qb.getMany(ctx, q)
|
||||
}
|
||||
|
||||
// FindByFileInfo finds files that match the base name, size, and mod time of the given file.
|
||||
func (qb *FileStore) FindByFileInfo(ctx context.Context, info fs.FileInfo, size int64) ([]file.File, error) {
|
||||
table := qb.table()
|
||||
|
||||
modTime := info.ModTime().Format(time.RFC3339)
|
||||
|
||||
q := qb.selectDataset().Prepared(true).Where(
|
||||
table.Col("basename").Eq(info.Name()),
|
||||
table.Col("size").Eq(size),
|
||||
table.Col("mod_time").Eq(modTime),
|
||||
)
|
||||
|
||||
return qb.getMany(ctx, q)
|
||||
}
|
||||
|
||||
func (qb *FileStore) CountByFolderID(ctx context.Context, folderID file.FolderID) (int, error) {
|
||||
table := qb.table()
|
||||
|
||||
q := qb.countDataset().Prepared(true).Where(
|
||||
table.Col("parent_folder_id").Eq(folderID),
|
||||
)
|
||||
|
||||
return count(ctx, q)
|
||||
}
|
||||
|
||||
func (qb *FileStore) IsPrimary(ctx context.Context, fileID file.ID) (bool, error) {
|
||||
joinTables := []exp.IdentifierExpression{
|
||||
scenesFilesJoinTable,
|
||||
|
|
Loading…
Reference in New Issue