stash/pkg/file/scan.go

1159 lines
29 KiB
Go

package file
import (
"context"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
"sync"
"time"
"github.com/remeh/sizedwaitgroup"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/models"
"github.com/stashapp/stash/pkg/txn"
"github.com/stashapp/stash/pkg/utils"
)
const (
	// scanQueueSize is the buffer size of the channel between the
	// filesystem walker goroutine and the scanning workers.
	scanQueueSize = 200000

	// maximum number of times to retry in the event of a locked database
	// use -1 to retry forever
	maxRetries = -1
)
// Scanner scans files into the database.
//
// The scan process works using two goroutines. The first walks through the provided paths
// in the filesystem. It runs each directory entry through the provided ScanFilters. If none
// of the filter Accept methods return true, then the file/directory is ignored.
// Any folders found are handled immediately. Files inside zip files are also handled immediately.
// All other files encountered are sent to the second goroutine queue.
//
// Folders are handled by checking if the folder exists in the database, by its full path.
// If a folder entry already exists, then its mod time is updated (if applicable).
// If the folder does not exist in the database, then a new folder entry its created.
//
// Files are handled by first querying for the file by its path. If the file entry exists in the
// database, then the mod time is compared to the value in the database. If the mod time is different
// then file is marked as updated - it recalculates any fingerprints and fires decorators, then
// the file entry is updated and any applicable handlers are fired.
//
// If the file entry does not exist in the database, then fingerprints are calculated for the file.
// It then determines if the file is a rename of an existing file by querying for file entries with
// the same fingerprint. If any are found, it checks each to see if any are missing in the file
// system. If one is, then the file is treated as renamed and its path is updated. If none are missing,
// or many are, then the file is treated as a new file.
//
// If the file is not a renamed file, then the decorators are fired and the file is created, then
// the applicable handlers are fired.
type Scanner struct {
	// FS is the filesystem used to walk paths and open files.
	FS models.FS
	// Repository provides database access for file and folder entries.
	Repository Repository
	// FingerprintCalculator calculates fingerprints for scanned files.
	FingerprintCalculator FingerprintCalculator

	// FileDecorators are applied to files as they are scanned.
	FileDecorators []Decorator
}
// FingerprintCalculator calculates a fingerprint for the provided file.
type FingerprintCalculator interface {
	// CalculateFingerprints returns the fingerprints for f, opening the file
	// contents via o. When useExisting is true, fingerprint values already
	// present on f are presumably reused rather than recalculated — see
	// calculateFingerprints, which only logs when useExisting is false.
	CalculateFingerprints(f *models.BaseFile, o Opener, useExisting bool) ([]models.Fingerprint, error)
}
// Decorator wraps the Decorate method to add additional functionality while scanning files.
type Decorator interface {
	// Decorate returns f, possibly with additional metadata populated.
	Decorate(ctx context.Context, fs models.FS, f models.File) (models.File, error)
	// IsMissingMetadata returns true if f is missing metadata that this
	// decorator could populate.
	IsMissingMetadata(ctx context.Context, fs models.FS, f models.File) bool
}
// FilteredDecorator is a Decorator that is applied only to files
// accepted by the embedded Filter.
type FilteredDecorator struct {
	Decorator
	Filter
}
// Decorate runs the wrapped decorator only if the filter accepts the file;
// otherwise the file is returned unchanged.
func (d *FilteredDecorator) Decorate(ctx context.Context, fs models.FS, f models.File) (models.File, error) {
	if !d.Accept(ctx, f) {
		return f, nil
	}
	return d.Decorator.Decorate(ctx, fs, f)
}
// IsMissingMetadata returns the wrapped decorator's result if the filter
// accepts the file, and false otherwise.
func (d *FilteredDecorator) IsMissingMetadata(ctx context.Context, fs models.FS, f models.File) bool {
	if !d.Accept(ctx, f) {
		return false
	}
	return d.Decorator.IsMissingMetadata(ctx, fs, f)
}
// ProgressReporter is used to report progress of the scan.
type ProgressReporter interface {
	// AddTotal adds to the total number of items to be processed.
	AddTotal(total int)
	// Increment marks one item as processed.
	Increment()
	// Definite is called once the total is known (after walking completes).
	Definite()
	// ExecuteTask runs fn, reported under the given description.
	ExecuteTask(description string, fn func())
}
// scanJob holds the state of a single scan operation.
type scanJob struct {
	*Scanner

	// handlers are called after a file has been scanned.
	handlers        []Handler
	ProgressReports ProgressReporter
	options         ScanOptions

	// startTime is when the scan began.
	startTime time.Time
	// fileQueue passes files from the walker goroutine to the workers.
	fileQueue chan scanFile
	// retryList holds files whose parent folder did not yet exist on the
	// first pass; they are re-processed after the queue is drained.
	retryList []scanFile
	// retrying is set once the retry pass begins.
	retrying bool

	// folderPathToID caches folder path -> models.FolderID lookups.
	folderPathToID sync.Map
	// zipPathToID caches zip file path -> models.FileID lookups.
	zipPathToID sync.Map

	// count is the number of files queued by the walker.
	count int

	// txnRetryer retries transactions, e.g. on a locked database.
	txnRetryer txn.Retryer
}
// ScanOptions provides options for scanning files.
type ScanOptions struct {
	// Paths is the list of filesystem paths to scan.
	Paths []string

	// ZipFileExtensions is a list of file extensions that are considered zip files.
	// Extension does not include the . character.
	ZipFileExtensions []string

	// ScanFilters are used to determine if a file should be scanned.
	ScanFilters []PathFilter

	// HandlerRequiredFilters are used to determine if an unchanged file needs to be handled
	HandlerRequiredFilters []Filter

	// ParallelTasks is the number of files processed concurrently.
	// Values below 1 are treated as 1.
	ParallelTasks int

	// When true files in path will be rescanned even if they haven't changed
	Rescan bool
}
// Scan starts the scanning process.
func (s *Scanner) Scan(ctx context.Context, handlers []Handler, options ScanOptions, progressReporter ProgressReporter) {
	retryer := txn.Retryer{
		Manager: s.Repository.TxnManager,
		Retries: maxRetries,
	}

	job := &scanJob{
		Scanner:         s,
		handlers:        handlers,
		ProgressReports: progressReporter,
		options:         options,
		txnRetryer:      retryer,
	}

	job.execute(ctx)
}
// scanFile couples a file's database representation with the filesystem it
// was found on and its directory entry info.
type scanFile struct {
	*models.BaseFile
	fs   models.FS
	info fs.FileInfo
}
// withTxn runs fn inside a database transaction, retrying via the job's
// txnRetryer (e.g. in the event of a locked database).
func (s *scanJob) withTxn(ctx context.Context, fn func(ctx context.Context) error) error {
	return s.txnRetryer.WithTxn(ctx, fn)
}
// withDB runs fn with database access, without an explicit transaction.
func (s *scanJob) withDB(ctx context.Context, fn func(ctx context.Context) error) error {
	return s.Repository.WithDB(ctx, fn)
}
// execute runs the scan: one goroutine walks the filesystem and feeds the
// file queue, while processQueue consumes it on this goroutine.
func (s *scanJob) execute(ctx context.Context) {
	paths := s.options.Paths
	logger.Infof("scanning %d paths", len(paths))
	s.startTime = time.Now()

	s.fileQueue = make(chan scanFile, scanQueueSize)

	var walkDone sync.WaitGroup
	walkDone.Add(1)
	go func() {
		defer walkDone.Done()

		err := s.queueFiles(ctx, paths)
		switch {
		case err == nil:
			logger.Infof("Finished adding files to queue. %d files queued", s.count)
		case errors.Is(err, context.Canceled):
			// cancelled - nothing to log
		default:
			logger.Errorf("error queuing files for scan: %v", err)
		}
	}()
	// ensure the walker goroutine has finished before returning
	defer walkDone.Wait()

	if err := s.processQueue(ctx); err != nil {
		if !errors.Is(err, context.Canceled) {
			logger.Errorf("error scanning files: %v", err)
		}
		return
	}
}
// queueFiles walks each of the provided paths, sending discovered files to
// the file queue. The queue is closed when walking completes, and progress
// totals are reported. Returns the first walk error encountered.
func (s *scanJob) queueFiles(ctx context.Context, paths []string) error {
	var err error

	walk := func() {
		for _, p := range paths {
			err = symWalk(s.FS, p, s.queueFileFunc(ctx, s.FS, nil))
			if err != nil {
				return
			}
		}
	}

	// guard against a nil reporter - consistent with the nil check below
	// and with incrementProgress
	if s.ProgressReports != nil {
		s.ProgressReports.ExecuteTask("Walking directory tree", walk)
	} else {
		walk()
	}

	// signal the consumer that no more files will be queued
	close(s.fileQueue)

	if s.ProgressReports != nil {
		s.ProgressReports.AddTotal(s.count)
		s.ProgressReports.Definite()
	}

	return err
}
// queueFileFunc returns a fs.WalkDirFunc that feeds the scan. Folders and
// files inside zip files are handled immediately; all other files are sent
// to the file queue. zipFile is the containing zip file, or nil when
// walking the regular filesystem.
func (s *scanJob) queueFileFunc(ctx context.Context, f models.FS, zipFile *scanFile) fs.WalkDirFunc {
	return func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			// don't let errors prevent scanning
			logger.Errorf("error scanning %s: %v", path, err)
			return nil
		}

		// abort the walk if the context has been cancelled
		if err = ctx.Err(); err != nil {
			return err
		}

		info, err := d.Info()
		if err != nil {
			return fmt.Errorf("reading info for %q: %w", path, err)
		}

		if !s.acceptEntry(ctx, path, info) {
			if info.IsDir() {
				// rejected directory: skip its entire subtree
				return fs.SkipDir
			}
			return nil
		}

		// size of the target for symlinks, otherwise the entry's own size
		size, err := getFileSize(f, path, info)
		if err != nil {
			return err
		}

		ff := scanFile{
			BaseFile: &models.BaseFile{
				DirEntry: models.DirEntry{
					ModTime: modTime(info),
				},
				Path:     path,
				Basename: filepath.Base(path),
				Size:     size,
			},
			fs:   f,
			info: info,
		}

		// link the file to its containing zip file, if any
		if zipFile != nil {
			zipFileID, err := s.getZipFileID(ctx, zipFile)
			if err != nil {
				return err
			}
			ff.ZipFileID = zipFileID
			ff.ZipFile = zipFile
		}

		if info.IsDir() {
			// handle folders immediately
			if err := s.handleFolder(ctx, ff); err != nil {
				if !errors.Is(err, context.Canceled) {
					logger.Errorf("error processing %q: %v", path, err)
				}

				// skip the directory since we won't be able to process the files anyway
				return fs.SkipDir
			}

			return nil
		}

		// if zip file is present, we handle immediately
		// NOTE(review): ProgressReports is assumed non-nil here - confirm
		if zipFile != nil {
			s.ProgressReports.ExecuteTask("Scanning "+path, func() {
				if err := s.handleFile(ctx, ff); err != nil {
					if !errors.Is(err, context.Canceled) {
						logger.Errorf("error processing %q: %v", path, err)
					}
					// don't return an error, just skip the file
				}
			})

			return nil
		}

		s.fileQueue <- ff
		s.count++

		return nil
	}
}
// getFileSize returns the size to record for path. For symlinks the size of
// the link target is used instead of the link itself (#2196/#3042).
func getFileSize(f models.FS, path string, info fs.FileInfo) (int64, error) {
	if info.Mode()&os.ModeSymlink == 0 {
		return info.Size(), nil
	}

	// symlink - stat the target to get its size
	targetInfo, err := f.Stat(path)
	if err != nil {
		return 0, fmt.Errorf("reading info for symlink %q: %w", path, err)
	}
	return targetInfo.Size(), nil
}
// acceptEntry returns true if the entry at path should be scanned. With no
// filters configured, everything is accepted; otherwise at least one filter
// must accept the entry.
func (s *scanJob) acceptEntry(ctx context.Context, path string, info fs.FileInfo) bool {
	if len(s.options.ScanFilters) == 0 {
		return true
	}

	for _, filter := range s.options.ScanFilters {
		if filter.Accept(ctx, path, info) {
			return true
		}
	}

	return false
}
// scanZipFile walks the contents of the zip file f, queueing its entries.
// Zip files whose contents cannot be read as a ReaderAt are silently skipped.
func (s *scanJob) scanZipFile(ctx context.Context, f scanFile) error {
	zipFS, err := f.fs.OpenZip(f.Path, f.Size)
	switch {
	case errors.Is(err, errNotReaderAt):
		// can't walk the zip file - just skip it
		return nil
	case err != nil:
		return err
	}

	defer zipFS.Close()

	return symWalk(zipFS, f.Path, s.queueFileFunc(ctx, zipFS, &f))
}
// processQueue consumes the file queue with up to ParallelTasks workers.
// After the queue is drained, files that were deferred because their parent
// folder did not yet exist (retryList) are processed in a second pass.
func (s *scanJob) processQueue(ctx context.Context) error {
	parallelTasks := s.options.ParallelTasks
	if parallelTasks < 1 {
		parallelTasks = 1
	}

	wg := sizedwaitgroup.New(parallelTasks)

	// dispatch runs f on a worker goroutine, blocking if the worker limit
	// has been reached. Taking f by value avoids loop-variable capture.
	dispatch := func(f scanFile) {
		wg.Add()
		go func() {
			defer wg.Done()
			s.processQueueItem(ctx, f)
		}()
	}

	// first pass: everything from the walker
	if err := func() error {
		defer wg.Wait()
		for f := range s.fileQueue {
			if err := ctx.Err(); err != nil {
				return err
			}
			dispatch(f)
		}
		return nil
	}(); err != nil {
		return err
	}

	// second pass: files whose parent folder was missing on the first pass
	s.retrying = true

	if err := func() error {
		defer wg.Wait()
		for _, f := range s.retryList {
			if err := ctx.Err(); err != nil {
				return err
			}
			dispatch(f)
		}
		return nil
	}(); err != nil {
		return err
	}

	return nil
}
// incrementProgress increments the progress counter for f. Files inside zip
// files are skipped, since they aren't counted during the initial walk.
func (s *scanJob) incrementProgress(f scanFile) {
	if f.ZipFile != nil || s.ProgressReports == nil {
		return
	}
	s.ProgressReports.Increment()
}
// processQueueItem processes a single queued entry, dispatching to the
// folder or file handler. Errors (other than cancellation) are logged, not
// returned, so one bad file doesn't stop the scan.
func (s *scanJob) processQueueItem(ctx context.Context, f scanFile) {
	task := func() {
		var err error
		if f.info.IsDir() {
			err = s.handleFolder(ctx, f)
		} else {
			err = s.handleFile(ctx, f)
		}

		if err != nil && !errors.Is(err, context.Canceled) {
			logger.Errorf("error processing %q: %v", f.Path, err)
		}
	}

	// guard against a nil reporter - consistent with incrementProgress
	if s.ProgressReports != nil {
		s.ProgressReports.ExecuteTask("Scanning "+f.Path, task)
		return
	}
	task()
}
// getFolderID returns the ID of the folder with the given path, consulting
// the folder cache before the database. Returns nil (without error) if the
// folder does not exist in the database.
func (s *scanJob) getFolderID(ctx context.Context, path string) (*models.FolderID, error) {
	// check the folder cache first
	if cached, ok := s.folderPathToID.Load(path); ok {
		id := cached.(models.FolderID)
		return &id, nil
	}

	folder, err := s.Repository.Folder.FindByPath(ctx, path)
	if err != nil {
		return nil, err
	}
	if folder == nil {
		return nil, nil
	}

	s.folderPathToID.Store(path, folder.ID)
	return &folder.ID, nil
}
// getZipFileID returns the file ID of the given zip file, consulting the
// zip path cache before the database. Returns nil if zipFile is nil, and an
// error if the zip file has no database entry.
func (s *scanJob) getZipFileID(ctx context.Context, zipFile *scanFile) (*models.FileID, error) {
	if zipFile == nil {
		return nil, nil
	}

	// the ID is already known
	if zipFile.ID != 0 {
		return &zipFile.ID, nil
	}

	path := zipFile.Path

	// check the zip path cache first
	if f, ok := s.zipPathToID.Load(path); ok {
		v := f.(models.FileID)
		return &v, nil
	}

	ret, err := s.Repository.File.FindByPath(ctx, path)
	if err != nil {
		return nil, fmt.Errorf("getting zip file ID for %q: %w", path, err)
	}

	if ret == nil {
		return nil, fmt.Errorf("zip file %q doesn't exist in database", zipFile.Path)
	}

	s.zipPathToID.Store(path, ret.Base().ID)
	return &ret.Base().ID, nil
}
// handleFolder creates or updates the database entry for the folder at
// file.Path inside a transaction, caching the resulting folder ID.
func (s *scanJob) handleFolder(ctx context.Context, file scanFile) error {
	path := file.Path

	return s.withTxn(ctx, func(ctx context.Context) error {
		defer s.incrementProgress(file)

		// look for an existing folder entry by its full path
		existing, err := s.Repository.Folder.FindByPath(ctx, path)
		if err != nil {
			return fmt.Errorf("checking for existing folder %q: %w", path, err)
		}

		var folder *models.Folder
		if existing == nil {
			folder, err = s.onNewFolder(ctx, file)
		} else {
			folder, err = s.onExistingFolder(ctx, file, existing)
		}
		if err != nil {
			return err
		}

		if folder != nil {
			s.folderPathToID.Store(folder.Path, folder.ID)
		}

		return nil
	})
}
// onNewFolder handles a folder with no existing database entry. If the
// folder is detected as a rename/move of an existing folder, that entry is
// updated; otherwise a new folder entry is created.
func (s *scanJob) onNewFolder(ctx context.Context, file scanFile) (*models.Folder, error) {
	renamed, err := s.handleFolderRename(ctx, file)
	if err != nil {
		return nil, err
	}
	if renamed != nil {
		return renamed, nil
	}

	now := time.Now()

	toCreate := &models.Folder{
		DirEntry:  file.DirEntry,
		Path:      file.Path,
		CreatedAt: now,
		UpdatedAt: now,
	}

	dir := filepath.Dir(file.Path)
	if dir != "." {
		parentFolderID, err := s.getFolderID(ctx, dir)
		if err != nil {
			return nil, fmt.Errorf("getting parent folder %q: %w", dir, err)
		}

		// if parent folder doesn't exist, assume it's a top-level folder
		// this may not be true if we're using multiple goroutines
		if parentFolderID != nil {
			toCreate.ParentFolderID = parentFolderID
		}
	}

	txn.AddPostCommitHook(ctx, func(ctx context.Context) {
		// log at the end so that if anything fails above due to a locked database
		// error and the transaction must be retried, then we shouldn't get multiple
		// logs of the same thing.
		logger.Infof("%s doesn't exist. Creating new folder entry...", file.Path)
	})

	if err := s.Repository.Folder.Create(ctx, toCreate); err != nil {
		return nil, fmt.Errorf("creating folder %q: %w", file.Path, err)
	}

	return toCreate, nil
}
// handleFolderRename determines whether file is an existing folder that was
// moved, and if so updates the existing entry (and its sub-folder
// hierarchy) to the new path. Returns nil if no move was detected.
func (s *scanJob) handleFolderRename(ctx context.Context, file scanFile) (*models.Folder, error) {
	// ignore folders in zip files
	if file.ZipFileID != nil {
		return nil, nil
	}

	// check if the folder was moved from elsewhere
	renamedFrom, err := s.detectFolderMove(ctx, file)
	if err != nil {
		return nil, fmt.Errorf("detecting folder move: %w", err)
	}

	if renamedFrom == nil {
		return nil, nil
	}

	// if the folder was moved, update the existing folder
	logger.Infof("%s moved to %s. Updating path...", renamedFrom.Path, file.Path)
	renamedFrom.Path = file.Path

	// update the parent folder ID
	// find the parent folder
	parentFolderID, err := s.getFolderID(ctx, filepath.Dir(file.Path))
	if err != nil {
		return nil, fmt.Errorf("getting parent folder for %q: %w", file.Path, err)
	}

	renamedFrom.ParentFolderID = parentFolderID

	if err := s.Repository.Folder.Update(ctx, renamedFrom); err != nil {
		return nil, fmt.Errorf("updating folder for rename %q: %w", renamedFrom.Path, err)
	}

	// #4146 - correct sub-folders to have the correct path
	if err := correctSubFolderHierarchy(ctx, s.Repository.Folder, renamedFrom); err != nil {
		return nil, fmt.Errorf("correcting sub folder hierarchy for %q: %w", renamedFrom.Path, err)
	}

	return renamedFrom, nil
}
// onExistingFolder updates the existing folder entry if its mod time or
// containing zip file ID has changed.
func (s *scanJob) onExistingFolder(ctx context.Context, f scanFile, existing *models.Folder) (*models.Folder, error) {
	update := false

	// detect a mod time change
	if !f.ModTime.Equal(existing.ModTime) {
		existing.ModTime = f.ModTime
		update = true
	}

	// detect a zip file ID change
	newZipID := f.ZipFileID
	oldZipID := existing.ZipFileID
	switch {
	case newZipID == nil && oldZipID != nil:
		existing.ZipFileID = nil
		update = true
	case newZipID != nil && (oldZipID == nil || *oldZipID != *newZipID):
		existing.ZipFileID = newZipID
		update = true
	}

	if update {
		if err := s.Repository.Folder.Update(ctx, existing); err != nil {
			return nil, fmt.Errorf("updating folder %q: %w", f.Path, err)
		}
	}

	return existing, nil
}
// modTime returns the entry's modification time truncated to whole seconds,
// since the database doesn't store sub-second precision.
func modTime(info fs.FileInfo) time.Time {
	t := info.ModTime()
	return t.Truncate(time.Second)
}
// handleFile processes a single scanned file, creating or updating its
// database entry as appropriate. If the resulting file is a zip file, its
// contents are then scanned as well.
func (s *scanJob) handleFile(ctx context.Context, f scanFile) error {
	defer s.incrementProgress(f)

	var ff models.File
	// don't use a transaction to check if new or existing
	if err := s.withDB(ctx, func(ctx context.Context) error {
		// determine if file already exists in data store
		var err error
		ff, err = s.Repository.File.FindByPath(ctx, f.Path)
		if err != nil {
			return fmt.Errorf("checking for existing file %q: %w", f.Path, err)
		}

		if ff == nil {
			// returns a file only if it is actually new
			ff, err = s.onNewFile(ctx, f)
			return err
		}

		ff, err = s.onExistingFile(ctx, f, ff)
		return err
	}); err != nil {
		return err
	}

	// ff is nil when the file needs no further processing (e.g. handled as
	// a rename, or deferred for retry)
	if ff != nil && s.isZipFile(f.info.Name()) {
		f.BaseFile = ff.Base()

		// scan zip files with a different context that is not cancellable
		// cancelling while scanning zip file contents results in the scan
		// contents being partially completed
		zipCtx := utils.ValueOnlyContext{Context: ctx}
		if err := s.scanZipFile(zipCtx, f); err != nil {
			logger.Errorf("Error scanning zip file %q: %v", f.Path, err)
		}
	}

	return nil
}
// isZipFile returns true if path has one of the configured zip file
// extensions, compared case-insensitively.
func (s *scanJob) isZipFile(path string) bool {
	fileExt := filepath.Ext(path)
	for _, zipExt := range s.options.ZipFileExtensions {
		// configured extensions don't include the leading dot
		if strings.EqualFold("."+zipExt, fileExt) {
			return true
		}
	}
	return false
}
// onNewFile handles a file with no existing database entry: it calculates
// fingerprints, runs the decorators, and either treats the file as a rename
// of an existing entry or creates a new one. Returns a file only if it is
// actually new; renamed files and files deferred for retry return nil.
func (s *scanJob) onNewFile(ctx context.Context, f scanFile) (models.File, error) {
	now := time.Now()

	baseFile := f.BaseFile
	path := baseFile.Path

	baseFile.CreatedAt = now
	baseFile.UpdatedAt = now

	// find the parent folder
	parentFolderID, err := s.getFolderID(ctx, filepath.Dir(path))
	if err != nil {
		return nil, fmt.Errorf("getting parent folder for %q: %w", path, err)
	}

	if parentFolderID == nil {
		// if parent folder doesn't exist, assume it's not yet created
		// add this file to the queue to be created later
		if s.retrying {
			// if we're retrying and the folder still doesn't exist, then it's a problem
			return nil, fmt.Errorf("parent folder for %q doesn't exist", path)
		}

		s.retryList = append(s.retryList, f)
		return nil, nil
	}

	baseFile.ParentFolderID = *parentFolderID

	const useExisting = false
	fp, err := s.calculateFingerprints(f.fs, baseFile, path, useExisting)
	if err != nil {
		return nil, err
	}

	baseFile.SetFingerprints(fp)

	file, err := s.fireDecorators(ctx, f.fs, baseFile)
	if err != nil {
		return nil, err
	}

	// determine if the file is renamed from an existing file in the store
	// do this after decoration so that missing fields can be populated
	renamed, err := s.handleRename(ctx, file, fp)
	if err != nil {
		return nil, err
	}

	if renamed != nil {
		// handle rename should have already handled the contents of the zip file
		// so shouldn't need to scan it again
		// return nil so it doesn't
		return nil, nil
	}

	// if not renamed, queue file for creation
	if err := s.withTxn(ctx, func(ctx context.Context) error {
		if err := s.Repository.File.Create(ctx, file); err != nil {
			return fmt.Errorf("creating file %q: %w", path, err)
		}

		// new file - no old file to pass to the handlers
		if err := s.fireHandlers(ctx, file, nil); err != nil {
			return err
		}

		return nil
	}); err != nil {
		return nil, err
	}

	return file, nil
}
// fireDecorators applies each configured file decorator to f in order,
// returning the decorated file. Stops at the first error.
func (s *scanJob) fireDecorators(ctx context.Context, fs models.FS, f models.File) (models.File, error) {
	var err error
	for _, d := range s.FileDecorators {
		if f, err = d.Decorate(ctx, fs, f); err != nil {
			return f, err
		}
	}

	return f, nil
}
// fireHandlers runs each configured handler against f. oldFile is the
// previous version of the file, and is nil for new files.
func (s *scanJob) fireHandlers(ctx context.Context, f models.File, oldFile models.File) error {
	for _, handler := range s.handlers {
		if err := handler.Handle(ctx, f, oldFile); err != nil {
			return err
		}
	}

	return nil
}
// calculateFingerprints calculates fingerprints for the file at path via
// the configured FingerprintCalculator. useExisting is passed through to the
// calculator; logging only happens when (re)calculating.
func (s *scanJob) calculateFingerprints(fs models.FS, f *models.BaseFile, path string, useExisting bool) (models.Fingerprints, error) {
	if !useExisting {
		// only log if we're (re)calculating fingerprints
		logger.Infof("Calculating fingerprints for %s ...", path)
	}

	opener := &fsOpener{
		fs:   fs,
		name: path,
	}

	fp, err := s.FingerprintCalculator.CalculateFingerprints(f, opener, useExisting)
	if err != nil {
		return nil, fmt.Errorf("calculating fingerprint for file %q: %w", path, err)
	}

	return fp, nil
}
// appendFileUnique appends the files in toAdd to v, skipping any whose ID is
// already present (in v or earlier in toAdd). Order is preserved.
// Uses a set of seen IDs so the merge is O(n) rather than O(n²).
func appendFileUnique(v []models.File, toAdd []models.File) []models.File {
	seen := make(map[models.FileID]struct{}, len(v)+len(toAdd))
	for _, f := range v {
		seen[f.Base().ID] = struct{}{}
	}

	for _, f := range toAdd {
		id := f.Base().ID
		if _, ok := seen[id]; ok {
			continue
		}
		seen[id] = struct{}{}
		v = append(v, f)
	}

	return v
}
// getFileFS returns the filesystem containing f: the scanner's FS for
// regular files, or the opened zip filesystem for files inside zip files
// (recursing for nested zips).
func (s *scanJob) getFileFS(f *models.BaseFile) (models.FS, error) {
	if f.ZipFile == nil {
		return s.FS, nil
	}

	// resolve the filesystem that contains the zip file itself
	fs, err := s.getFileFS(f.ZipFile.Base())
	if err != nil {
		return nil, err
	}

	zipPath := f.ZipFile.Base().Path
	// NOTE(review): f.Size here is the size of the file *inside* the zip,
	// not the zip itself - confirm OpenZip expects this value.
	return fs.OpenZip(zipPath, f.Size)
}
// handleRename determines if f is a rename/move of an existing file entry
// by finding entries with matching fingerprints that are missing from the
// filesystem. If one is found, the first missing entry is updated to the
// new path and returned; otherwise nil is returned.
func (s *scanJob) handleRename(ctx context.Context, f models.File, fp []models.Fingerprint) (models.File, error) {
	var others []models.File

	// collect all files sharing any of the fingerprints, deduplicated by ID
	for _, tfp := range fp {
		thisOthers, err := s.Repository.File.FindByFingerprint(ctx, tfp)
		if err != nil {
			return nil, fmt.Errorf("getting files by fingerprint %v: %w", tfp, err)
		}

		others = appendFileUnique(others, thisOthers)
	}

	var missing []models.File

	fZipID := f.Base().ZipFileID
	for _, other := range others {
		// if file is from a zip file, then only rename if both files are from the same zip file
		otherZipID := other.Base().ZipFileID
		if otherZipID != nil && (fZipID == nil || *otherZipID != *fZipID) {
			continue
		}

		// if file does not exist, then update it to the new path
		fs, err := s.getFileFS(other.Base())
		if err != nil {
			// can't open the containing filesystem - treat as missing
			missing = append(missing, other)
			continue
		}

		info, err := fs.Lstat(other.Base().Path)
		switch {
		case err != nil:
			// file no longer exists at its recorded path
			missing = append(missing, other)
		case strings.EqualFold(f.Base().Path, other.Base().Path):
			// #1426 - if file exists but is a case-insensitive match for the
			// original filename, and the filesystem is case-insensitive
			// then treat it as a move
			if caseSensitive, _ := fs.IsPathCaseSensitive(other.Base().Path); !caseSensitive {
				// treat as a move
				missing = append(missing, other)
			}
		case !s.acceptEntry(ctx, other.Base().Path, info):
			// #4393 - if the file is no longer in the configured library paths, treat it as a move
			logger.Debugf("File %q no longer in library paths. Treating as a move.", other.Base().Path)
			missing = append(missing, other)
		}
	}

	n := len(missing)
	if n == 0 {
		// no missing files, not a rename
		return nil, nil
	}

	// assume does not exist, update existing file
	// it's possible that there may be multiple missing files.
	// just use the first one to rename.
	// #4775 - using the new file instance means that any changes made to the existing
	// file will be lost. Update the existing file instead.
	other := missing[0]
	updated := other.Clone()
	updatedBase := updated.Base()
	fBaseCopy := *(f.Base())

	oldPath := updatedBase.Path
	newPath := fBaseCopy.Path

	logger.Infof("%s moved to %s. Updating path...", oldPath, newPath)

	// preserve the existing entry's identity, creation time and fingerprints
	fBaseCopy.ID = updatedBase.ID
	fBaseCopy.CreatedAt = updatedBase.CreatedAt
	fBaseCopy.Fingerprints = updatedBase.Fingerprints
	*updatedBase = fBaseCopy

	if err := s.withTxn(ctx, func(ctx context.Context) error {
		if err := s.Repository.File.Update(ctx, updated); err != nil {
			return fmt.Errorf("updating file for rename %q: %w", newPath, err)
		}

		// moved zip files need their contained folder/file hierarchy moved too
		if s.isZipFile(updatedBase.Basename) {
			if err := transferZipHierarchy(ctx, s.Repository.Folder, s.Repository.File, updatedBase.ID, oldPath, newPath); err != nil {
				return fmt.Errorf("moving zip hierarchy for renamed zip file %q: %w", newPath, err)
			}
		}

		if err := s.fireHandlers(ctx, updated, other); err != nil {
			return err
		}

		return nil
	}); err != nil {
		return nil, err
	}

	return updated, nil
}
// isHandlerRequired returns true if an unchanged file still needs its
// handlers run. With no filters configured this is always true; otherwise
// at least one filter must accept the file.
func (s *scanJob) isHandlerRequired(ctx context.Context, f models.File) bool {
	if len(s.options.HandlerRequiredFilters) == 0 {
		return true
	}

	for _, filter := range s.options.HandlerRequiredFilters {
		if filter.Accept(ctx, f) {
			return true
		}
	}

	return false
}
// isMissingMetadata returns true if the provided file is missing metadata.
// Missing metadata should only occur after the 32 schema migration.
// Looks for special values. For numbers, this will be -1. For strings, this
// will be 'unset'.
// Missing metadata includes the following:
// - file size
// - image format, width or height
// - video codec, audio codec, format, width, height, framerate or bitrate
func (s *scanJob) isMissingMetadata(ctx context.Context, f scanFile, existing models.File) bool {
	for _, d := range s.FileDecorators {
		if d.IsMissingMetadata(ctx, f.fs, existing) {
			return true
		}
	}

	return false
}
// setMissingMetadata re-runs the decorators against existing to populate
// missing metadata, then persists the updated file.
func (s *scanJob) setMissingMetadata(ctx context.Context, f scanFile, existing models.File) (models.File, error) {
	path := existing.Base().Path
	logger.Infof("Updating metadata for %s", path)

	// the recorded size may also be stale - refresh it from the scanned file
	existing.Base().Size = f.Size

	decorated, err := s.fireDecorators(ctx, f.fs, existing)
	if err != nil {
		return nil, err
	}
	existing = decorated

	// persist the updated file
	err = s.withTxn(ctx, func(ctx context.Context) error {
		if err := s.Repository.File.Update(ctx, existing); err != nil {
			return fmt.Errorf("updating file %q: %w", path, err)
		}
		return nil
	})
	if err != nil {
		return nil, err
	}

	return existing, nil
}
// setMissingFingerprints calculates any fingerprints not already present on
// existing, persisting the file only if the fingerprint contents changed.
func (s *scanJob) setMissingFingerprints(ctx context.Context, f scanFile, existing models.File) (models.File, error) {
	const useExisting = true
	fp, err := s.calculateFingerprints(f.fs, existing.Base(), f.Path, useExisting)
	if err != nil {
		return nil, err
	}

	if !fp.ContentsChanged(existing.Base().Fingerprints) {
		// nothing new - no update needed
		return existing, nil
	}

	existing.SetFingerprints(fp)

	err = s.withTxn(ctx, func(ctx context.Context) error {
		if err := s.Repository.File.Update(ctx, existing); err != nil {
			return fmt.Errorf("updating file %q: %w", f.Path, err)
		}
		return nil
	})
	if err != nil {
		return nil, err
	}

	return existing, nil
}
// onExistingFile handles a file that already has a database entry. If the
// mod time is unchanged and Rescan is not forced, the unchanged path is
// taken; otherwise fingerprints are recalculated, decorators re-run, and
// the entry is updated. Returns a file only if it was updated.
func (s *scanJob) onExistingFile(ctx context.Context, f scanFile, existing models.File) (models.File, error) {
	base := existing.Base()
	path := base.Path

	fileModTime := f.ModTime
	updated := !fileModTime.Equal(base.ModTime)
	forceRescan := s.options.Rescan

	if !updated && !forceRescan {
		return s.onUnchangedFile(ctx, f, existing)
	}

	// keep a copy of the pre-update entry to pass to the handlers
	oldBase := *base

	if !updated && forceRescan {
		logger.Infof("rescanning %s", path)
	} else {
		logger.Infof("%s has been updated: rescanning", path)
	}

	base.ModTime = fileModTime
	base.Size = f.Size
	base.UpdatedAt = time.Now()

	// calculate and update fingerprints for the file
	const useExisting = false
	fp, err := s.calculateFingerprints(f.fs, base, path, useExisting)
	if err != nil {
		return nil, err
	}

	// drop a stale MD5 if the oshash changed and no new MD5 was produced
	s.removeOutdatedFingerprints(existing, fp)
	existing.SetFingerprints(fp)

	existing, err = s.fireDecorators(ctx, f.fs, existing)
	if err != nil {
		return nil, err
	}

	// queue file for update
	if err := s.withTxn(ctx, func(ctx context.Context) error {
		if err := s.Repository.File.Update(ctx, existing); err != nil {
			return fmt.Errorf("updating file %q: %w", path, err)
		}

		if err := s.fireHandlers(ctx, existing, &oldBase); err != nil {
			return err
		}

		return nil
	}); err != nil {
		return nil, err
	}

	return existing, nil
}
// removeOutdatedFingerprints removes the MD5 fingerprint from existing when
// the newly-calculated fingerprints have a changed oshash but no MD5 value.
// HACK - without this, a stale MD5 would be retained alongside a new oshash.
func (s *scanJob) removeOutdatedFingerprints(existing models.File, fp models.Fingerprints) {
	newOshash := fp.For(models.FingerprintTypeOshash)
	if newOshash == nil {
		return
	}

	oldOshash := existing.Base().Fingerprints.For(models.FingerprintTypeOshash)
	if oldOshash == nil || *oldOshash == *newOshash {
		// missing oshash or unchanged oshash - nothing to do
		return
	}

	if fp.For(models.FingerprintTypeMD5) != nil {
		// a fresh MD5 was calculated - nothing to do
		return
	}

	// oshash has changed and MD5 is missing - drop the stale MD5
	logger.Infof("Removing outdated checksum from %s", existing.Base().Path)
	base := existing.Base()
	base.Fingerprints = base.Fingerprints.Remove(models.FingerprintTypeMD5)
}
// onUnchangedFile handles a file whose mod time has not changed. Missing
// metadata and fingerprints are filled in, and handlers are fired if
// required. Returns a file only if it was updated.
func (s *scanJob) onUnchangedFile(ctx context.Context, f scanFile, existing models.File) (models.File, error) {
	var err error

	missingMetadata := s.isMissingMetadata(ctx, f, existing)
	if missingMetadata {
		if existing, err = s.setMissingMetadata(ctx, f, existing); err != nil {
			return nil, err
		}
	}

	// calculate any fingerprints not already present
	if existing, err = s.setMissingFingerprints(ctx, f, existing); err != nil {
		return nil, err
	}

	handlerRequired := false
	if err := s.withDB(ctx, func(ctx context.Context) error {
		// check if the handler needs to be run
		handlerRequired = s.isHandlerRequired(ctx, existing)
		return nil
	}); err != nil {
		return nil, err
	}

	if !handlerRequired {
		// if this file is a zip file, then we need to rescan the contents
		// as well. We do this by returning the file, instead of nil.
		if missingMetadata {
			return existing, nil
		}
		return nil, nil
	}

	if err := s.withTxn(ctx, func(ctx context.Context) error {
		return s.fireHandlers(ctx, existing, nil)
	}); err != nil {
		return nil, err
	}

	// if this file is a zip file, then we need to rescan the contents
	// as well. We do this by returning the file, instead of nil.
	return existing, nil
}