package manager

import (
	"context"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"sync"
	"time"

	"github.com/remeh/sizedwaitgroup"
	"github.com/stashapp/stash/pkg/file"
	"github.com/stashapp/stash/pkg/job"
	"github.com/stashapp/stash/pkg/logger"
	"github.com/stashapp/stash/pkg/manager/config"
	"github.com/stashapp/stash/pkg/models"
	"github.com/stashapp/stash/pkg/utils"
)

// scanQueueSize is the buffer size of the channel that carries discovered
// files from the directory walker to the scan workers.
const scanQueueSize = 200000

// ScanJob walks the configured stash paths, scans every matching file and
// finally associates galleries, notifying subscribers when done.
type ScanJob struct {
	txnManager    models.TransactionManager
	input         models.ScanMetadataInput
	subscriptions *subscriptionManager
}

// scanFile is a single file queued for scanning.
type scanFile struct {
	path            string
	info            os.FileInfo
	caseSensitiveFs bool
}

// Execute runs the scan. Files are discovered by a background goroutine
// (queueFiles) and consumed here; each file is handed to a ScanTask that runs
// in its own goroutine, with concurrency bounded by the configured number of
// parallel tasks. After all files are scanned, gallery association is run for
// every gallery file seen, and subscribers are notified.
//
// If ctx is cancelled the function stops handing out new work, waits for
// running tasks, and returns without associating galleries.
func (j *ScanJob) Execute(ctx context.Context, progress *job.Progress) {
	input := j.input
	paths := getScanPaths(input.Paths)

	if job.IsCancelled(ctx) {
		logger.Info("Stopping due to user request")
		return
	}

	start := time.Now()
	cfg := config.GetInstance()
	parallelTasks := cfg.GetParallelTasksWithAutoDetection()
	logger.Infof("Scan started with %d parallel tasks", parallelTasks)

	fileQueue := make(chan scanFile, scanQueueSize)
	go func() {
		// queueFiles closes fileQueue when it returns, which ends the
		// consumer loop below.
		total, newFiles := j.queueFiles(ctx, paths, fileQueue, parallelTasks)

		if !job.IsCancelled(ctx) {
			progress.SetTotal(total)
			logger.Infof("Finished counting files. Total files to scan: %d, %d new files found", total, newFiles)
		}
	}()

	wg := sizedwaitgroup.New(parallelTasks)

	fileNamingAlgo := cfg.GetVideoFileNamingAlgorithm()
	calculateMD5 := cfg.IsCalculateMD5()

	var galleries []string

	mutexManager := utils.NewMutexManager()

	for f := range fileQueue {
		if job.IsCancelled(ctx) {
			break
		}

		// remember gallery files so they can be associated once scanning is done
		if isGallery(f.path) {
			galleries = append(galleries, f.path)
		}

		if err := instance.Paths.Generated.EnsureTmpDir(); err != nil {
			logger.Warnf("couldn't create temporary directory: %v", err)
		}

		wg.Add()
		task := ScanTask{
			TxnManager:           j.txnManager,
			file:                 file.FSFile(f.path, f.info),
			UseFileMetadata:      utils.IsTrue(input.UseFileMetadata),
			StripFileExtension:   utils.IsTrue(input.StripFileExtension),
			fileNamingAlgorithm:  fileNamingAlgo,
			calculateMD5:         calculateMD5,
			GeneratePreview:      utils.IsTrue(input.ScanGeneratePreviews),
			GenerateImagePreview: utils.IsTrue(input.ScanGenerateImagePreviews),
			GenerateSprite:       utils.IsTrue(input.ScanGenerateSprites),
			GeneratePhash:        utils.IsTrue(input.ScanGeneratePhashes),
			GenerateThumbnails:   utils.IsTrue(input.ScanGenerateThumbnails),
			progress:             progress,
			CaseSensitiveFs:      f.caseSensitiveFs,
			ctx:                  ctx,
			mutexManager:         mutexManager,
		}

		go func() {
			task.Start(ctx)
			wg.Done()
			progress.Increment()
		}()
	}

	wg.Wait()

	if err := instance.Paths.Generated.EmptyTmpDir(); err != nil {
		logger.Warnf("couldn't empty temporary directory: %v", err)
	}

	elapsed := time.Since(start)
	logger.Infof("Scan finished (%s)", elapsed)

	if job.IsCancelled(ctx) {
		logger.Info("Stopping due to user request")
		return
	}

	progress.ExecuteTask("Associating galleries", func() {
		for _, path := range galleries {
			wg.Add()
			task := ScanTask{
				TxnManager:      j.txnManager,
				file:            file.FSFile(path, nil), // hopefully info is not needed
				UseFileMetadata: false,
			}

			go task.associateGallery(&wg)
			// waiting inside the loop means galleries are associated one at a time
			wg.Wait()
		}
		logger.Info("Finished gallery association")
	})

	j.subscriptions.notify()
}

// queueFiles walks every path in paths and sends each scannable file to
// scanQueue, closing the channel before returning. It returns the number of
// files queued (total) and how many of those are not yet present in the
// database (newFiles).
//
// Per-file work (database lookup and queueing) runs in goroutines bounded by
// parallelTasks. Walking stops early when ctx is cancelled or when the walk of
// a path fails with an error other than context.Canceled.
func (j *ScanJob) queueFiles(ctx context.Context, paths []*models.StashConfig, scanQueue chan<- scanFile, parallelTasks int) (total int, newFiles int) {
	defer close(scanQueue)

	var minModTime time.Time
	if j.input.Filter != nil && j.input.Filter.MinModTime != nil {
		minModTime = *j.input.Filter.MinModTime
	}

	wg := sizedwaitgroup.New(parallelTasks)

	// countMu guards total and newFiles, which are updated from the
	// per-file goroutines below.
	var countMu sync.Mutex

	for _, sp := range paths {
		csFs, er := utils.IsFsPathCaseSensitive(sp.Path)
		if er != nil {
			logger.Warnf("Cannot determine fs case sensitivity: %s", er.Error())
		}

		err := walkFilesToScan(sp, func(path string, info os.FileInfo, err error) error {
			// check stop
			if job.IsCancelled(ctx) {
				return context.Canceled
			}

			// exit early on cutoff
			if info.Mode().IsRegular() && info.ModTime().Before(minModTime) {
				return nil
			}

			wg.Add()

			go func() {
				defer wg.Done()

				// #1756 - skip zero length files and directories
				if info.IsDir() {
					return
				}

				if info.Size() == 0 {
					logger.Infof("Skipping zero-length file: %s", path)
					return
				}

				// do the database lookup outside the lock so lookups still
				// run in parallel
				exists := j.doesPathExist(path)

				countMu.Lock()
				total++
				if !exists {
					newFiles++
				}
				countMu.Unlock()

				// Do not block forever on a full queue once the job has been
				// cancelled: the consumer stops reading at that point.
				// NOTE(review): assumes job.IsCancelled reflects ctx.Done() - confirm.
				select {
				case scanQueue <- scanFile{
					path:            path,
					info:            info,
					caseSensitiveFs: csFs,
				}:
				case <-ctx.Done():
				}
			}()

			return nil
		})

		// all per-file goroutines for this path must finish before the
		// counters are read or the channel is closed
		wg.Wait()

		if err != nil && !errors.Is(err, context.Canceled) {
			logger.Errorf("Error encountered queuing files to scan: %s", err.Error())
			return total, newFiles
		}
	}

	return total, newFiles
}

// doesPathExist reports whether path is already recorded in the database as a
// gallery, scene or image, chosen by the file's extension. Paths matching none
// of the configured extensions are reported as not existing.
func (j *ScanJob) doesPathExist(path string) bool {
	cfg := config.GetInstance()
	vidExt := cfg.GetVideoExtensions()
	imgExt := cfg.GetImageExtensions()
	gExt := cfg.GetGalleryExtensions()

	ret := false
	txnErr := j.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
		// Lookup errors are deliberately ignored: a failed lookup is treated
		// the same as "not found", so the file is counted as new.
		switch {
		case utils.MatchExtension(path, gExt):
			g, _ := r.Gallery().FindByPath(path)
			if g != nil {
				ret = true
			}
		case utils.MatchExtension(path, vidExt):
			s, _ := r.Scene().FindByPath(path)
			if s != nil {
				ret = true
			}
		case utils.MatchExtension(path, imgExt):
			i, _ := r.Image().FindByPath(path)
			if i != nil {
				ret = true
			}
		}
		return nil
	})
	if txnErr != nil {
		logger.Warnf("error checking if file exists in database: %v", txnErr)
	}

	return ret
}

// ScanTask scans a single file and optionally generates supporting content
// (sprites, phash, previews) for scenes.
type ScanTask struct {
	ctx                  context.Context
	TxnManager           models.TransactionManager
	file                 file.SourceFile
	UseFileMetadata      bool
	StripFileExtension   bool
	calculateMD5         bool
	fileNamingAlgorithm  models.HashAlgorithm
	GenerateSprite       bool
	GeneratePhash        bool
	GeneratePreview      bool
	GenerateImagePreview bool
	GenerateThumbnails   bool
	zipGallery           *models.Gallery
	progress             *job.Progress
	CaseSensitiveFs      bool

	mutexManager *utils.MutexManager
}

// Start scans the task's file as a gallery, video or image depending on its
// path. When the file is a video and scanning yields a scene, the enabled
// generators (sprite, phash, preview) are run for it, at most two at a time,
// and Start returns once they have all finished.
func (t *ScanTask) Start(ctx context.Context) {
	var s *models.Scene
	path := t.file.Path()
	t.progress.ExecuteTask("Scanning "+path, func() {
		switch {
		case isGallery(path):
			t.scanGallery(ctx)
		case isVideo(path):
			s = t.scanScene()
		case isImage(path):
			t.scanImage()
		}
	})

	if s == nil {
		return
	}

	// Handle the case of a scene
	iwg := sizedwaitgroup.New(2)

	if t.GenerateSprite {
		iwg.Add()

		go t.progress.ExecuteTask(fmt.Sprintf("Generating sprites for %s", path), func() {
			// deferred so the wait group is released however the task exits
			defer iwg.Done()

			taskSprite := GenerateSpriteTask{
				Scene:               *s,
				Overwrite:           false,
				fileNamingAlgorithm: t.fileNamingAlgorithm,
			}
			taskSprite.Start(ctx)
		})
	}

	if t.GeneratePhash {
		iwg.Add()

		go t.progress.ExecuteTask(fmt.Sprintf("Generating phash for %s", path), func() {
			defer iwg.Done()

			taskPhash := GeneratePhashTask{
				Scene:               *s,
				fileNamingAlgorithm: t.fileNamingAlgorithm,
				txnManager:          t.TxnManager,
			}
			taskPhash.Start(ctx)
		})
	}

	if t.GeneratePreview {
		iwg.Add()

		go t.progress.ExecuteTask(fmt.Sprintf("Generating preview for %s", path), func() {
			defer iwg.Done()

			cfg := config.GetInstance()
			previewSegmentDuration := cfg.GetPreviewSegmentDuration()
			previewSegments := cfg.GetPreviewSegments()
			previewExcludeStart := cfg.GetPreviewExcludeStart()
			previewExcludeEnd := cfg.GetPreviewExcludeEnd()
			previewPreset := cfg.GetPreviewPreset()

			// NOTE: the reuse of this model like this is painful.
			previewOptions := models.GeneratePreviewOptionsInput{
				PreviewSegments:        &previewSegments,
				PreviewSegmentDuration: &previewSegmentDuration,
				PreviewExcludeStart:    &previewExcludeStart,
				PreviewExcludeEnd:      &previewExcludeEnd,
				PreviewPreset:          &previewPreset,
			}

			taskPreview := GeneratePreviewTask{
				Scene:               *s,
				ImagePreview:        t.GenerateImagePreview,
				Options:             previewOptions,
				Overwrite:           false,
				fileNamingAlgorithm: t.fileNamingAlgorithm,
			}

			taskPreview.Start(ctx)
		})
	}

	iwg.Wait()
}

// walkFilesToScan walks the stash path s and calls f for every file that
// should be scanned: video files (unless s.ExcludeVideo) and image/gallery
// files (unless s.ExcludeImage) whose extension matches the configured lists
// and whose path does not match the corresponding exclusion patterns.
//
// Directories inside the generated path, and directories excluded for both
// videos and images, are skipped entirely. Errors reported by the walker for
// an individual entry are logged and do not stop the walk. Zip paths are not
// walked; a warning is logged and nil is returned.
func walkFilesToScan(s *models.StashConfig, f filepath.WalkFunc) error {
	cfg := config.GetInstance()
	vidExt := cfg.GetVideoExtensions()
	imgExt := cfg.GetImageExtensions()
	gExt := cfg.GetGalleryExtensions()
	excludeVidRegex := generateRegexps(cfg.GetExcludes())
	excludeImgRegex := generateRegexps(cfg.GetImageExcludes())

	// don't scan zip images directly
	if file.IsZipPath(s.Path) {
		logger.Warnf("Cannot rescan zip image %s. Rescan zip gallery instead.", s.Path)
		return nil
	}

	generatedPath := cfg.GetGeneratedPath()

	return utils.SymWalk(s.Path, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			logger.Warnf("error scanning %s: %s", path, err.Error())
			return nil
		}

		if info.IsDir() {
			// #1102 - ignore files in generated path
			if utils.IsPathInDir(generatedPath, path) {
				return filepath.SkipDir
			}

			// shortcut: skip the directory entirely if it matches both exclusion patterns
			// add a trailing separator so that it correctly matches against patterns like path/.*
			pathExcludeTest := path + string(filepath.Separator)
			if (s.ExcludeVideo || matchFileRegex(pathExcludeTest, excludeVidRegex)) && (s.ExcludeImage || matchFileRegex(pathExcludeTest, excludeImgRegex)) {
				return filepath.SkipDir
			}

			return nil
		}

		if !s.ExcludeVideo && utils.MatchExtension(path, vidExt) && !matchFileRegex(path, excludeVidRegex) {
			return f(path, info, err)
		}

		if !s.ExcludeImage {
			if (utils.MatchExtension(path, imgExt) || utils.MatchExtension(path, gExt)) && !matchFileRegex(path, excludeImgRegex) {
				return f(path, info, err)
			}
		}

		return nil
	})
}