stash/pkg/manager/task_scan.go

680 lines
18 KiB
Go
Raw Normal View History

2019-02-09 12:30:49 +00:00
package manager
import (
"archive/zip"
2019-02-09 12:30:49 +00:00
"context"
"database/sql"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
"time"
"github.com/jmoiron/sqlx"
2019-02-14 23:42:52 +00:00
"github.com/stashapp/stash/pkg/database"
"github.com/stashapp/stash/pkg/ffmpeg"
"github.com/stashapp/stash/pkg/image"
2019-02-14 23:42:52 +00:00
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/manager/config"
2019-02-14 23:42:52 +00:00
"github.com/stashapp/stash/pkg/models"
"github.com/stashapp/stash/pkg/utils"
2019-02-09 12:30:49 +00:00
)
type ScanTask struct {
2020-08-06 01:21:14 +00:00
FilePath string
UseFileMetadata bool
calculateMD5 bool
fileNamingAlgorithm models.HashAlgorithm
zipGallery *models.Gallery
2019-02-09 12:30:49 +00:00
}
func (t *ScanTask) Start(wg *sync.WaitGroup) {
if isGallery(t.FilePath) {
2019-02-09 12:30:49 +00:00
t.scanGallery()
} else if isVideo(t.FilePath) {
2019-02-09 12:30:49 +00:00
t.scanScene()
} else if isImage(t.FilePath) {
t.scanImage()
2019-02-09 12:30:49 +00:00
}
wg.Done()
}
func (t *ScanTask) scanGallery() {
qb := models.NewGalleryQueryBuilder()
gallery, _ := qb.FindByPath(t.FilePath)
2019-02-09 12:30:49 +00:00
if gallery != nil {
// We already have this item in the database, keep going
// scan the zip files if the gallery has no images
iqb := models.NewImageQueryBuilder()
images, err := iqb.CountByGalleryID(gallery.ID)
if err != nil {
logger.Errorf("error getting images for zip gallery %s: %s", t.FilePath, err.Error())
}
if images == 0 {
t.scanZipImages(gallery)
} else {
// in case thumbnails have been deleted, regenerate them
t.regenerateZipImages(gallery)
}
2019-02-09 12:30:49 +00:00
return
}
// Ignore directories.
if isDir, _ := utils.DirExists(t.FilePath); isDir {
return
}
2019-02-09 12:30:49 +00:00
checksum, err := t.calculateChecksum()
if err != nil {
logger.Error(err.Error())
return
}
ctx := context.TODO()
tx := database.DB.MustBeginTx(ctx, nil)
gallery, _ = qb.FindByChecksum(checksum, tx)
if gallery != nil {
exists, _ := utils.FileExists(gallery.Path.String)
if exists {
logger.Infof("%s already exists. Duplicate of %s ", t.FilePath, gallery.Path.String)
} else {
logger.Infof("%s already exists. Updating path...", t.FilePath)
gallery.Path = sql.NullString{
String: t.FilePath,
Valid: true,
}
gallery, err = qb.Update(*gallery, tx)
}
2019-02-09 12:30:49 +00:00
} else {
currentTime := time.Now()
2019-02-09 12:30:49 +00:00
newGallery := models.Gallery{
Checksum: checksum,
Zip: true,
Path: sql.NullString{
String: t.FilePath,
Valid: true,
},
CreatedAt: models.SQLiteTimestamp{Timestamp: currentTime},
UpdatedAt: models.SQLiteTimestamp{Timestamp: currentTime},
2019-02-09 12:30:49 +00:00
}
// don't create gallery if it has no images
if countImagesInZip(t.FilePath) > 0 {
// only warn when creating the gallery
ok, err := utils.IsZipFileUncompressed(t.FilePath)
if err == nil && !ok {
logger.Warnf("%s is using above store (0) level compression.", t.FilePath)
}
logger.Infof("%s doesn't exist. Creating new item...", t.FilePath)
gallery, err = qb.Create(newGallery, tx)
}
2019-02-09 12:30:49 +00:00
}
if err != nil {
logger.Error(err.Error())
tx.Rollback()
return
}
err = tx.Commit()
if err != nil {
2019-02-09 12:30:49 +00:00
logger.Error(err.Error())
return
}
// if the gallery has no associated images, then scan the zip for images
if gallery != nil {
t.scanZipImages(gallery)
2019-02-09 12:30:49 +00:00
}
}
// associateGallery links the gallery at t.FilePath to a scene that has the
// same path apart from its extension, then signals completion on wg.
//
// Candidate scene paths are built by swapping the gallery's extension for each
// configured video extension; candidates that are themselves gallery files are
// skipped. The first candidate found in the database is stored as the
// gallery's SceneID. Galleries that already have a SceneID are left untouched.
// wg.Done is deferred so it runs exactly once on every exit path.
func (t *ScanTask) associateGallery(wg *sync.WaitGroup) {
	defer wg.Done()

	qb := models.NewGalleryQueryBuilder()
	gallery, _ := qb.FindByPath(t.FilePath)
	if gallery == nil {
		// associate is run after scan is finished
		// should only happen if gallery is a directory or an io error occurs during hashing
		logger.Warnf("associate: gallery %s not found in DB", t.FilePath)
		return
	}

	// only galleries without a SceneID are candidates for association
	if gallery.SceneID.Valid {
		return
	}

	basename := strings.TrimSuffix(t.FilePath, filepath.Ext(t.FilePath))
	qbScene := models.NewSceneQueryBuilder()
	for _, ext := range config.GetVideoExtensions() {
		scenePath := basename + "." + ext
		// exclude gallery extensions from the related files
		if isGallery(scenePath) {
			continue
		}

		scene, _ := qbScene.FindByPath(scenePath)
		if scene == nil {
			continue
		}

		// found related Scene
		logger.Infof("associate: Gallery %s is related to scene: %d", t.FilePath, scene.ID)

		gallery.SceneID.Int64 = int64(scene.ID)
		gallery.SceneID.Valid = true

		ctx := context.TODO()
		tx := database.DB.MustBeginTx(ctx, nil)

		_, err := qb.Update(*gallery, tx)
		if err != nil {
			logger.Errorf("associate: Error updating gallery sceneId %s", err)
			_ = tx.Rollback()
		} else if err := tx.Commit(); err != nil {
			logger.Error(err.Error())
		}

		// since a gallery can have only one related scene
		// only first found is associated
		return
	}
}
2019-02-09 12:30:49 +00:00
func (t *ScanTask) scanScene() {
qb := models.NewSceneQueryBuilder()
scene, _ := qb.FindByPath(t.FilePath)
if scene != nil {
// We already have this item in the database
2020-08-06 01:21:14 +00:00
// check for thumbnails,screenshots
t.makeScreenshots(nil, scene.GetHash(t.fileNamingAlgorithm))
2020-08-06 01:21:14 +00:00
// check for container
if !scene.Format.Valid {
videoFile, err := ffmpeg.NewVideoFile(instance.FFProbePath, t.FilePath)
if err != nil {
logger.Error(err.Error())
return
}
container := ffmpeg.MatchContainer(videoFile.Container, t.FilePath)
logger.Infof("Adding container %s to file %s", container, t.FilePath)
ctx := context.TODO()
tx := database.DB.MustBeginTx(ctx, nil)
err = qb.UpdateFormat(scene.ID, string(container), tx)
if err != nil {
logger.Error(err.Error())
_ = tx.Rollback()
} else if err := tx.Commit(); err != nil {
logger.Error(err.Error())
}
2020-08-06 01:21:14 +00:00
}
// check if oshash is set
if !scene.OSHash.Valid {
logger.Infof("Calculating oshash for existing file %s ...", t.FilePath)
oshash, err := utils.OSHashFromFilePath(t.FilePath)
if err != nil {
logger.Error(err.Error())
return
}
// check if oshash clashes with existing scene
dupe, _ := qb.FindByOSHash(oshash)
if dupe != nil {
logger.Errorf("OSHash for file %s is the same as that of %s", t.FilePath, dupe.Path)
return
}
2020-08-06 01:21:14 +00:00
ctx := context.TODO()
tx := database.DB.MustBeginTx(ctx, nil)
err = qb.UpdateOSHash(scene.ID, oshash, tx)
if err != nil {
logger.Error(err.Error())
tx.Rollback()
return
2020-08-06 01:21:14 +00:00
} else if err := tx.Commit(); err != nil {
logger.Error(err.Error())
}
}
// check if MD5 is set, if calculateMD5 is true
if t.calculateMD5 && !scene.Checksum.Valid {
checksum, err := t.calculateChecksum()
if err != nil {
logger.Error(err.Error())
return
}
// check if checksum clashes with existing scene
dupe, _ := qb.FindByChecksum(checksum)
if dupe != nil {
logger.Errorf("MD5 for file %s is the same as that of %s", t.FilePath, dupe.Path)
return
}
2020-08-06 01:21:14 +00:00
ctx := context.TODO()
tx := database.DB.MustBeginTx(ctx, nil)
err = qb.UpdateChecksum(scene.ID, checksum, tx)
if err != nil {
logger.Error(err.Error())
_ = tx.Rollback()
} else if err := tx.Commit(); err != nil {
logger.Error(err.Error())
}
}
2020-08-06 01:21:14 +00:00
2019-02-09 12:30:49 +00:00
return
}
// Ignore directories.
if isDir, _ := utils.DirExists(t.FilePath); isDir {
return
}
videoFile, err := ffmpeg.NewVideoFile(instance.FFProbePath, t.FilePath)
if err != nil {
logger.Error(err.Error())
return
}
container := ffmpeg.MatchContainer(videoFile.Container, t.FilePath)
// Override title to be filename if UseFileMetadata is false
if !t.UseFileMetadata {
2019-10-12 08:20:27 +00:00
videoFile.SetTitleFromPath()
}
2020-08-06 01:21:14 +00:00
var checksum string
logger.Infof("%s not found. Calculating oshash...", t.FilePath)
2020-08-06 01:21:14 +00:00
oshash, err := utils.OSHashFromFilePath(t.FilePath)
2019-02-09 12:30:49 +00:00
if err != nil {
logger.Error(err.Error())
return
}
2020-08-06 01:21:14 +00:00
if t.fileNamingAlgorithm == models.HashAlgorithmMd5 || t.calculateMD5 {
checksum, err = t.calculateChecksum()
if err != nil {
logger.Error(err.Error())
return
}
}
// check for scene by checksum and oshash - MD5 should be
// redundant, but check both
if checksum != "" {
scene, _ = qb.FindByChecksum(checksum)
}
if scene == nil {
scene, _ = qb.FindByOSHash(oshash)
}
2020-08-06 01:21:14 +00:00
sceneHash := oshash
2020-08-06 01:21:14 +00:00
if t.fileNamingAlgorithm == models.HashAlgorithmMd5 {
sceneHash = checksum
}
t.makeScreenshots(videoFile, sceneHash)
2019-02-09 12:30:49 +00:00
ctx := context.TODO()
tx := database.DB.MustBeginTx(ctx, nil)
if scene != nil {
2019-08-20 13:46:05 +00:00
exists, _ := utils.FileExists(scene.Path)
if exists {
logger.Infof("%s already exists. Duplicate of %s", t.FilePath, scene.Path)
} else {
logger.Infof("%s already exists. Updating path...", t.FilePath)
2019-10-14 21:57:53 +00:00
scenePartial := models.ScenePartial{
ID: scene.ID,
Path: &t.FilePath,
}
_, err = qb.Update(scenePartial, tx)
}
2019-02-09 12:30:49 +00:00
} else {
logger.Infof("%s doesn't exist. Creating new item...", t.FilePath)
2019-02-09 12:30:49 +00:00
currentTime := time.Now()
newScene := models.Scene{
2020-08-06 01:21:14 +00:00
Checksum: sql.NullString{String: checksum, Valid: checksum != ""},
OSHash: sql.NullString{String: oshash, Valid: oshash != ""},
Path: t.FilePath,
Title: sql.NullString{String: videoFile.Title, Valid: true},
Duration: sql.NullFloat64{Float64: videoFile.Duration, Valid: true},
VideoCodec: sql.NullString{String: videoFile.VideoCodec, Valid: true},
AudioCodec: sql.NullString{String: videoFile.AudioCodec, Valid: true},
Format: sql.NullString{String: string(container), Valid: true},
Width: sql.NullInt64{Int64: int64(videoFile.Width), Valid: true},
Height: sql.NullInt64{Int64: int64(videoFile.Height), Valid: true},
Framerate: sql.NullFloat64{Float64: videoFile.FrameRate, Valid: true},
Bitrate: sql.NullInt64{Int64: videoFile.Bitrate, Valid: true},
Size: sql.NullString{String: strconv.Itoa(int(videoFile.Size)), Valid: true},
CreatedAt: models.SQLiteTimestamp{Timestamp: currentTime},
UpdatedAt: models.SQLiteTimestamp{Timestamp: currentTime},
2019-02-09 12:30:49 +00:00
}
if t.UseFileMetadata {
newScene.Details = sql.NullString{String: videoFile.Comment, Valid: true}
newScene.Date = models.SQLiteDate{String: videoFile.CreationTime.Format("2006-01-02")}
}
2019-02-09 12:30:49 +00:00
_, err = qb.Create(newScene, tx)
}
if err != nil {
logger.Error(err.Error())
_ = tx.Rollback()
} else if err := tx.Commit(); err != nil {
logger.Error(err.Error())
}
}
func (t *ScanTask) makeScreenshots(probeResult *ffmpeg.VideoFile, checksum string) {
2019-02-09 12:30:49 +00:00
thumbPath := instance.Paths.Scene.GetThumbnailScreenshotPath(checksum)
normalPath := instance.Paths.Scene.GetScreenshotPath(checksum)
thumbExists, _ := utils.FileExists(thumbPath)
normalExists, _ := utils.FileExists(normalPath)
2019-02-09 12:30:49 +00:00
if thumbExists && normalExists {
return
}
2019-10-17 23:17:51 +00:00
if probeResult == nil {
var err error
2019-10-17 23:17:51 +00:00
probeResult, err = ffmpeg.NewVideoFile(instance.FFProbePath, t.FilePath)
2019-10-17 23:17:51 +00:00
if err != nil {
logger.Error(err.Error())
return
}
logger.Infof("Regenerating images for %s", t.FilePath)
}
at := float64(probeResult.Duration) * 0.2
2019-10-17 23:17:51 +00:00
if !thumbExists {
logger.Debugf("Creating thumbnail for %s", t.FilePath)
makeScreenshot(*probeResult, thumbPath, 5, 320, at)
2019-10-17 23:17:51 +00:00
}
if !normalExists {
logger.Debugf("Creating screenshot for %s", t.FilePath)
makeScreenshot(*probeResult, normalPath, 2, probeResult.Width, at)
2019-02-09 12:30:49 +00:00
}
}
// scanZipImages runs an image scan for every file that walkGalleryZip visits
// in the gallery's zip, associating each scanned image with zipGallery.
func (t *ScanTask) scanZipImages(zipGallery *models.Gallery) {
	zipPath := zipGallery.Path.String

	scanEntry := func(file *zip.File) error {
		// work on a copy of this task so the receiver is left untouched
		entryTask := *t
		// the path combines the zip file and the internal file name
		entryTask.FilePath = image.ZipFilename(zipPath, file.Name)
		entryTask.zipGallery = zipGallery

		// Start is called synchronously and marks the wait group done itself,
		// so the entry has been fully scanned by the time it returns
		var wg sync.WaitGroup
		wg.Add(1)
		entryTask.Start(&wg)
		return nil
	}

	if err := walkGalleryZip(zipPath, scanEntry); err != nil {
		logger.Warnf("failed to scan zip file images for %s: %s", zipPath, err.Error())
	}
}
// regenerateZipImages makes sure every image already associated with
// zipGallery has a thumbnail, creating any that are missing.
func (t *ScanTask) regenerateZipImages(zipGallery *models.Gallery) {
	galleryImages, err := models.NewImageQueryBuilder().FindByGalleryID(zipGallery.ID)
	if err != nil {
		logger.Warnf("failed to find gallery images: %s", err.Error())
		return
	}

	for idx := range galleryImages {
		t.generateThumbnail(galleryImages[idx])
	}
}
// scanImage adds the image at t.FilePath to the database and makes sure it has
// a thumbnail.
//
// If the path is already known, only the thumbnail is checked. Otherwise the
// image is hashed and matched by checksum: a match whose stored file no longer
// exists has its path updated, a match whose file still exists is reported as
// a duplicate, and no match creates a new image. The image is then associated
// with t.zipGallery when set, or with a folder gallery when that option is
// enabled in the configuration. Errors are logged; nothing is returned.
func (t *ScanTask) scanImage() {
	qb := models.NewImageQueryBuilder()
	// a lookup error is treated the same as "not found"
	i, _ := qb.FindByPath(t.FilePath)
	if i != nil {
		// We already have this item in the database
		// check for thumbnails
		t.generateThumbnail(i)
		return
	}

	// Ignore directories.
	if isDir, _ := utils.DirExists(t.FilePath); isDir {
		return
	}

	logger.Infof("%s not found. Calculating checksum...", t.FilePath)
	checksum, err := t.calculateImageChecksum()
	if err != nil {
		logger.Errorf("error calculating checksum for %s: %s", t.FilePath, err.Error())
		return
	}

	// look for an already-stored image with the same checksum
	i, _ = qb.FindByChecksum(checksum)

	ctx := context.TODO()
	tx := database.DB.MustBeginTx(ctx, nil)
	if i != nil {
		exists := image.FileExists(i.Path)
		if exists {
			logger.Infof("%s already exists. Duplicate of %s ", image.PathDisplayName(t.FilePath), image.PathDisplayName(i.Path))
		} else {
			logger.Infof("%s already exists. Updating path...", image.PathDisplayName(t.FilePath))
			imagePartial := models.ImagePartial{
				ID:   i.ID,
				Path: &t.FilePath,
			}
			_, err = qb.Update(imagePartial, tx)
			if err == nil {
				// Keep the in-memory record in step with the stored row: i is
				// logged and handed to generateThumbnail below, which reads
				// the source image via i.Path, and the old path no longer
				// exists.
				i.Path = t.FilePath
			}
		}
	} else {
		logger.Infof("%s doesn't exist. Creating new item...", image.PathDisplayName(t.FilePath))
		currentTime := time.Now()
		newImage := models.Image{
			Checksum:  checksum,
			Path:      t.FilePath,
			CreatedAt: models.SQLiteTimestamp{Timestamp: currentTime},
			UpdatedAt: models.SQLiteTimestamp{Timestamp: currentTime},
		}
		err = image.SetFileDetails(&newImage)
		if err == nil {
			i, err = qb.Create(newImage, tx)
		}
	}

	if err == nil {
		jqb := models.NewJoinsQueryBuilder()
		if t.zipGallery != nil {
			// associate with gallery
			_, err = jqb.AddImageGallery(i.ID, t.zipGallery.ID, tx)
		} else if config.GetCreateGalleriesFromFolders() {
			// create gallery from folder or associate with existing gallery
			logger.Infof("Associating image %s with folder gallery", i.Path)
			err = t.associateImageWithFolderGallery(i.ID, tx)
		}
	}

	if err != nil {
		logger.Error(err.Error())
		_ = tx.Rollback()
		return
	}
	if err := tx.Commit(); err != nil {
		logger.Error(err.Error())
		return
	}

	t.generateThumbnail(i)
}
// associateImageWithFolderGallery links the image identified by imageID to the
// gallery whose path is the directory containing t.FilePath. If no such
// gallery exists yet, one is created inside tx, using the MD5 of the directory
// path as its checksum. It returns the first error encountered.
func (t *ScanTask) associateImageWithFolderGallery(imageID int, tx *sqlx.Tx) error {
	folder := filepath.Dir(t.FilePath)

	gqb := models.NewGalleryQueryBuilder()
	jqb := models.NewJoinsQueryBuilder()

	// find a gallery with the folder's path
	folderGallery, err := gqb.FindByPath(folder)
	if err != nil {
		return err
	}

	if folderGallery == nil {
		// no gallery for this folder yet, so create one
		folderChecksum := utils.MD5FromString(folder)
		now := time.Now()
		stamp := models.SQLiteTimestamp{Timestamp: now}

		logger.Infof("Creating gallery for folder %s", folder)
		folderGallery, err = gqb.Create(models.Gallery{
			Checksum:  folderChecksum,
			Path:      sql.NullString{String: folder, Valid: true},
			CreatedAt: stamp,
			UpdatedAt: stamp,
		}, tx)
		if err != nil {
			return err
		}
	}

	// associate image with gallery
	_, err = jqb.AddImageGallery(imageID, folderGallery.ID, tx)
	return err
}
// generateThumbnail writes a thumbnail for image i at the default gallery
// thumbnail width. Nothing is written when the thumbnail file already exists
// or when image.ThumbnailNeeded reports that the source image does not need
// one. Failures are logged.
func (t *ScanTask) generateThumbnail(i *models.Image) {
	thumbPath := GetInstance().Paths.Generated.GetThumbnailPath(i.Checksum, models.DefaultGthumbWidth)
	if alreadyThere, _ := utils.FileExists(thumbPath); alreadyThere {
		logger.Debug("Thumbnail already exists for this path... skipping")
		return
	}

	srcImage, err := image.GetSourceImage(i)
	if err != nil {
		logger.Errorf("error reading image %s: %s", i.Path, err.Error())
		return
	}

	if !image.ThumbnailNeeded(srcImage, models.DefaultGthumbWidth) {
		return
	}

	thumbData, err := image.GetThumbnail(srcImage, models.DefaultGthumbWidth)
	if err != nil {
		logger.Errorf("error getting thumbnail for image %s: %s", i.Path, err.Error())
		return
	}

	if err := utils.WriteFile(thumbPath, thumbData); err != nil {
		logger.Errorf("error writing thumbnail for image %s: %s", i.Path, err)
	}
}
2019-02-09 12:30:49 +00:00
func (t *ScanTask) calculateChecksum() (string, error) {
2020-08-06 01:21:14 +00:00
logger.Infof("Calculating checksum for %s...", t.FilePath)
2019-02-09 12:30:49 +00:00
checksum, err := utils.MD5FromFilePath(t.FilePath)
if err != nil {
return "", err
}
logger.Debugf("Checksum calculated: %s", checksum)
return checksum, nil
}
// calculateImageChecksum returns the MD5 checksum of the image at t.FilePath.
// It goes through image.CalculateMD5 so that files inside zips can be read.
func (t *ScanTask) calculateImageChecksum() (string, error) {
	logger.Infof("Calculating checksum for %s...", image.PathDisplayName(t.FilePath))

	md5sum, err := image.CalculateMD5(t.FilePath)
	if err == nil {
		logger.Debugf("Checksum calculated: %s", md5sum)
		return md5sum, nil
	}
	return "", err
}
// doesPathExist reports whether t.FilePath is already stored in the database.
// The file's extension decides which table is consulted: galleries first, then
// scenes, then images. Paths with any other extension are reported as absent,
// as are lookups that fail.
func (t *ScanTask) doesPathExist() bool {
	vidExt := config.GetVideoExtensions()
	imgExt := config.GetImageExtensions()
	gExt := config.GetGalleryExtensions()

	switch {
	case matchExtension(t.FilePath, gExt):
		found, _ := models.NewGalleryQueryBuilder().FindByPath(t.FilePath)
		return found != nil
	case matchExtension(t.FilePath, vidExt):
		found, _ := models.NewSceneQueryBuilder().FindByPath(t.FilePath)
		return found != nil
	case matchExtension(t.FilePath, imgExt):
		found, _ := models.NewImageQueryBuilder().FindByPath(t.FilePath)
		return found != nil
	default:
		return false
	}
}
func walkFilesToScan(s *models.StashConfig, f filepath.WalkFunc) error {
vidExt := config.GetVideoExtensions()
imgExt := config.GetImageExtensions()
gExt := config.GetGalleryExtensions()
excludeVidRegex := generateRegexps(config.GetExcludes())
excludeImgRegex := generateRegexps(config.GetImageExcludes())
return utils.SymWalk(s.Path, func(path string, info os.FileInfo, err error) error {
2020-10-13 23:51:36 +00:00
if err != nil {
logger.Warnf("error scanning %s: %s", path, err.Error())
return nil
}
if info.IsDir() {
return nil
}
if !s.ExcludeVideo && matchExtension(path, vidExt) && !matchFileRegex(path, excludeVidRegex) {
return f(path, info, err)
}
if !s.ExcludeImage {
if (matchExtension(path, imgExt) || matchExtension(path, gExt)) && !matchFileRegex(path, excludeImgRegex) {
return f(path, info, err)
}
}
return nil
})
}