// stash/internal/manager/task_export.go

package manager

import (
	"archive/zip"
	"context"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"runtime"
	"sync"
	"time"

	"github.com/stashapp/stash/internal/manager/config"
	"github.com/stashapp/stash/pkg/fsutil"
	"github.com/stashapp/stash/pkg/gallery"
	"github.com/stashapp/stash/pkg/hash/md5"
	"github.com/stashapp/stash/pkg/image"
	"github.com/stashapp/stash/pkg/logger"
	"github.com/stashapp/stash/pkg/models"
	"github.com/stashapp/stash/pkg/models/jsonschema"
	"github.com/stashapp/stash/pkg/models/paths"
	"github.com/stashapp/stash/pkg/movie"
	"github.com/stashapp/stash/pkg/performer"
	"github.com/stashapp/stash/pkg/scene"
	"github.com/stashapp/stash/pkg/sliceutil/intslice"
	"github.com/stashapp/stash/pkg/sliceutil/stringslice"
	"github.com/stashapp/stash/pkg/studio"
	"github.com/stashapp/stash/pkg/tag"
	"github.com/stashapp/stash/pkg/utils"
)
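
// An ExportTask writes database objects out as JSON files. A full export
// writes into the configured metadata directory; a selective export writes
// into a temporary directory, which is then zipped and registered as a
// download.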
type ExportTask struct {
	txnManager models.TransactionManager
	full       bool

	baseDir string
	json    jsonUtils

	Mappings            *jsonschema.Mappings
	fileNamingAlgorithm models.HashAlgorithm

	scenes     *exportSpec
	images     *exportSpec
	performers *exportSpec
	movies     *exportSpec
	tags       *exportSpec
	studios    *exportSpec
	galleries  *exportSpec

	includeDependencies bool

	DownloadHash string
}
type exportSpec struct {
IDs []int
all bool
}
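
// newExportSpec converts an ExportObjectTypeInput into an exportSpec.
// A nil input selects nothing; ID parse errors are ignored.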
func newExportSpec(input *models.ExportObjectTypeInput) *exportSpec {
if input == nil {
return &exportSpec{}
}
ids, _ := stringslice.StringSliceToIntSlice(input.Ids)
ret := &exportSpec{
IDs: ids,
}
if input.All != nil {
ret.all = *input.All
}
return ret
}
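
// CreateExportTask creates a selective ExportTask from the given input,
// using the given hash algorithm for scene file naming.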
func CreateExportTask(a models.HashAlgorithm, input models.ExportObjectsInput) *ExportTask {
includeDeps := false
if input.IncludeDependencies != nil {
includeDeps = *input.IncludeDependencies
}
return &ExportTask{
txnManager: GetInstance().TxnManager,
fileNamingAlgorithm: a,
scenes: newExportSpec(input.Scenes),
images: newExportSpec(input.Images),
performers: newExportSpec(input.Performers),
movies: newExportSpec(input.Movies),
tags: newExportSpec(input.Tags),
studios: newExportSpec(input.Studios),
galleries: newExportSpec(input.Galleries),
includeDependencies: includeDeps,
}
}
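
// Start runs the export to completion. It resolves the output directory,
// exports each selected object type using worker pools sized to
// GOMAXPROCS, writes the mappings file and, for selective exports,
// generates a downloadable zip of the result.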
func (t *ExportTask) Start(wg *sync.WaitGroup) {
defer wg.Done()
// @manager.total = Scene.count + Gallery.count + Performer.count + Studio.count + Movie.count
workerCount := runtime.GOMAXPROCS(0) // set worker count to number of cpus available
t.Mappings = &jsonschema.Mappings{}
startTime := time.Now()
if t.full {
t.baseDir = config.GetInstance().GetMetadataPath()
} else {
var err error
t.baseDir, err = instance.Paths.Generated.TempDir("export")
if err != nil {
logger.Errorf("error creating temporary directory for export: %s", err.Error())
return
}
defer func() {
err := fsutil.RemoveDir(t.baseDir)
if err != nil {
logger.Errorf("error removing directory %s: %s", t.baseDir, err.Error())
}
}()
}
t.json = jsonUtils{
json: *paths.GetJSONPaths(t.baseDir),
}
paths.EnsureJSONDirs(t.baseDir)
txnErr := t.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
// include movie scenes and gallery images
if !t.full {
// only include movie scenes if includeDependencies is also set
if !t.scenes.all && t.includeDependencies {
t.populateMovieScenes(r)
}
// always export gallery images
if !t.images.all {
t.populateGalleryImages(r)
}
}
t.ExportScenes(workerCount, r)
t.ExportImages(workerCount, r)
t.ExportGalleries(workerCount, r)
t.ExportMovies(workerCount, r)
t.ExportPerformers(workerCount, r)
t.ExportStudios(workerCount, r)
t.ExportTags(workerCount, r)
if t.full {
t.ExportScrapedItems(r)
}
return nil
})
if txnErr != nil {
logger.Warnf("error while running export transaction: %v", txnErr)
}
if err := t.json.saveMappings(t.Mappings); err != nil {
logger.Errorf("[mappings] failed to save json: %s", err.Error())
}
if !t.full {
err := t.generateDownload()
if err != nil {
logger.Errorf("error generating download link: %s", err.Error())
return
}
}
logger.Infof("Export complete in %s.", time.Since(startTime))
}
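
// generateDownload zips the exported files into the generated downloads
// directory and registers the zip with the download store, recording the
// returned hash in t.DownloadHash.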
func (t *ExportTask) generateDownload() error {
// zip the files and register a download link
if err := fsutil.EnsureDir(instance.Paths.Generated.Downloads); err != nil {
return err
}
z, err := os.CreateTemp(instance.Paths.Generated.Downloads, "export*.zip")
if err != nil {
return err
}
defer z.Close()
err = t.zipFiles(z)
if err != nil {
return err
}
t.DownloadHash, err = instance.DownloadStore.RegisterFile(z.Name(), "", false)
if err != nil {
return fmt.Errorf("error registering file for download: %w", err)
}
logger.Debugf("Generated zip file %s with hash %s", z.Name(), t.DownloadHash)
return nil
}
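
// zipFiles writes the mappings file and the exported JSON files for each
// object type into a single zip archive.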
func (t *ExportTask) zipFiles(w io.Writer) error {
z := zip.NewWriter(w)
defer z.Close()
u := jsonUtils{
json: *paths.GetJSONPaths(""),
}
// write the mappings file
err := t.zipFile(t.json.json.MappingsFile, "", z)
if err != nil {
return err
}
walkWarn(t.json.json.Tags, t.zipWalkFunc(u.json.Tags, z))
walkWarn(t.json.json.Galleries, t.zipWalkFunc(u.json.Galleries, z))
walkWarn(t.json.json.Performers, t.zipWalkFunc(u.json.Performers, z))
walkWarn(t.json.json.Studios, t.zipWalkFunc(u.json.Studios, z))
walkWarn(t.json.json.Movies, t.zipWalkFunc(u.json.Movies, z))
walkWarn(t.json.json.Scenes, t.zipWalkFunc(u.json.Scenes, z))
walkWarn(t.json.json.Images, t.zipWalkFunc(u.json.Images, z))
return nil
}
// like filepath.Walk but issue a warning on error
func walkWarn(root string, fn filepath.WalkFunc) {
if err := filepath.Walk(root, fn); err != nil {
logger.Warnf("error walking structure %v: %v", root, err)
}
}
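
// zipWalkFunc returns a filepath.WalkFunc that adds each regular file it
// visits to the zip archive under outDir.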
func (t *ExportTask) zipWalkFunc(outDir string, z *zip.Writer) filepath.WalkFunc {
return func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if info.IsDir() {
return nil
}
return t.zipFile(path, outDir, z)
}
}
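
// zipFile copies the file fn into the zip archive, storing it under outDir
// with its base name.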
func (t *ExportTask) zipFile(fn, outDir string, z *zip.Writer) error {
bn := filepath.Base(fn)
f, err := z.Create(filepath.Join(outDir, bn))
if err != nil {
return fmt.Errorf("error creating zip entry for %s: %s", fn, err.Error())
}
i, err := os.Open(fn)
if err != nil {
return fmt.Errorf("error opening %s: %s", fn, err.Error())
}
defer i.Close()
if _, err := io.Copy(f, i); err != nil {
return fmt.Errorf("error writing %s to zip: %s", fn, err.Error())
}
return nil
}
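
// populateMovieScenes adds the IDs of all scenes attached to the selected
// movies to the scene export spec, so that movie scenes are included in a
// selective export.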
func (t *ExportTask) populateMovieScenes(repo models.ReaderRepository) {
reader := repo.Movie()
sceneReader := repo.Scene()
var movies []*models.Movie
var err error
all := t.full || (t.movies != nil && t.movies.all)
if all {
movies, err = reader.All()
} else if t.movies != nil && len(t.movies.IDs) > 0 {
movies, err = reader.FindMany(t.movies.IDs)
}
if err != nil {
logger.Errorf("[movies] failed to fetch movies: %s", err.Error())
}
for _, m := range movies {
scenes, err := sceneReader.FindByMovieID(m.ID)
if err != nil {
logger.Errorf("[movies] <%s> failed to fetch scenes for movie: %s", m.Checksum, err.Error())
continue
}
for _, s := range scenes {
t.scenes.IDs = intslice.IntAppendUnique(t.scenes.IDs, s.ID)
}
}
}
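
// populateGalleryImages adds the IDs of all images attached to the
// selected galleries to the image export spec.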
func (t *ExportTask) populateGalleryImages(repo models.ReaderRepository) {
reader := repo.Gallery()
imageReader := repo.Image()
var galleries []*models.Gallery
var err error
all := t.full || (t.galleries != nil && t.galleries.all)
if all {
galleries, err = reader.All()
} else if t.galleries != nil && len(t.galleries.IDs) > 0 {
galleries, err = reader.FindMany(t.galleries.IDs)
}
if err != nil {
logger.Errorf("[galleries] failed to fetch galleries: %s", err.Error())
}
for _, g := range galleries {
images, err := imageReader.FindByGalleryID(g.ID)
if err != nil {
logger.Errorf("[galleries] <%s> failed to fetch images for gallery: %s", g.Checksum, err.Error())
continue
}
for _, i := range images {
t.images.IDs = intslice.IntAppendUnique(t.images.IDs, i.ID)
}
}
}
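
// ExportScenes writes the selected scenes (or all scenes, for a full
// export) to JSON using the given number of worker goroutines, recording a
// path/checksum mapping for each scene.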
func (t *ExportTask) ExportScenes(workers int, repo models.ReaderRepository) {
var scenesWg sync.WaitGroup
sceneReader := repo.Scene()
var scenes []*models.Scene
var err error
all := t.full || (t.scenes != nil && t.scenes.all)
if all {
scenes, err = sceneReader.All()
} else if t.scenes != nil && len(t.scenes.IDs) > 0 {
scenes, err = sceneReader.FindMany(t.scenes.IDs)
}
	if err != nil {
		logger.Errorf("[scenes] failed to fetch scenes: %s", err.Error())
	}

jobCh := make(chan *models.Scene, workers*2) // make a buffered channel to feed workers
logger.Info("[scenes] exporting")
startTime := time.Now()
for w := 0; w < workers; w++ { // create export Scene workers
scenesWg.Add(1)
go exportScene(&scenesWg, jobCh, repo, t)
}
for i, scene := range scenes {
index := i + 1
if (i % 100) == 0 { // make progress easier to read
logger.Progressf("[scenes] %d of %d", index, len(scenes))
}
t.Mappings.Scenes = append(t.Mappings.Scenes, jsonschema.PathNameMapping{Path: scene.Path, Checksum: scene.GetHash(t.fileNamingAlgorithm)})
jobCh <- scene // feed workers
}
close(jobCh) // close channel so that workers will know no more jobs are available
scenesWg.Wait()
logger.Infof("[scenes] export complete in %s. %d workers used.", time.Since(startTime), workers)
}
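
// exportScene is a worker that consumes scenes from jobChan, assembles each
// scene's JSON (studio, galleries, performers, tags, markers and movies),
// collects dependency IDs when requested, and saves the JSON only when it
// differs from what is already on disk.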
func exportScene(wg *sync.WaitGroup, jobChan <-chan *models.Scene, repo models.ReaderRepository, t *ExportTask) {
defer wg.Done()
sceneReader := repo.Scene()
studioReader := repo.Studio()
movieReader := repo.Movie()
galleryReader := repo.Gallery()
performerReader := repo.Performer()
tagReader := repo.Tag()
sceneMarkerReader := repo.SceneMarker()
for s := range jobChan {
sceneHash := s.GetHash(t.fileNamingAlgorithm)
newSceneJSON, err := scene.ToBasicJSON(sceneReader, s)
if err != nil {
logger.Errorf("[scenes] <%s> error getting scene JSON: %s", sceneHash, err.Error())
continue
}
		newSceneJSON.Studio, err = scene.GetStudioName(studioReader, s)
		if err != nil {
			logger.Errorf("[scenes] <%s> error getting scene studio name: %s", sceneHash, err.Error())
			continue
		}

		galleries, err := galleryReader.FindBySceneID(s.ID)
		if err != nil {
			logger.Errorf("[scenes] <%s> error getting scene gallery checksums: %s", sceneHash, err.Error())
			continue
		}

		newSceneJSON.Galleries = gallery.GetChecksums(galleries)

		performers, err := performerReader.FindBySceneID(s.ID)
		if err != nil {
			logger.Errorf("[scenes] <%s> error getting scene performer names: %s", sceneHash, err.Error())
			continue
		}

		newSceneJSON.Performers = performer.GetNames(performers)

		newSceneJSON.Tags, err = scene.GetTagNames(tagReader, s)
		if err != nil {
			logger.Errorf("[scenes] <%s> error getting scene tag names: %s", sceneHash, err.Error())
			continue
		}

		newSceneJSON.Markers, err = scene.GetSceneMarkersJSON(sceneMarkerReader, tagReader, s)
		if err != nil {
			logger.Errorf("[scenes] <%s> error getting scene markers JSON: %s", sceneHash, err.Error())
			continue
		}

		newSceneJSON.Movies, err = scene.GetSceneMoviesJSON(movieReader, sceneReader, s)
		if err != nil {
			logger.Errorf("[scenes] <%s> error getting scene movies JSON: %s", sceneHash, err.Error())
			continue
		}

		if t.includeDependencies {
			if s.StudioID.Valid {
				t.studios.IDs = intslice.IntAppendUnique(t.studios.IDs, int(s.StudioID.Int64))
			}

			t.galleries.IDs = intslice.IntAppendUniques(t.galleries.IDs, gallery.GetIDs(galleries))

			tagIDs, err := scene.GetDependentTagIDs(tagReader, sceneMarkerReader, s)
			if err != nil {
				logger.Errorf("[scenes] <%s> error getting scene tags: %s", sceneHash, err.Error())
				continue
			}
			t.tags.IDs = intslice.IntAppendUniques(t.tags.IDs, tagIDs)

			movieIDs, err := scene.GetDependentMovieIDs(sceneReader, s)
			if err != nil {
				logger.Errorf("[scenes] <%s> error getting scene movies: %s", sceneHash, err.Error())
				continue
			}
			t.movies.IDs = intslice.IntAppendUniques(t.movies.IDs, movieIDs)

			t.performers.IDs = intslice.IntAppendUniques(t.performers.IDs, performer.GetIDs(performers))
		}

		sceneJSON, err := t.json.getScene(sceneHash)
		if err == nil && jsonschema.CompareJSON(*sceneJSON, *newSceneJSON) {
			continue
		}

		if err := t.json.saveScene(sceneHash, newSceneJSON); err != nil {
			logger.Errorf("[scenes] <%s> failed to save json: %s", sceneHash, err.Error())
		}
}
}
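
// ExportImages writes the selected images (or all images, for a full
// export) to JSON using the given number of worker goroutines.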
func (t *ExportTask) ExportImages(workers int, repo models.ReaderRepository) {
var imagesWg sync.WaitGroup
imageReader := repo.Image()
var images []*models.Image
var err error
all := t.full || (t.images != nil && t.images.all)
if all {
images, err = imageReader.All()
} else if t.images != nil && len(t.images.IDs) > 0 {
images, err = imageReader.FindMany(t.images.IDs)
}
if err != nil {
logger.Errorf("[images] failed to fetch images: %s", err.Error())
}
jobCh := make(chan *models.Image, workers*2) // make a buffered channel to feed workers
logger.Info("[images] exporting")
startTime := time.Now()
for w := 0; w < workers; w++ { // create export Image workers
imagesWg.Add(1)
go exportImage(&imagesWg, jobCh, repo, t)
}
for i, image := range images {
index := i + 1
if (i % 100) == 0 { // make progress easier to read
logger.Progressf("[images] %d of %d", index, len(images))
}
t.Mappings.Images = append(t.Mappings.Images, jsonschema.PathNameMapping{Path: image.Path, Checksum: image.Checksum})
jobCh <- image // feed workers
}
close(jobCh) // close channel so that workers will know no more jobs are available
imagesWg.Wait()
logger.Infof("[images] export complete in %s. %d workers used.", time.Since(startTime), workers)
}
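
// exportImage is a worker that consumes images from jobChan and writes each
// image's JSON, skipping images whose JSON is unchanged.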
func exportImage(wg *sync.WaitGroup, jobChan <-chan *models.Image, repo models.ReaderRepository, t *ExportTask) {
defer wg.Done()
studioReader := repo.Studio()
galleryReader := repo.Gallery()
performerReader := repo.Performer()
tagReader := repo.Tag()
for s := range jobChan {
imageHash := s.Checksum
newImageJSON := image.ToBasicJSON(s)
var err error
newImageJSON.Studio, err = image.GetStudioName(studioReader, s)
if err != nil {
logger.Errorf("[images] <%s> error getting image studio name: %s", imageHash, err.Error())
continue
}
imageGalleries, err := galleryReader.FindByImageID(s.ID)
if err != nil {
logger.Errorf("[images] <%s> error getting image galleries: %s", imageHash, err.Error())
continue
}
newImageJSON.Galleries = t.getGalleryChecksums(imageGalleries)
performers, err := performerReader.FindByImageID(s.ID)
if err != nil {
logger.Errorf("[images] <%s> error getting image performer names: %s", imageHash, err.Error())
continue
}
newImageJSON.Performers = performer.GetNames(performers)
tags, err := tagReader.FindByImageID(s.ID)
if err != nil {
logger.Errorf("[images] <%s> error getting image tag names: %s", imageHash, err.Error())
continue
}
newImageJSON.Tags = tag.GetNames(tags)
if t.includeDependencies {
if s.StudioID.Valid {
t.studios.IDs = intslice.IntAppendUnique(t.studios.IDs, int(s.StudioID.Int64))
}
t.galleries.IDs = intslice.IntAppendUniques(t.galleries.IDs, gallery.GetIDs(imageGalleries))
t.tags.IDs = intslice.IntAppendUniques(t.tags.IDs, tag.GetIDs(tags))
t.performers.IDs = intslice.IntAppendUniques(t.performers.IDs, performer.GetIDs(performers))
}
imageJSON, err := t.json.getImage(imageHash)
if err == nil && jsonschema.CompareJSON(*imageJSON, *newImageJSON) {
continue
}
if err := t.json.saveImage(imageHash, newImageJSON); err != nil {
logger.Errorf("[images] <%s> failed to save json: %s", imageHash, err.Error())
}
}
}
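
// getGalleryChecksums returns the checksums of the given galleries.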
func (t *ExportTask) getGalleryChecksums(galleries []*models.Gallery) (ret []string) {
for _, g := range galleries {
ret = append(ret, g.Checksum)
}
return
}
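
// ExportGalleries writes the selected galleries (or all galleries, for a
// full export) to JSON using the given number of worker goroutines.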
func (t *ExportTask) ExportGalleries(workers int, repo models.ReaderRepository) {
var galleriesWg sync.WaitGroup
reader := repo.Gallery()
var galleries []*models.Gallery
var err error
all := t.full || (t.galleries != nil && t.galleries.all)
if all {
galleries, err = reader.All()
} else if t.galleries != nil && len(t.galleries.IDs) > 0 {
galleries, err = reader.FindMany(t.galleries.IDs)
}
	if err != nil {
		logger.Errorf("[galleries] failed to fetch galleries: %s", err.Error())
	}

jobCh := make(chan *models.Gallery, workers*2) // make a buffered channel to feed workers
logger.Info("[galleries] exporting")
startTime := time.Now()
	for w := 0; w < workers; w++ { // create export Gallery workers
galleriesWg.Add(1)
go exportGallery(&galleriesWg, jobCh, repo, t)
}
for i, gallery := range galleries {
index := i + 1
if (i % 100) == 0 { // make progress easier to read
logger.Progressf("[galleries] %d of %d", index, len(galleries))
}
t.Mappings.Galleries = append(t.Mappings.Galleries, jsonschema.PathNameMapping{
Path: gallery.Path.String,
Name: gallery.Title.String,
Checksum: gallery.Checksum,
})
jobCh <- gallery
}
close(jobCh) // close channel so that workers will know no more jobs are available
galleriesWg.Wait()
logger.Infof("[galleries] export complete in %s. %d workers used.", time.Since(startTime), workers)
}
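
// exportGallery is a worker that consumes galleries from jobChan and writes
// each gallery's JSON, skipping galleries whose JSON is unchanged.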
func exportGallery(wg *sync.WaitGroup, jobChan <-chan *models.Gallery, repo models.ReaderRepository, t *ExportTask) {
defer wg.Done()
studioReader := repo.Studio()
performerReader := repo.Performer()
tagReader := repo.Tag()
for g := range jobChan {
galleryHash := g.Checksum
newGalleryJSON, err := gallery.ToBasicJSON(g)
if err != nil {
logger.Errorf("[galleries] <%s> error getting gallery JSON: %s", galleryHash, err.Error())
continue
}
newGalleryJSON.Studio, err = gallery.GetStudioName(studioReader, g)
if err != nil {
logger.Errorf("[galleries] <%s> error getting gallery studio name: %s", galleryHash, err.Error())
continue
}
performers, err := performerReader.FindByGalleryID(g.ID)
if err != nil {
logger.Errorf("[galleries] <%s> error getting gallery performer names: %s", galleryHash, err.Error())
continue
}
newGalleryJSON.Performers = performer.GetNames(performers)
tags, err := tagReader.FindByGalleryID(g.ID)
if err != nil {
logger.Errorf("[galleries] <%s> error getting gallery tag names: %s", galleryHash, err.Error())
continue
}
newGalleryJSON.Tags = tag.GetNames(tags)
if t.includeDependencies {
if g.StudioID.Valid {
t.studios.IDs = intslice.IntAppendUnique(t.studios.IDs, int(g.StudioID.Int64))
}
t.tags.IDs = intslice.IntAppendUniques(t.tags.IDs, tag.GetIDs(tags))
t.performers.IDs = intslice.IntAppendUniques(t.performers.IDs, performer.GetIDs(performers))
}
galleryJSON, err := t.json.getGallery(galleryHash)
if err == nil && jsonschema.CompareJSON(*galleryJSON, *newGalleryJSON) {
continue
}
if err := t.json.saveGallery(galleryHash, newGalleryJSON); err != nil {
logger.Errorf("[galleries] <%s> failed to save json: %s", galleryHash, err.Error())
}
}
}
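
// ExportPerformers writes the selected performers (or all performers, for a
// full export) to JSON using the given number of worker goroutines.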
func (t *ExportTask) ExportPerformers(workers int, repo models.ReaderRepository) {
var performersWg sync.WaitGroup
reader := repo.Performer()
var performers []*models.Performer
var err error
all := t.full || (t.performers != nil && t.performers.all)
if all {
performers, err = reader.All()
} else if t.performers != nil && len(t.performers.IDs) > 0 {
performers, err = reader.FindMany(t.performers.IDs)
}
	if err != nil {
		logger.Errorf("[performers] failed to fetch performers: %s", err.Error())
	}

jobCh := make(chan *models.Performer, workers*2) // make a buffered channel to feed workers
logger.Info("[performers] exporting")
startTime := time.Now()
for w := 0; w < workers; w++ { // create export Performer workers
performersWg.Add(1)
go t.exportPerformer(&performersWg, jobCh, repo)
}
for i, performer := range performers {
index := i + 1
logger.Progressf("[performers] %d of %d", index, len(performers))
t.Mappings.Performers = append(t.Mappings.Performers, jsonschema.PathNameMapping{Name: performer.Name.String, Checksum: performer.Checksum})
jobCh <- performer // feed workers
}
close(jobCh) // close channel so workers will know that no more jobs are available
performersWg.Wait()
logger.Infof("[performers] export complete in %s. %d workers used.", time.Since(startTime), workers)
}
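
// exportPerformer is a worker that consumes performers from jobChan and
// writes each performer's JSON, skipping performers whose JSON is unchanged.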
func (t *ExportTask) exportPerformer(wg *sync.WaitGroup, jobChan <-chan *models.Performer, repo models.ReaderRepository) {
defer wg.Done()
performerReader := repo.Performer()
for p := range jobChan {
newPerformerJSON, err := performer.ToJSON(performerReader, p)
if err != nil {
logger.Errorf("[performers] <%s> error getting performer JSON: %s", p.Checksum, err.Error())
continue
}
tags, err := repo.Tag().FindByPerformerID(p.ID)
if err != nil {
logger.Errorf("[performers] <%s> error getting performer tags: %s", p.Checksum, err.Error())
continue
}
newPerformerJSON.Tags = tag.GetNames(tags)
if t.includeDependencies {
t.tags.IDs = intslice.IntAppendUniques(t.tags.IDs, tag.GetIDs(tags))
}
performerJSON, err := t.json.getPerformer(p.Checksum)
		if err != nil {
			logger.Debugf("[performers] error reading performer json: %s", err.Error())
		} else if jsonschema.CompareJSON(*performerJSON, *newPerformerJSON) {
continue
}
if err := t.json.savePerformer(p.Checksum, newPerformerJSON); err != nil {
logger.Errorf("[performers] <%s> failed to save json: %s", p.Checksum, err.Error())
}
}
}
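
// ExportStudios writes the selected studios (or all studios, for a full
// export) to JSON using the given number of worker goroutines.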
func (t *ExportTask) ExportStudios(workers int, repo models.ReaderRepository) {
var studiosWg sync.WaitGroup
reader := repo.Studio()
var studios []*models.Studio
var err error
all := t.full || (t.studios != nil && t.studios.all)
if all {
studios, err = reader.All()
} else if t.studios != nil && len(t.studios.IDs) > 0 {
studios, err = reader.FindMany(t.studios.IDs)
}
	if err != nil {
		logger.Errorf("[studios] failed to fetch studios: %s", err.Error())
	}

logger.Info("[studios] exporting")
startTime := time.Now()
jobCh := make(chan *models.Studio, workers*2) // make a buffered channel to feed workers
for w := 0; w < workers; w++ { // create export Studio workers
studiosWg.Add(1)
go t.exportStudio(&studiosWg, jobCh, repo)
}
for i, studio := range studios {
index := i + 1
logger.Progressf("[studios] %d of %d", index, len(studios))
t.Mappings.Studios = append(t.Mappings.Studios, jsonschema.PathNameMapping{Name: studio.Name.String, Checksum: studio.Checksum})
jobCh <- studio // feed workers
}
close(jobCh)
studiosWg.Wait()
logger.Infof("[studios] export complete in %s. %d workers used.", time.Since(startTime), workers)
}
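
// exportStudio is a worker that consumes studios from jobChan and writes
// each studio's JSON, skipping studios whose JSON is unchanged.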
func (t *ExportTask) exportStudio(wg *sync.WaitGroup, jobChan <-chan *models.Studio, repo models.ReaderRepository) {
defer wg.Done()
studioReader := repo.Studio()
for s := range jobChan {
newStudioJSON, err := studio.ToJSON(studioReader, s)
		if err != nil {
			logger.Errorf("[studios] <%s> error getting studio JSON: %s", s.Checksum, err.Error())
			continue
		}

studioJSON, err := t.json.getStudio(s.Checksum)
if err == nil && jsonschema.CompareJSON(*studioJSON, *newStudioJSON) {
continue
}
if err := t.json.saveStudio(s.Checksum, newStudioJSON); err != nil {
logger.Errorf("[studios] <%s> failed to save json: %s", s.Checksum, err.Error())
}
}
}
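
// ExportTags writes the selected tags (or all tags, for a full export) to
// JSON. Tag checksums are MD5 hashes of the tag name, computed on the fly
// because they are not stored in the database.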
func (t *ExportTask) ExportTags(workers int, repo models.ReaderRepository) {
var tagsWg sync.WaitGroup
reader := repo.Tag()
var tags []*models.Tag
var err error
all := t.full || (t.tags != nil && t.tags.all)
if all {
tags, err = reader.All()
} else if t.tags != nil && len(t.tags.IDs) > 0 {
tags, err = reader.FindMany(t.tags.IDs)
}
if err != nil {
logger.Errorf("[tags] failed to fetch tags: %s", err.Error())
}
logger.Info("[tags] exporting")
startTime := time.Now()
jobCh := make(chan *models.Tag, workers*2) // make a buffered channel to feed workers
for w := 0; w < workers; w++ { // create export Tag workers
tagsWg.Add(1)
go t.exportTag(&tagsWg, jobCh, repo)
}
for i, tag := range tags {
index := i + 1
logger.Progressf("[tags] %d of %d", index, len(tags))
// generate checksum on the fly by name, since we don't store it
checksum := md5.FromString(tag.Name)
t.Mappings.Tags = append(t.Mappings.Tags, jsonschema.PathNameMapping{Name: tag.Name, Checksum: checksum})
jobCh <- tag // feed workers
}
close(jobCh)
tagsWg.Wait()
logger.Infof("[tags] export complete in %s. %d workers used.", time.Since(startTime), workers)
}
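
// exportTag is a worker that consumes tags from jobChan and writes each
// tag's JSON, skipping tags whose JSON is unchanged.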
func (t *ExportTask) exportTag(wg *sync.WaitGroup, jobChan <-chan *models.Tag, repo models.ReaderRepository) {
defer wg.Done()
tagReader := repo.Tag()
for thisTag := range jobChan {
newTagJSON, err := tag.ToJSON(tagReader, thisTag)
if err != nil {
logger.Errorf("[tags] <%s> error getting tag JSON: %s", thisTag.Name, err.Error())
continue
}
// generate checksum on the fly by name, since we don't store it
checksum := md5.FromString(thisTag.Name)
tagJSON, err := t.json.getTag(checksum)
if err == nil && jsonschema.CompareJSON(*tagJSON, *newTagJSON) {
continue
}
if err := t.json.saveTag(checksum, newTagJSON); err != nil {
logger.Errorf("[tags] <%s> failed to save json: %s", checksum, err.Error())
}
}
}
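
// ExportMovies writes the selected movies (or all movies, for a full
// export) to JSON using the given number of worker goroutines.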
func (t *ExportTask) ExportMovies(workers int, repo models.ReaderRepository) {
var moviesWg sync.WaitGroup
reader := repo.Movie()
var movies []*models.Movie
var err error
all := t.full || (t.movies != nil && t.movies.all)
if all {
movies, err = reader.All()
} else if t.movies != nil && len(t.movies.IDs) > 0 {
movies, err = reader.FindMany(t.movies.IDs)
}
if err != nil {
logger.Errorf("[movies] failed to fetch movies: %s", err.Error())
}
logger.Info("[movies] exporting")
startTime := time.Now()
jobCh := make(chan *models.Movie, workers*2) // make a buffered channel to feed workers
	for w := 0; w < workers; w++ { // create export Movie workers
moviesWg.Add(1)
go t.exportMovie(&moviesWg, jobCh, repo)
}
for i, movie := range movies {
index := i + 1
logger.Progressf("[movies] %d of %d", index, len(movies))
t.Mappings.Movies = append(t.Mappings.Movies, jsonschema.PathNameMapping{Name: movie.Name.String, Checksum: movie.Checksum})
jobCh <- movie // feed workers
}
close(jobCh)
moviesWg.Wait()
logger.Infof("[movies] export complete in %s. %d workers used.", time.Since(startTime), workers)
}
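
// exportMovie is a worker that consumes movies from jobChan and writes each
// movie's JSON, skipping movies whose JSON is unchanged.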
func (t *ExportTask) exportMovie(wg *sync.WaitGroup, jobChan <-chan *models.Movie, repo models.ReaderRepository) {
defer wg.Done()
movieReader := repo.Movie()
studioReader := repo.Studio()
for m := range jobChan {
newMovieJSON, err := movie.ToJSON(movieReader, studioReader, m)
if err != nil {
			logger.Errorf("[movies] <%s> error getting movie JSON: %s", m.Checksum, err.Error())
continue
}
if t.includeDependencies {
if m.StudioID.Valid {
t.studios.IDs = intslice.IntAppendUnique(t.studios.IDs, int(m.StudioID.Int64))
}
}
movieJSON, err := t.json.getMovie(m.Checksum)
if err != nil {
logger.Debugf("[movies] error reading movie json: %s", err.Error())
} else if jsonschema.CompareJSON(*movieJSON, *newMovieJSON) {
continue
}
if err := t.json.saveMovie(m.Checksum, newMovieJSON); err != nil {
logger.Errorf("[movies] <%s> failed to save json: %s", m.Checksum, err.Error())
}
}
}
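
// ExportScrapedItems writes all scraped items to a single JSON file,
// rewriting it only when the content has changed. It is only run as part
// of a full export.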
func (t *ExportTask) ExportScrapedItems(repo models.ReaderRepository) {
qb := repo.ScrapedItem()
sqb := repo.Studio()
scrapedItems, err := qb.All()
if err != nil {
logger.Errorf("[scraped sites] failed to fetch all items: %s", err.Error())
}
logger.Info("[scraped sites] exporting")
scraped := []jsonschema.ScrapedItem{}
for i, scrapedItem := range scrapedItems {
index := i + 1
logger.Progressf("[scraped sites] %d of %d", index, len(scrapedItems))
var studioName string
if scrapedItem.StudioID.Valid {
studio, _ := sqb.Find(int(scrapedItem.StudioID.Int64))
if studio != nil {
studioName = studio.Name.String
}
}
newScrapedItemJSON := jsonschema.ScrapedItem{}
if scrapedItem.Title.Valid {
newScrapedItemJSON.Title = scrapedItem.Title.String
}
if scrapedItem.Description.Valid {
newScrapedItemJSON.Description = scrapedItem.Description.String
}
if scrapedItem.URL.Valid {
newScrapedItemJSON.URL = scrapedItem.URL.String
}
if scrapedItem.Date.Valid {
newScrapedItemJSON.Date = utils.GetYMDFromDatabaseDate(scrapedItem.Date.String)
}
if scrapedItem.Rating.Valid {
newScrapedItemJSON.Rating = scrapedItem.Rating.String
}
if scrapedItem.Tags.Valid {
newScrapedItemJSON.Tags = scrapedItem.Tags.String
}
if scrapedItem.Models.Valid {
newScrapedItemJSON.Models = scrapedItem.Models.String
}
if scrapedItem.Episode.Valid {
newScrapedItemJSON.Episode = int(scrapedItem.Episode.Int64)
}
if scrapedItem.GalleryFilename.Valid {
newScrapedItemJSON.GalleryFilename = scrapedItem.GalleryFilename.String
}
if scrapedItem.GalleryURL.Valid {
newScrapedItemJSON.GalleryURL = scrapedItem.GalleryURL.String
}
if scrapedItem.VideoFilename.Valid {
newScrapedItemJSON.VideoFilename = scrapedItem.VideoFilename.String
}
if scrapedItem.VideoURL.Valid {
newScrapedItemJSON.VideoURL = scrapedItem.VideoURL.String
}
newScrapedItemJSON.Studio = studioName
		updatedAt := models.JSONTime{Time: scrapedItem.UpdatedAt.Timestamp} // TODO keeping ruby format
		newScrapedItemJSON.UpdatedAt = updatedAt

		scraped = append(scraped, newScrapedItemJSON)
	}

	scrapedJSON, err := t.json.getScraped()
	if err != nil {
		logger.Debugf("[scraped sites] error reading json: %s", err.Error())
	}

	if !jsonschema.CompareJSON(scrapedJSON, scraped) {
		if err := t.json.saveScaped(scraped); err != nil {
logger.Errorf("[scraped sites] failed to save json: %s", err.Error())
}
}
logger.Infof("[scraped sites] export complete")
}