stash/pkg/manager/task_export.go

1052 lines
29 KiB
Go

package manager
import (
"archive/zip"
"context"
"fmt"
"io"
"os"
"path/filepath"
"runtime"
"sync"
"time"
"github.com/stashapp/stash/pkg/gallery"
"github.com/stashapp/stash/pkg/image"
"github.com/stashapp/stash/pkg/logger"
"github.com/stashapp/stash/pkg/manager/config"
"github.com/stashapp/stash/pkg/manager/jsonschema"
"github.com/stashapp/stash/pkg/manager/paths"
"github.com/stashapp/stash/pkg/models"
"github.com/stashapp/stash/pkg/movie"
"github.com/stashapp/stash/pkg/performer"
"github.com/stashapp/stash/pkg/scene"
"github.com/stashapp/stash/pkg/studio"
"github.com/stashapp/stash/pkg/tag"
"github.com/stashapp/stash/pkg/utils"
)
type ExportTask struct {
txnManager models.TransactionManager
full bool
baseDir string
json jsonUtils
Mappings *jsonschema.Mappings
fileNamingAlgorithm models.HashAlgorithm
scenes *exportSpec
images *exportSpec
performers *exportSpec
movies *exportSpec
tags *exportSpec
studios *exportSpec
galleries *exportSpec
includeDependencies bool
DownloadHash string
}
type exportSpec struct {
IDs []int
all bool
}
func newExportSpec(input *models.ExportObjectTypeInput) *exportSpec {
if input == nil {
return &exportSpec{}
}
ids, _ := utils.StringSliceToIntSlice(input.Ids)
ret := &exportSpec{
IDs: ids,
}
if input.All != nil {
ret.all = *input.All
}
return ret
}
func CreateExportTask(a models.HashAlgorithm, input models.ExportObjectsInput) *ExportTask {
includeDeps := false
if input.IncludeDependencies != nil {
includeDeps = *input.IncludeDependencies
}
return &ExportTask{
txnManager: GetInstance().TxnManager,
fileNamingAlgorithm: a,
scenes: newExportSpec(input.Scenes),
images: newExportSpec(input.Images),
performers: newExportSpec(input.Performers),
movies: newExportSpec(input.Movies),
tags: newExportSpec(input.Tags),
studios: newExportSpec(input.Studios),
galleries: newExportSpec(input.Galleries),
includeDependencies: includeDeps,
}
}
func (t *ExportTask) Start(wg *sync.WaitGroup) {
defer wg.Done()
// @manager.total = Scene.count + Gallery.count + Performer.count + Studio.count + Movie.count
workerCount := runtime.GOMAXPROCS(0) // set worker count to number of cpus available
t.Mappings = &jsonschema.Mappings{}
startTime := time.Now()
if t.full {
t.baseDir = config.GetInstance().GetMetadataPath()
} else {
var err error
t.baseDir, err = instance.Paths.Generated.TempDir("export")
if err != nil {
logger.Errorf("error creating temporary directory for export: %s", err.Error())
return
}
defer func() {
err := utils.RemoveDir(t.baseDir)
if err != nil {
logger.Errorf("error removing directory %s: %s", t.baseDir, err.Error())
}
}()
}
t.json = jsonUtils{
json: *paths.GetJSONPaths(t.baseDir),
}
paths.EnsureJSONDirs(t.baseDir)
txnErr := t.txnManager.WithReadTxn(context.TODO(), func(r models.ReaderRepository) error {
// include movie scenes and gallery images
if !t.full {
// only include movie scenes if includeDependencies is also set
if !t.scenes.all && t.includeDependencies {
t.populateMovieScenes(r)
}
// always export gallery images
if !t.images.all {
t.populateGalleryImages(r)
}
}
t.ExportScenes(workerCount, r)
t.ExportImages(workerCount, r)
t.ExportGalleries(workerCount, r)
t.ExportMovies(workerCount, r)
t.ExportPerformers(workerCount, r)
t.ExportStudios(workerCount, r)
t.ExportTags(workerCount, r)
if t.full {
t.ExportScrapedItems(r)
}
return nil
})
if txnErr != nil {
logger.Warnf("error while running export transaction: %v", txnErr)
}
if err := t.json.saveMappings(t.Mappings); err != nil {
logger.Errorf("[mappings] failed to save json: %s", err.Error())
}
if !t.full {
err := t.generateDownload()
if err != nil {
logger.Errorf("error generating download link: %s", err.Error())
return
}
}
logger.Infof("Export complete in %s.", time.Since(startTime))
}
func (t *ExportTask) generateDownload() error {
// zip the files and register a download link
if err := utils.EnsureDir(instance.Paths.Generated.Downloads); err != nil {
return err
}
z, err := os.CreateTemp(instance.Paths.Generated.Downloads, "export*.zip")
if err != nil {
return err
}
defer z.Close()
err = t.zipFiles(z)
if err != nil {
return err
}
t.DownloadHash = instance.DownloadStore.RegisterFile(z.Name(), "", false)
logger.Debugf("Generated zip file %s with hash %s", z.Name(), t.DownloadHash)
return nil
}
func (t *ExportTask) zipFiles(w io.Writer) error {
z := zip.NewWriter(w)
defer z.Close()
u := jsonUtils{
json: *paths.GetJSONPaths(""),
}
// write the mappings file
err := t.zipFile(t.json.json.MappingsFile, "", z)
if err != nil {
return err
}
walkWarn(t.json.json.Tags, t.zipWalkFunc(u.json.Tags, z))
walkWarn(t.json.json.Galleries, t.zipWalkFunc(u.json.Galleries, z))
walkWarn(t.json.json.Performers, t.zipWalkFunc(u.json.Performers, z))
walkWarn(t.json.json.Studios, t.zipWalkFunc(u.json.Studios, z))
walkWarn(t.json.json.Movies, t.zipWalkFunc(u.json.Movies, z))
walkWarn(t.json.json.Scenes, t.zipWalkFunc(u.json.Scenes, z))
walkWarn(t.json.json.Images, t.zipWalkFunc(u.json.Images, z))
return nil
}
// like filepath.Walk but issue a warning on error
func walkWarn(root string, fn filepath.WalkFunc) {
if err := filepath.Walk(root, fn); err != nil {
logger.Warnf("error walking structure %v: %v", root, err)
}
}
func (t *ExportTask) zipWalkFunc(outDir string, z *zip.Writer) filepath.WalkFunc {
return func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if info.IsDir() {
return nil
}
return t.zipFile(path, outDir, z)
}
}
func (t *ExportTask) zipFile(fn, outDir string, z *zip.Writer) error {
bn := filepath.Base(fn)
f, err := z.Create(filepath.Join(outDir, bn))
if err != nil {
return fmt.Errorf("error creating zip entry for %s: %s", fn, err.Error())
}
i, err := os.Open(fn)
if err != nil {
return fmt.Errorf("error opening %s: %s", fn, err.Error())
}
defer i.Close()
if _, err := io.Copy(f, i); err != nil {
return fmt.Errorf("error writing %s to zip: %s", fn, err.Error())
}
return nil
}
func (t *ExportTask) populateMovieScenes(repo models.ReaderRepository) {
reader := repo.Movie()
sceneReader := repo.Scene()
var movies []*models.Movie
var err error
all := t.full || (t.movies != nil && t.movies.all)
if all {
movies, err = reader.All()
} else if t.movies != nil && len(t.movies.IDs) > 0 {
movies, err = reader.FindMany(t.movies.IDs)
}
if err != nil {
logger.Errorf("[movies] failed to fetch movies: %s", err.Error())
}
for _, m := range movies {
scenes, err := sceneReader.FindByMovieID(m.ID)
if err != nil {
logger.Errorf("[movies] <%s> failed to fetch scenes for movie: %s", m.Checksum, err.Error())
continue
}
for _, s := range scenes {
t.scenes.IDs = utils.IntAppendUnique(t.scenes.IDs, s.ID)
}
}
}
func (t *ExportTask) populateGalleryImages(repo models.ReaderRepository) {
reader := repo.Gallery()
imageReader := repo.Image()
var galleries []*models.Gallery
var err error
all := t.full || (t.galleries != nil && t.galleries.all)
if all {
galleries, err = reader.All()
} else if t.galleries != nil && len(t.galleries.IDs) > 0 {
galleries, err = reader.FindMany(t.galleries.IDs)
}
if err != nil {
logger.Errorf("[galleries] failed to fetch galleries: %s", err.Error())
}
for _, g := range galleries {
images, err := imageReader.FindByGalleryID(g.ID)
if err != nil {
logger.Errorf("[galleries] <%s> failed to fetch images for gallery: %s", g.Checksum, err.Error())
continue
}
for _, i := range images {
t.images.IDs = utils.IntAppendUnique(t.images.IDs, i.ID)
}
}
}
func (t *ExportTask) ExportScenes(workers int, repo models.ReaderRepository) {
var scenesWg sync.WaitGroup
sceneReader := repo.Scene()
var scenes []*models.Scene
var err error
all := t.full || (t.scenes != nil && t.scenes.all)
if all {
scenes, err = sceneReader.All()
} else if t.scenes != nil && len(t.scenes.IDs) > 0 {
scenes, err = sceneReader.FindMany(t.scenes.IDs)
}
if err != nil {
logger.Errorf("[scenes] failed to fetch scenes: %s", err.Error())
}
jobCh := make(chan *models.Scene, workers*2) // make a buffered channel to feed workers
logger.Info("[scenes] exporting")
startTime := time.Now()
for w := 0; w < workers; w++ { // create export Scene workers
scenesWg.Add(1)
go exportScene(&scenesWg, jobCh, repo, t)
}
for i, scene := range scenes {
index := i + 1
if (i % 100) == 0 { // make progress easier to read
logger.Progressf("[scenes] %d of %d", index, len(scenes))
}
t.Mappings.Scenes = append(t.Mappings.Scenes, jsonschema.PathNameMapping{Path: scene.Path, Checksum: scene.GetHash(t.fileNamingAlgorithm)})
jobCh <- scene // feed workers
}
close(jobCh) // close channel so that workers will know no more jobs are available
scenesWg.Wait()
logger.Infof("[scenes] export complete in %s. %d workers used.", time.Since(startTime), workers)
}
func exportScene(wg *sync.WaitGroup, jobChan <-chan *models.Scene, repo models.ReaderRepository, t *ExportTask) {
defer wg.Done()
sceneReader := repo.Scene()
studioReader := repo.Studio()
movieReader := repo.Movie()
galleryReader := repo.Gallery()
performerReader := repo.Performer()
tagReader := repo.Tag()
sceneMarkerReader := repo.SceneMarker()
for s := range jobChan {
sceneHash := s.GetHash(t.fileNamingAlgorithm)
newSceneJSON, err := scene.ToBasicJSON(sceneReader, s)
if err != nil {
logger.Errorf("[scenes] <%s> error getting scene JSON: %s", sceneHash, err.Error())
continue
}
newSceneJSON.Studio, err = scene.GetStudioName(studioReader, s)
if err != nil {
logger.Errorf("[scenes] <%s> error getting scene studio name: %s", sceneHash, err.Error())
continue
}
galleries, err := galleryReader.FindBySceneID(s.ID)
if err != nil {
logger.Errorf("[scenes] <%s> error getting scene gallery checksums: %s", sceneHash, err.Error())
continue
}
newSceneJSON.Galleries = gallery.GetChecksums(galleries)
performers, err := performerReader.FindBySceneID(s.ID)
if err != nil {
logger.Errorf("[scenes] <%s> error getting scene performer names: %s", sceneHash, err.Error())
continue
}
newSceneJSON.Performers = performer.GetNames(performers)
newSceneJSON.Tags, err = scene.GetTagNames(tagReader, s)
if err != nil {
logger.Errorf("[scenes] <%s> error getting scene tag names: %s", sceneHash, err.Error())
continue
}
newSceneJSON.Markers, err = scene.GetSceneMarkersJSON(sceneMarkerReader, tagReader, s)
if err != nil {
logger.Errorf("[scenes] <%s> error getting scene markers JSON: %s", sceneHash, err.Error())
continue
}
newSceneJSON.Movies, err = scene.GetSceneMoviesJSON(movieReader, sceneReader, s)
if err != nil {
logger.Errorf("[scenes] <%s> error getting scene movies JSON: %s", sceneHash, err.Error())
continue
}
if t.includeDependencies {
if s.StudioID.Valid {
t.studios.IDs = utils.IntAppendUnique(t.studios.IDs, int(s.StudioID.Int64))
}
t.galleries.IDs = utils.IntAppendUniques(t.galleries.IDs, gallery.GetIDs(galleries))
tagIDs, err := scene.GetDependentTagIDs(tagReader, sceneMarkerReader, s)
if err != nil {
logger.Errorf("[scenes] <%s> error getting scene tags: %s", sceneHash, err.Error())
continue
}
t.tags.IDs = utils.IntAppendUniques(t.tags.IDs, tagIDs)
movieIDs, err := scene.GetDependentMovieIDs(sceneReader, s)
if err != nil {
logger.Errorf("[scenes] <%s> error getting scene movies: %s", sceneHash, err.Error())
continue
}
t.movies.IDs = utils.IntAppendUniques(t.movies.IDs, movieIDs)
t.performers.IDs = utils.IntAppendUniques(t.performers.IDs, performer.GetIDs(performers))
}
sceneJSON, err := t.json.getScene(sceneHash)
if err == nil && jsonschema.CompareJSON(*sceneJSON, *newSceneJSON) {
continue
}
if err := t.json.saveScene(sceneHash, newSceneJSON); err != nil {
logger.Errorf("[scenes] <%s> failed to save json: %s", sceneHash, err.Error())
}
}
}
func (t *ExportTask) ExportImages(workers int, repo models.ReaderRepository) {
var imagesWg sync.WaitGroup
imageReader := repo.Image()
var images []*models.Image
var err error
all := t.full || (t.images != nil && t.images.all)
if all {
images, err = imageReader.All()
} else if t.images != nil && len(t.images.IDs) > 0 {
images, err = imageReader.FindMany(t.images.IDs)
}
if err != nil {
logger.Errorf("[images] failed to fetch images: %s", err.Error())
}
jobCh := make(chan *models.Image, workers*2) // make a buffered channel to feed workers
logger.Info("[images] exporting")
startTime := time.Now()
for w := 0; w < workers; w++ { // create export Image workers
imagesWg.Add(1)
go exportImage(&imagesWg, jobCh, repo, t)
}
for i, image := range images {
index := i + 1
if (i % 100) == 0 { // make progress easier to read
logger.Progressf("[images] %d of %d", index, len(images))
}
t.Mappings.Images = append(t.Mappings.Images, jsonschema.PathNameMapping{Path: image.Path, Checksum: image.Checksum})
jobCh <- image // feed workers
}
close(jobCh) // close channel so that workers will know no more jobs are available
imagesWg.Wait()
logger.Infof("[images] export complete in %s. %d workers used.", time.Since(startTime), workers)
}
func exportImage(wg *sync.WaitGroup, jobChan <-chan *models.Image, repo models.ReaderRepository, t *ExportTask) {
defer wg.Done()
studioReader := repo.Studio()
galleryReader := repo.Gallery()
performerReader := repo.Performer()
tagReader := repo.Tag()
for s := range jobChan {
imageHash := s.Checksum
newImageJSON := image.ToBasicJSON(s)
var err error
newImageJSON.Studio, err = image.GetStudioName(studioReader, s)
if err != nil {
logger.Errorf("[images] <%s> error getting image studio name: %s", imageHash, err.Error())
continue
}
imageGalleries, err := galleryReader.FindByImageID(s.ID)
if err != nil {
logger.Errorf("[images] <%s> error getting image galleries: %s", imageHash, err.Error())
continue
}
newImageJSON.Galleries = t.getGalleryChecksums(imageGalleries)
performers, err := performerReader.FindByImageID(s.ID)
if err != nil {
logger.Errorf("[images] <%s> error getting image performer names: %s", imageHash, err.Error())
continue
}
newImageJSON.Performers = performer.GetNames(performers)
tags, err := tagReader.FindByImageID(s.ID)
if err != nil {
logger.Errorf("[images] <%s> error getting image tag names: %s", imageHash, err.Error())
continue
}
newImageJSON.Tags = tag.GetNames(tags)
if t.includeDependencies {
if s.StudioID.Valid {
t.studios.IDs = utils.IntAppendUnique(t.studios.IDs, int(s.StudioID.Int64))
}
t.galleries.IDs = utils.IntAppendUniques(t.galleries.IDs, gallery.GetIDs(imageGalleries))
t.tags.IDs = utils.IntAppendUniques(t.tags.IDs, tag.GetIDs(tags))
t.performers.IDs = utils.IntAppendUniques(t.performers.IDs, performer.GetIDs(performers))
}
imageJSON, err := t.json.getImage(imageHash)
if err == nil && jsonschema.CompareJSON(*imageJSON, *newImageJSON) {
continue
}
if err := t.json.saveImage(imageHash, newImageJSON); err != nil {
logger.Errorf("[images] <%s> failed to save json: %s", imageHash, err.Error())
}
}
}
func (t *ExportTask) getGalleryChecksums(galleries []*models.Gallery) (ret []string) {
for _, g := range galleries {
ret = append(ret, g.Checksum)
}
return
}
func (t *ExportTask) ExportGalleries(workers int, repo models.ReaderRepository) {
var galleriesWg sync.WaitGroup
reader := repo.Gallery()
var galleries []*models.Gallery
var err error
all := t.full || (t.galleries != nil && t.galleries.all)
if all {
galleries, err = reader.All()
} else if t.galleries != nil && len(t.galleries.IDs) > 0 {
galleries, err = reader.FindMany(t.galleries.IDs)
}
if err != nil {
logger.Errorf("[galleries] failed to fetch galleries: %s", err.Error())
}
jobCh := make(chan *models.Gallery, workers*2) // make a buffered channel to feed workers
logger.Info("[galleries] exporting")
startTime := time.Now()
for w := 0; w < workers; w++ { // create export Scene workers
galleriesWg.Add(1)
go exportGallery(&galleriesWg, jobCh, repo, t)
}
for i, gallery := range galleries {
index := i + 1
if (i % 100) == 0 { // make progress easier to read
logger.Progressf("[galleries] %d of %d", index, len(galleries))
}
t.Mappings.Galleries = append(t.Mappings.Galleries, jsonschema.PathNameMapping{
Path: gallery.Path.String,
Name: gallery.Title.String,
Checksum: gallery.Checksum,
})
jobCh <- gallery
}
close(jobCh) // close channel so that workers will know no more jobs are available
galleriesWg.Wait()
logger.Infof("[galleries] export complete in %s. %d workers used.", time.Since(startTime), workers)
}
func exportGallery(wg *sync.WaitGroup, jobChan <-chan *models.Gallery, repo models.ReaderRepository, t *ExportTask) {
defer wg.Done()
studioReader := repo.Studio()
performerReader := repo.Performer()
tagReader := repo.Tag()
for g := range jobChan {
galleryHash := g.Checksum
newGalleryJSON, err := gallery.ToBasicJSON(g)
if err != nil {
logger.Errorf("[galleries] <%s> error getting gallery JSON: %s", galleryHash, err.Error())
continue
}
newGalleryJSON.Studio, err = gallery.GetStudioName(studioReader, g)
if err != nil {
logger.Errorf("[galleries] <%s> error getting gallery studio name: %s", galleryHash, err.Error())
continue
}
performers, err := performerReader.FindByGalleryID(g.ID)
if err != nil {
logger.Errorf("[galleries] <%s> error getting gallery performer names: %s", galleryHash, err.Error())
continue
}
newGalleryJSON.Performers = performer.GetNames(performers)
tags, err := tagReader.FindByGalleryID(g.ID)
if err != nil {
logger.Errorf("[galleries] <%s> error getting gallery tag names: %s", galleryHash, err.Error())
continue
}
newGalleryJSON.Tags = tag.GetNames(tags)
if t.includeDependencies {
if g.StudioID.Valid {
t.studios.IDs = utils.IntAppendUnique(t.studios.IDs, int(g.StudioID.Int64))
}
t.tags.IDs = utils.IntAppendUniques(t.tags.IDs, tag.GetIDs(tags))
t.performers.IDs = utils.IntAppendUniques(t.performers.IDs, performer.GetIDs(performers))
}
galleryJSON, err := t.json.getGallery(galleryHash)
if err == nil && jsonschema.CompareJSON(*galleryJSON, *newGalleryJSON) {
continue
}
if err := t.json.saveGallery(galleryHash, newGalleryJSON); err != nil {
logger.Errorf("[galleries] <%s> failed to save json: %s", galleryHash, err.Error())
}
}
}
func (t *ExportTask) ExportPerformers(workers int, repo models.ReaderRepository) {
var performersWg sync.WaitGroup
reader := repo.Performer()
var performers []*models.Performer
var err error
all := t.full || (t.performers != nil && t.performers.all)
if all {
performers, err = reader.All()
} else if t.performers != nil && len(t.performers.IDs) > 0 {
performers, err = reader.FindMany(t.performers.IDs)
}
if err != nil {
logger.Errorf("[performers] failed to fetch performers: %s", err.Error())
}
jobCh := make(chan *models.Performer, workers*2) // make a buffered channel to feed workers
logger.Info("[performers] exporting")
startTime := time.Now()
for w := 0; w < workers; w++ { // create export Performer workers
performersWg.Add(1)
go t.exportPerformer(&performersWg, jobCh, repo)
}
for i, performer := range performers {
index := i + 1
logger.Progressf("[performers] %d of %d", index, len(performers))
t.Mappings.Performers = append(t.Mappings.Performers, jsonschema.PathNameMapping{Name: performer.Name.String, Checksum: performer.Checksum})
jobCh <- performer // feed workers
}
close(jobCh) // close channel so workers will know that no more jobs are available
performersWg.Wait()
logger.Infof("[performers] export complete in %s. %d workers used.", time.Since(startTime), workers)
}
func (t *ExportTask) exportPerformer(wg *sync.WaitGroup, jobChan <-chan *models.Performer, repo models.ReaderRepository) {
defer wg.Done()
performerReader := repo.Performer()
for p := range jobChan {
newPerformerJSON, err := performer.ToJSON(performerReader, p)
if err != nil {
logger.Errorf("[performers] <%s> error getting performer JSON: %s", p.Checksum, err.Error())
continue
}
tags, err := repo.Tag().FindByPerformerID(p.ID)
if err != nil {
logger.Errorf("[performers] <%s> error getting performer tags: %s", p.Checksum, err.Error())
continue
}
newPerformerJSON.Tags = tag.GetNames(tags)
if t.includeDependencies {
t.tags.IDs = utils.IntAppendUniques(t.tags.IDs, tag.GetIDs(tags))
}
performerJSON, err := t.json.getPerformer(p.Checksum)
if err != nil {
logger.Debugf("[performers] error reading performer json: %s", err.Error())
} else if jsonschema.CompareJSON(*performerJSON, *newPerformerJSON) {
continue
}
if err := t.json.savePerformer(p.Checksum, newPerformerJSON); err != nil {
logger.Errorf("[performers] <%s> failed to save json: %s", p.Checksum, err.Error())
}
}
}
func (t *ExportTask) ExportStudios(workers int, repo models.ReaderRepository) {
var studiosWg sync.WaitGroup
reader := repo.Studio()
var studios []*models.Studio
var err error
all := t.full || (t.studios != nil && t.studios.all)
if all {
studios, err = reader.All()
} else if t.studios != nil && len(t.studios.IDs) > 0 {
studios, err = reader.FindMany(t.studios.IDs)
}
if err != nil {
logger.Errorf("[studios] failed to fetch studios: %s", err.Error())
}
logger.Info("[studios] exporting")
startTime := time.Now()
jobCh := make(chan *models.Studio, workers*2) // make a buffered channel to feed workers
for w := 0; w < workers; w++ { // create export Studio workers
studiosWg.Add(1)
go t.exportStudio(&studiosWg, jobCh, repo)
}
for i, studio := range studios {
index := i + 1
logger.Progressf("[studios] %d of %d", index, len(studios))
t.Mappings.Studios = append(t.Mappings.Studios, jsonschema.PathNameMapping{Name: studio.Name.String, Checksum: studio.Checksum})
jobCh <- studio // feed workers
}
close(jobCh)
studiosWg.Wait()
logger.Infof("[studios] export complete in %s. %d workers used.", time.Since(startTime), workers)
}
func (t *ExportTask) exportStudio(wg *sync.WaitGroup, jobChan <-chan *models.Studio, repo models.ReaderRepository) {
defer wg.Done()
studioReader := repo.Studio()
for s := range jobChan {
newStudioJSON, err := studio.ToJSON(studioReader, s)
if err != nil {
logger.Errorf("[studios] <%s> error getting studio JSON: %s", s.Checksum, err.Error())
continue
}
studioJSON, err := t.json.getStudio(s.Checksum)
if err == nil && jsonschema.CompareJSON(*studioJSON, *newStudioJSON) {
continue
}
if err := t.json.saveStudio(s.Checksum, newStudioJSON); err != nil {
logger.Errorf("[studios] <%s> failed to save json: %s", s.Checksum, err.Error())
}
}
}
func (t *ExportTask) ExportTags(workers int, repo models.ReaderRepository) {
var tagsWg sync.WaitGroup
reader := repo.Tag()
var tags []*models.Tag
var err error
all := t.full || (t.tags != nil && t.tags.all)
if all {
tags, err = reader.All()
} else if t.tags != nil && len(t.tags.IDs) > 0 {
tags, err = reader.FindMany(t.tags.IDs)
}
if err != nil {
logger.Errorf("[tags] failed to fetch tags: %s", err.Error())
}
logger.Info("[tags] exporting")
startTime := time.Now()
jobCh := make(chan *models.Tag, workers*2) // make a buffered channel to feed workers
for w := 0; w < workers; w++ { // create export Tag workers
tagsWg.Add(1)
go t.exportTag(&tagsWg, jobCh, repo)
}
for i, tag := range tags {
index := i + 1
logger.Progressf("[tags] %d of %d", index, len(tags))
// generate checksum on the fly by name, since we don't store it
checksum := utils.MD5FromString(tag.Name)
t.Mappings.Tags = append(t.Mappings.Tags, jsonschema.PathNameMapping{Name: tag.Name, Checksum: checksum})
jobCh <- tag // feed workers
}
close(jobCh)
tagsWg.Wait()
logger.Infof("[tags] export complete in %s. %d workers used.", time.Since(startTime), workers)
}
func (t *ExportTask) exportTag(wg *sync.WaitGroup, jobChan <-chan *models.Tag, repo models.ReaderRepository) {
defer wg.Done()
tagReader := repo.Tag()
for thisTag := range jobChan {
newTagJSON, err := tag.ToJSON(tagReader, thisTag)
if err != nil {
logger.Errorf("[tags] <%s> error getting tag JSON: %s", thisTag.Name, err.Error())
continue
}
// generate checksum on the fly by name, since we don't store it
checksum := utils.MD5FromString(thisTag.Name)
tagJSON, err := t.json.getTag(checksum)
if err == nil && jsonschema.CompareJSON(*tagJSON, *newTagJSON) {
continue
}
if err := t.json.saveTag(checksum, newTagJSON); err != nil {
logger.Errorf("[tags] <%s> failed to save json: %s", checksum, err.Error())
}
}
}
func (t *ExportTask) ExportMovies(workers int, repo models.ReaderRepository) {
var moviesWg sync.WaitGroup
reader := repo.Movie()
var movies []*models.Movie
var err error
all := t.full || (t.movies != nil && t.movies.all)
if all {
movies, err = reader.All()
} else if t.movies != nil && len(t.movies.IDs) > 0 {
movies, err = reader.FindMany(t.movies.IDs)
}
if err != nil {
logger.Errorf("[movies] failed to fetch movies: %s", err.Error())
}
logger.Info("[movies] exporting")
startTime := time.Now()
jobCh := make(chan *models.Movie, workers*2) // make a buffered channel to feed workers
for w := 0; w < workers; w++ { // create export Studio workers
moviesWg.Add(1)
go t.exportMovie(&moviesWg, jobCh, repo)
}
for i, movie := range movies {
index := i + 1
logger.Progressf("[movies] %d of %d", index, len(movies))
t.Mappings.Movies = append(t.Mappings.Movies, jsonschema.PathNameMapping{Name: movie.Name.String, Checksum: movie.Checksum})
jobCh <- movie // feed workers
}
close(jobCh)
moviesWg.Wait()
logger.Infof("[movies] export complete in %s. %d workers used.", time.Since(startTime), workers)
}
func (t *ExportTask) exportMovie(wg *sync.WaitGroup, jobChan <-chan *models.Movie, repo models.ReaderRepository) {
defer wg.Done()
movieReader := repo.Movie()
studioReader := repo.Studio()
for m := range jobChan {
newMovieJSON, err := movie.ToJSON(movieReader, studioReader, m)
if err != nil {
logger.Errorf("[movies] <%s> error getting tag JSON: %s", m.Checksum, err.Error())
continue
}
if t.includeDependencies {
if m.StudioID.Valid {
t.studios.IDs = utils.IntAppendUnique(t.studios.IDs, int(m.StudioID.Int64))
}
}
movieJSON, err := t.json.getMovie(m.Checksum)
if err != nil {
logger.Debugf("[movies] error reading movie json: %s", err.Error())
} else if jsonschema.CompareJSON(*movieJSON, *newMovieJSON) {
continue
}
if err := t.json.saveMovie(m.Checksum, newMovieJSON); err != nil {
logger.Errorf("[movies] <%s> failed to save json: %s", m.Checksum, err.Error())
}
}
}
func (t *ExportTask) ExportScrapedItems(repo models.ReaderRepository) {
qb := repo.ScrapedItem()
sqb := repo.Studio()
scrapedItems, err := qb.All()
if err != nil {
logger.Errorf("[scraped sites] failed to fetch all items: %s", err.Error())
}
logger.Info("[scraped sites] exporting")
scraped := []jsonschema.ScrapedItem{}
for i, scrapedItem := range scrapedItems {
index := i + 1
logger.Progressf("[scraped sites] %d of %d", index, len(scrapedItems))
var studioName string
if scrapedItem.StudioID.Valid {
studio, _ := sqb.Find(int(scrapedItem.StudioID.Int64))
if studio != nil {
studioName = studio.Name.String
}
}
newScrapedItemJSON := jsonschema.ScrapedItem{}
if scrapedItem.Title.Valid {
newScrapedItemJSON.Title = scrapedItem.Title.String
}
if scrapedItem.Description.Valid {
newScrapedItemJSON.Description = scrapedItem.Description.String
}
if scrapedItem.URL.Valid {
newScrapedItemJSON.URL = scrapedItem.URL.String
}
if scrapedItem.Date.Valid {
newScrapedItemJSON.Date = utils.GetYMDFromDatabaseDate(scrapedItem.Date.String)
}
if scrapedItem.Rating.Valid {
newScrapedItemJSON.Rating = scrapedItem.Rating.String
}
if scrapedItem.Tags.Valid {
newScrapedItemJSON.Tags = scrapedItem.Tags.String
}
if scrapedItem.Models.Valid {
newScrapedItemJSON.Models = scrapedItem.Models.String
}
if scrapedItem.Episode.Valid {
newScrapedItemJSON.Episode = int(scrapedItem.Episode.Int64)
}
if scrapedItem.GalleryFilename.Valid {
newScrapedItemJSON.GalleryFilename = scrapedItem.GalleryFilename.String
}
if scrapedItem.GalleryURL.Valid {
newScrapedItemJSON.GalleryURL = scrapedItem.GalleryURL.String
}
if scrapedItem.VideoFilename.Valid {
newScrapedItemJSON.VideoFilename = scrapedItem.VideoFilename.String
}
if scrapedItem.VideoURL.Valid {
newScrapedItemJSON.VideoURL = scrapedItem.VideoURL.String
}
newScrapedItemJSON.Studio = studioName
updatedAt := models.JSONTime{Time: scrapedItem.UpdatedAt.Timestamp} // TODO keeping ruby format
newScrapedItemJSON.UpdatedAt = updatedAt
scraped = append(scraped, newScrapedItemJSON)
}
scrapedJSON, err := t.json.getScraped()
if err != nil {
logger.Debugf("[scraped sites] error reading json: %s", err.Error())
}
if !jsonschema.CompareJSON(scrapedJSON, scraped) {
if err := t.json.saveScaped(scraped); err != nil {
logger.Errorf("[scraped sites] failed to save json: %s", err.Error())
}
}
logger.Infof("[scraped sites] export complete")
}