stash/pkg/manager/task_import.go

633 lines
17 KiB
Go
Raw Normal View History

2019-02-09 12:30:49 +00:00
package manager
import (
	"archive/zip"
	"context"
	"database/sql"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"time"

	"github.com/stashapp/stash/pkg/database"
	"github.com/stashapp/stash/pkg/gallery"
	"github.com/stashapp/stash/pkg/image"
	"github.com/stashapp/stash/pkg/logger"
	"github.com/stashapp/stash/pkg/manager/config"
	"github.com/stashapp/stash/pkg/manager/jsonschema"
	"github.com/stashapp/stash/pkg/manager/paths"
	"github.com/stashapp/stash/pkg/models"
	"github.com/stashapp/stash/pkg/movie"
	"github.com/stashapp/stash/pkg/performer"
	"github.com/stashapp/stash/pkg/scene"
	"github.com/stashapp/stash/pkg/studio"
	"github.com/stashapp/stash/pkg/tag"
	"github.com/stashapp/stash/pkg/utils"
)
// ImportTask imports a previously exported set of JSON metadata files
// (optionally supplied as an uploaded zip) into the database.
type ImportTask struct {
	txnManager models.TransactionManager
	json       jsonUtils

	// BaseDir is the directory containing the JSON files to import.
	BaseDir string
	// TmpZip is the path of a temporary uploaded zip file to extract into
	// BaseDir before importing; empty when importing from BaseDir directly.
	TmpZip string
	// Reset indicates the database should be wiped before importing.
	Reset bool
	// DuplicateBehaviour controls what happens when an imported object
	// already exists; MissingRefBehaviour controls handling of references
	// to objects that do not exist.
	DuplicateBehaviour  models.ImportDuplicateEnum
	MissingRefBehaviour models.ImportMissingRefEnum

	// mappings lists the checksums of the objects to import; scraped holds
	// the scraped-site entries read from the export.
	mappings *jsonschema.Mappings
	scraped  []jsonschema.ScrapedItem

	fileNamingAlgorithm models.HashAlgorithm
}
2021-01-31 21:15:10 +00:00
func CreateImportTask(a models.HashAlgorithm, input models.ImportObjectsInput) (*ImportTask, error) {
baseDir, err := instance.Paths.Generated.TempDir("import")
if err != nil {
logger.Errorf("error creating temporary directory for import: %s", err.Error())
return nil, err
}
tmpZip := ""
if input.File.File != nil {
tmpZip = filepath.Join(baseDir, "import.zip")
out, err := os.Create(tmpZip)
if err != nil {
return nil, err
}
_, err = io.Copy(out, input.File.File)
out.Close()
if err != nil {
return nil, err
}
}
return &ImportTask{
txnManager: GetInstance().TxnManager,
2021-01-31 21:15:10 +00:00
BaseDir: baseDir,
TmpZip: tmpZip,
Reset: false,
DuplicateBehaviour: input.DuplicateBehaviour,
MissingRefBehaviour: input.MissingRefBehaviour,
fileNamingAlgorithm: a,
2021-01-31 21:15:10 +00:00
}, nil
}
2021-05-24 04:24:18 +00:00
// GetDescription returns the human-readable label shown for this task.
func (t *ImportTask) GetDescription() string {
	const label = "Importing..."
	return label
}
2019-02-09 12:30:49 +00:00
func (t *ImportTask) Start(wg *sync.WaitGroup) {
2019-02-10 20:15:36 +00:00
defer wg.Done()
2021-01-31 21:15:10 +00:00
if t.TmpZip != "" {
defer func() {
err := utils.RemoveDir(t.BaseDir)
if err != nil {
logger.Errorf("error removing directory %s: %s", t.BaseDir, err.Error())
}
}()
if err := t.unzipFile(); err != nil {
logger.Errorf("error unzipping provided file for import: %s", err.Error())
return
}
}
2020-09-15 07:28:53 +00:00
t.json = jsonUtils{
json: *paths.GetJSONPaths(t.BaseDir),
}
// set default behaviour if not provided
if !t.DuplicateBehaviour.IsValid() {
t.DuplicateBehaviour = models.ImportDuplicateEnumFail
}
if !t.MissingRefBehaviour.IsValid() {
t.MissingRefBehaviour = models.ImportMissingRefEnumFail
2020-09-15 07:28:53 +00:00
}
t.mappings, _ = t.json.getMappings()
if t.mappings == nil {
2019-02-10 20:15:36 +00:00
logger.Error("missing mappings json")
return
2019-02-09 12:30:49 +00:00
}
2020-09-15 07:28:53 +00:00
scraped, _ := t.json.getScraped()
2019-02-09 12:30:49 +00:00
if scraped == nil {
logger.Warn("missing scraped json")
}
t.scraped = scraped
2019-02-09 12:30:49 +00:00
if t.Reset {
err := database.Reset(config.GetInstance().GetDatabasePath())
if err != nil {
logger.Errorf("Error resetting database: %s", err.Error())
return
}
}
2019-02-09 12:30:49 +00:00
ctx := context.TODO()
t.ImportTags(ctx)
2019-02-09 12:30:49 +00:00
t.ImportPerformers(ctx)
t.ImportStudios(ctx)
t.ImportMovies(ctx)
2019-02-09 12:30:49 +00:00
t.ImportGalleries(ctx)
t.ImportScrapedItems(ctx)
t.ImportScenes(ctx)
t.ImportImages(ctx)
2019-02-09 12:30:49 +00:00
}
func (t *ImportTask) unzipFile() error {
defer func() {
2021-01-31 21:15:10 +00:00
err := os.Remove(t.TmpZip)
2019-02-09 12:30:49 +00:00
if err != nil {
2021-01-31 21:15:10 +00:00
logger.Errorf("error removing temporary zip file %s: %s", t.TmpZip, err.Error())
}
}()
2019-02-09 12:30:49 +00:00
// now we can read the zip file
2021-01-31 21:15:10 +00:00
r, err := zip.OpenReader(t.TmpZip)
if err != nil {
return err
}
defer r.Close()
2019-02-09 12:30:49 +00:00
for _, f := range r.File {
fn := filepath.Join(t.BaseDir, f.Name)
if f.FileInfo().IsDir() {
Errcheck phase 1 (#1715) * Avoid redundant logging in migrations Return the error and let the caller handle the logging of the error if needed. While here, defer m.Close() to the function boundary. * Treat errors as values Use %v rather than %s and pass the errors directly. * Generate a wrapped error on stat-failure * Log 3 unchecked errors Rather than ignore errors, log them at the WARNING log level. The server has been functioning without these, so assume they are not at the ERROR level. * Propagate errors upward Failure in path generation was ignored. Propagate the errors upward the call stack, so it can be handled at the level of orchestration. * Warn on errors Log errors rather than quenching them. Errors are logged at the Warn-level for now. * Check error when creating test databases Use the builtin log package and stop the program fatally on error. * Add warnings to uncheck task errors Focus on the task system in a single commit, logging unchecked errors as warnings. * Warn-on-error in API routes Look through the API routes, and make sure errors are being logged if they occur. Prefer the Warn-log-level because none of these has proven to be fatal in the system up until now. * Propagate error when adding Util API * Propagate error on adding util API * Return unhandled error * JS log API: propagate and log errors * JS Plugins: log GQL addition failures. * Warn on failure to write to stdin * Warn on failure to stop task * Wrap viper.BindEnv The current viper code only errors if no name is provided, so it should never fail. Rewrite the code flow to factor through a panic-function. This removes error warnings from this part of the code. * Log errors in concurrency test If we can't initialize the configuration, treat the test as a failure. 
* Warn on errors in configuration code * Plug an unchecked error in gallery zip walking * Warn on screenshot serving failure * Warn on encoder screenshot failure * Warn on errors in path-handling code * Undo the errcheck on configurations for now. * Use one-line initializers where applicable rather than using err := f() if err!= nil { .. prefer the shorter if err := f(); err != nil { .. If f() isn't too long of a name, or wraps a function with a body.
2021-09-20 23:34:25 +00:00
if err := os.MkdirAll(fn, os.ModePerm); err != nil {
logger.Warnf("couldn't create directory %v while unzipping import file: %v", fn, err)
}
continue
2019-02-09 12:30:49 +00:00
}
if err := os.MkdirAll(filepath.Dir(fn), os.ModePerm); err != nil {
return err
2019-02-09 12:30:49 +00:00
}
o, err := os.OpenFile(fn, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
if err != nil {
return err
2019-02-09 12:30:49 +00:00
}
i, err := f.Open()
if err != nil {
o.Close()
return err
2019-02-09 12:30:49 +00:00
}
if _, err := io.Copy(o, i); err != nil {
o.Close()
i.Close()
return err
2019-02-09 12:30:49 +00:00
}
o.Close()
i.Close()
}
return nil
}
func (t *ImportTask) ImportPerformers(ctx context.Context) {
logger.Info("[performers] importing")
for i, mappingJSON := range t.mappings.Performers {
index := i + 1
performerJSON, err := t.json.getPerformer(mappingJSON.Checksum)
if err != nil {
logger.Errorf("[performers] failed to read json: %s", err.Error())
continue
2019-02-09 12:30:49 +00:00
}
logger.Progressf("[performers] %d of %d", index, len(t.mappings.Performers))
if err := t.txnManager.WithTxn(ctx, func(r models.Repository) error {
readerWriter := r.Performer()
importer := &performer.Importer{
ReaderWriter: readerWriter,
TagWriter: r.Tag(),
Input: *performerJSON,
}
return performImport(importer, t.DuplicateBehaviour)
}); err != nil {
logger.Errorf("[performers] <%s> import failed: %s", mappingJSON.Checksum, err.Error())
}
2019-02-09 12:30:49 +00:00
}
logger.Info("[performers] import complete")
}
func (t *ImportTask) ImportStudios(ctx context.Context) {
pendingParent := make(map[string][]*jsonschema.Studio)
2019-02-09 12:30:49 +00:00
logger.Info("[studios] importing")
for i, mappingJSON := range t.mappings.Studios {
2019-02-09 12:30:49 +00:00
index := i + 1
2020-09-15 07:28:53 +00:00
studioJSON, err := t.json.getStudio(mappingJSON.Checksum)
2019-02-09 12:30:49 +00:00
if err != nil {
logger.Errorf("[studios] failed to read json: %s", err.Error())
continue
}
logger.Progressf("[studios] %d of %d", index, len(t.mappings.Studios))
2019-02-09 12:30:49 +00:00
if err := t.txnManager.WithTxn(ctx, func(r models.Repository) error {
return t.ImportStudio(studioJSON, pendingParent, r.Studio())
}); err != nil {
if err == studio.ErrParentStudioNotExist {
// add to the pending parent list so that it is created after the parent
s := pendingParent[studioJSON.ParentStudio]
s = append(s, studioJSON)
pendingParent[studioJSON.ParentStudio] = s
continue
}
logger.Errorf("[studios] <%s> failed to create: %s", mappingJSON.Checksum, err.Error())
continue
}
}
2019-02-09 12:30:49 +00:00
// create the leftover studios, warning for missing parents
if len(pendingParent) > 0 {
logger.Warnf("[studios] importing studios with missing parents")
for _, s := range pendingParent {
for _, orphanStudioJSON := range s {
if err := t.txnManager.WithTxn(ctx, func(r models.Repository) error {
return t.ImportStudio(orphanStudioJSON, nil, r.Studio())
}); err != nil {
logger.Errorf("[studios] <%s> failed to create: %s", orphanStudioJSON.Name, err.Error())
continue
}
}
2019-02-09 12:30:49 +00:00
}
}
logger.Info("[studios] import complete")
}
// ImportStudio imports one studio. A non-nil pendingParent map puts the
// import in "first phase" mode, where a missing parent studio is an error
// (so the caller can defer the studio). After a successful import, any
// studios waiting on this one as a parent are created recursively.
func (t *ImportTask) ImportStudio(studioJSON *jsonschema.Studio, pendingParent map[string][]*jsonschema.Studio, readerWriter models.StudioReaderWriter) error {
	importer := &studio.Importer{
		ReaderWriter:        readerWriter,
		Input:               *studioJSON,
		MissingRefBehaviour: t.MissingRefBehaviour,
	}

	// first phase: return error if parent does not exist
	if pendingParent != nil {
		importer.MissingRefBehaviour = models.ImportMissingRefEnumFail
	}

	if err := performImport(importer, t.DuplicateBehaviour); err != nil {
		return err
	}

	// now create the studios pending this studio's creation; the map is nil
	// since we're not checking parent studios at this point
	for _, childStudioJSON := range pendingParent[studioJSON.Name] {
		if err := t.ImportStudio(childStudioJSON, nil, readerWriter); err != nil {
			return fmt.Errorf("failed to create child studio <%s>: %s", childStudioJSON.Name, err.Error())
		}
	}

	// delete the entry from the map so that we know it's not left over
	delete(pendingParent, studioJSON.Name)

	return nil
}
func (t *ImportTask) ImportMovies(ctx context.Context) {
logger.Info("[movies] importing")
for i, mappingJSON := range t.mappings.Movies {
index := i + 1
2020-09-15 07:28:53 +00:00
movieJSON, err := t.json.getMovie(mappingJSON.Checksum)
if err != nil {
logger.Errorf("[movies] failed to read json: %s", err.Error())
continue
}
logger.Progressf("[movies] %d of %d", index, len(t.mappings.Movies))
if err := t.txnManager.WithTxn(ctx, func(r models.Repository) error {
readerWriter := r.Movie()
studioReaderWriter := r.Studio()
movieImporter := &movie.Importer{
ReaderWriter: readerWriter,
StudioWriter: studioReaderWriter,
Input: *movieJSON,
MissingRefBehaviour: t.MissingRefBehaviour,
}
return performImport(movieImporter, t.DuplicateBehaviour)
}); err != nil {
logger.Errorf("[movies] <%s> import failed: %s", mappingJSON.Checksum, err.Error())
continue
}
}
logger.Info("[movies] import complete")
}
2019-02-09 12:30:49 +00:00
func (t *ImportTask) ImportGalleries(ctx context.Context) {
logger.Info("[galleries] importing")
2019-02-09 12:30:49 +00:00
for i, mappingJSON := range t.mappings.Galleries {
2019-02-09 12:30:49 +00:00
index := i + 1
galleryJSON, err := t.json.getGallery(mappingJSON.Checksum)
if err != nil {
logger.Errorf("[galleries] failed to read json: %s", err.Error())
continue
}
2019-02-09 12:30:49 +00:00
logger.Progressf("[galleries] %d of %d", index, len(t.mappings.Galleries))
2019-02-09 12:30:49 +00:00
if err := t.txnManager.WithTxn(ctx, func(r models.Repository) error {
readerWriter := r.Gallery()
tagWriter := r.Tag()
performerWriter := r.Performer()
studioWriter := r.Studio()
2019-02-09 12:30:49 +00:00
galleryImporter := &gallery.Importer{
ReaderWriter: readerWriter,
PerformerWriter: performerWriter,
StudioWriter: studioWriter,
TagWriter: tagWriter,
Input: *galleryJSON,
MissingRefBehaviour: t.MissingRefBehaviour,
}
2019-02-09 12:30:49 +00:00
return performImport(galleryImporter, t.DuplicateBehaviour)
}); err != nil {
logger.Errorf("[galleries] <%s> import failed to commit: %s", mappingJSON.Checksum, err.Error())
continue
}
2019-02-09 12:30:49 +00:00
}
2019-02-09 12:30:49 +00:00
logger.Info("[galleries] import complete")
}
func (t *ImportTask) ImportTags(ctx context.Context) {
Tag hierarchy (#1519) * Add migration script for tag relations table * Expand hierarchical filter features Expand the features of the hierarchical multi input filter with support for using a relations table, which only has parent_id and child_id columns, and support adding an additional intermediate table to join on, for example for scenes and tags which are linked by the scenes_tags table as well. * Add hierarchical filtering for tags * Add hierarchical tags support to scene markers Refactor filtering of scene markers to filterBuilder and in the process add support for hierarchical tags as well. * List parent and child tags on tag details page * Support setting parent and child tags Add support for setting parent and child tags during tag creation and tag updates. * Validate no loops are created in tags hierarchy * Update tag merging to support tag hierarcy * Add unit tests for tags.EnsureUniqueHierarchy * Fix applying recursive to with clause The SQL `RECURSIVE` of a `WITH` clause only needs to be applied once, imediately after the `WITH`. So this fixes the query building to do just that, automatically applying the `RECURSIVE` keyword when any added with clause is added as recursive. * Rename hierarchical root id column * Rewrite hierarchical filtering for performance Completely rewrite the hierarchical filtering to optimize for performance. Doing the recursive query in combination with a complex query seems to break SQLite optimizing some things which means that the recursive part might be 2,5 second slower than adding a static `VALUES()` list. This is mostly noticable in case of the tag hierarchy where setting an exclusion with any depth (or depth: all) being applied has this performance impact of 2,5 second. "Include" also suffered this issue, but some rewritten query by joining in the *_tags table in one pass and applying a `WHERE x IS NOT NULL` filter did seem to optimize that case. But that optimization isn't applied to the `IS NULL` filter of "exclude". 
Running a simple query beforehand to get all (recursive) items and then applying them to the query doesn't have this performance penalty. * Remove UI references to child studios and tags * Add parents to tag export * Support importing of parent relationship for tags * Assign stable ids to parent / child badges * Silence Apollo warning on parents/children fields on tags Silence warning triggered by Apollo GraphQL by explicitly instructing it to use the incoming parents/children values. By default it already does this, but it triggers a warning as it might be unintended that it uses the incoming values (instead of for example merging both arrays). Setting merge to false still applies the same behaviour (use only incoming values) but silences the warning as it's explicitly configured to work like this. * Rework detecting unique tag hierarchy Completely rework the unique tag hierarchy to detect invalid hierarchies for which a tag is "added in the middle". So when there are tags A <- B and A <- C, you could previously edit tag B and add tag C as a sub tag without it being noticed as parent A being applied twice (to tag C). While afterwards saving tag C would fail as tag A was applied as parent twice. The updated code correctly detects this scenario as well. Furthermore the error messaging has been reworked a bit and the message now mentions both the direct parent / sub tag as well as the tag which would results in the error. So in aboves example it would now show the message that tag C can't be applied because tag A already is a parent. * Update relations on cached tags when needed Update the relations on cached tags when a tag is created / updated / deleted so these always reflect the correct state. Otherwise (re)opening a tag might still show the old relations untill the page is fully reloaded or the list is navigated. 
But this obviously is strange when you for example have tag A, create or update tag B to have a relation to tag A, and from tags B page click through to tag A and it doesn't show that it is linked to tag B.
2021-09-09 04:58:43 +00:00
pendingParent := make(map[string][]*jsonschema.Tag)
logger.Info("[tags] importing")
2019-02-09 12:30:49 +00:00
for i, mappingJSON := range t.mappings.Tags {
2019-02-09 12:30:49 +00:00
index := i + 1
2020-09-15 07:28:53 +00:00
tagJSON, err := t.json.getTag(mappingJSON.Checksum)
2019-02-09 12:30:49 +00:00
if err != nil {
logger.Errorf("[tags] failed to read json: %s", err.Error())
2019-02-09 12:30:49 +00:00
continue
}
logger.Progressf("[tags] %d of %d", index, len(t.mappings.Tags))
if err := t.txnManager.WithTxn(ctx, func(r models.Repository) error {
Tag hierarchy (#1519) * Add migration script for tag relations table * Expand hierarchical filter features Expand the features of the hierarchical multi input filter with support for using a relations table, which only has parent_id and child_id columns, and support adding an additional intermediate table to join on, for example for scenes and tags which are linked by the scenes_tags table as well. * Add hierarchical filtering for tags * Add hierarchical tags support to scene markers Refactor filtering of scene markers to filterBuilder and in the process add support for hierarchical tags as well. * List parent and child tags on tag details page * Support setting parent and child tags Add support for setting parent and child tags during tag creation and tag updates. * Validate no loops are created in tags hierarchy * Update tag merging to support tag hierarcy * Add unit tests for tags.EnsureUniqueHierarchy * Fix applying recursive to with clause The SQL `RECURSIVE` of a `WITH` clause only needs to be applied once, imediately after the `WITH`. So this fixes the query building to do just that, automatically applying the `RECURSIVE` keyword when any added with clause is added as recursive. * Rename hierarchical root id column * Rewrite hierarchical filtering for performance Completely rewrite the hierarchical filtering to optimize for performance. Doing the recursive query in combination with a complex query seems to break SQLite optimizing some things which means that the recursive part might be 2,5 second slower than adding a static `VALUES()` list. This is mostly noticable in case of the tag hierarchy where setting an exclusion with any depth (or depth: all) being applied has this performance impact of 2,5 second. "Include" also suffered this issue, but some rewritten query by joining in the *_tags table in one pass and applying a `WHERE x IS NOT NULL` filter did seem to optimize that case. But that optimization isn't applied to the `IS NULL` filter of "exclude". 
Running a simple query beforehand to get all (recursive) items and then applying them to the query doesn't have this performance penalty. * Remove UI references to child studios and tags * Add parents to tag export * Support importing of parent relationship for tags * Assign stable ids to parent / child badges * Silence Apollo warning on parents/children fields on tags Silence warning triggered by Apollo GraphQL by explicitly instructing it to use the incoming parents/children values. By default it already does this, but it triggers a warning as it might be unintended that it uses the incoming values (instead of for example merging both arrays). Setting merge to false still applies the same behaviour (use only incoming values) but silences the warning as it's explicitly configured to work like this. * Rework detecting unique tag hierarchy Completely rework the unique tag hierarchy to detect invalid hierarchies for which a tag is "added in the middle". So when there are tags A <- B and A <- C, you could previously edit tag B and add tag C as a sub tag without it being noticed as parent A being applied twice (to tag C). While afterwards saving tag C would fail as tag A was applied as parent twice. The updated code correctly detects this scenario as well. Furthermore the error messaging has been reworked a bit and the message now mentions both the direct parent / sub tag as well as the tag which would results in the error. So in aboves example it would now show the message that tag C can't be applied because tag A already is a parent. * Update relations on cached tags when needed Update the relations on cached tags when a tag is created / updated / deleted so these always reflect the correct state. Otherwise (re)opening a tag might still show the old relations untill the page is fully reloaded or the list is navigated. 
But this obviously is strange when you for example have tag A, create or update tag B to have a relation to tag A, and from tags B page click through to tag A and it doesn't show that it is linked to tag B.
2021-09-09 04:58:43 +00:00
return t.ImportTag(tagJSON, pendingParent, false, r.Tag())
}); err != nil {
if parentError, ok := err.(tag.ParentTagNotExistError); ok {
pendingParent[parentError.MissingParent()] = append(pendingParent[parentError.MissingParent()], tagJSON)
continue
}
2019-02-09 12:30:49 +00:00
logger.Errorf("[tags] <%s> failed to import: %s", mappingJSON.Checksum, err.Error())
continue
2019-02-09 12:30:49 +00:00
}
}
Tag hierarchy (#1519) * Add migration script for tag relations table * Expand hierarchical filter features Expand the features of the hierarchical multi input filter with support for using a relations table, which only has parent_id and child_id columns, and support adding an additional intermediate table to join on, for example for scenes and tags which are linked by the scenes_tags table as well. * Add hierarchical filtering for tags * Add hierarchical tags support to scene markers Refactor filtering of scene markers to filterBuilder and in the process add support for hierarchical tags as well. * List parent and child tags on tag details page * Support setting parent and child tags Add support for setting parent and child tags during tag creation and tag updates. * Validate no loops are created in tags hierarchy * Update tag merging to support tag hierarcy * Add unit tests for tags.EnsureUniqueHierarchy * Fix applying recursive to with clause The SQL `RECURSIVE` of a `WITH` clause only needs to be applied once, imediately after the `WITH`. So this fixes the query building to do just that, automatically applying the `RECURSIVE` keyword when any added with clause is added as recursive. * Rename hierarchical root id column * Rewrite hierarchical filtering for performance Completely rewrite the hierarchical filtering to optimize for performance. Doing the recursive query in combination with a complex query seems to break SQLite optimizing some things which means that the recursive part might be 2,5 second slower than adding a static `VALUES()` list. This is mostly noticable in case of the tag hierarchy where setting an exclusion with any depth (or depth: all) being applied has this performance impact of 2,5 second. "Include" also suffered this issue, but some rewritten query by joining in the *_tags table in one pass and applying a `WHERE x IS NOT NULL` filter did seem to optimize that case. But that optimization isn't applied to the `IS NULL` filter of "exclude". 
Running a simple query beforehand to get all (recursive) items and then applying them to the query doesn't have this performance penalty. * Remove UI references to child studios and tags * Add parents to tag export * Support importing of parent relationship for tags * Assign stable ids to parent / child badges * Silence Apollo warning on parents/children fields on tags Silence warning triggered by Apollo GraphQL by explicitly instructing it to use the incoming parents/children values. By default it already does this, but it triggers a warning as it might be unintended that it uses the incoming values (instead of for example merging both arrays). Setting merge to false still applies the same behaviour (use only incoming values) but silences the warning as it's explicitly configured to work like this. * Rework detecting unique tag hierarchy Completely rework the unique tag hierarchy to detect invalid hierarchies for which a tag is "added in the middle". So when there are tags A <- B and A <- C, you could previously edit tag B and add tag C as a sub tag without it being noticed as parent A being applied twice (to tag C). While afterwards saving tag C would fail as tag A was applied as parent twice. The updated code correctly detects this scenario as well. Furthermore the error messaging has been reworked a bit and the message now mentions both the direct parent / sub tag as well as the tag which would results in the error. So in aboves example it would now show the message that tag C can't be applied because tag A already is a parent. * Update relations on cached tags when needed Update the relations on cached tags when a tag is created / updated / deleted so these always reflect the correct state. Otherwise (re)opening a tag might still show the old relations untill the page is fully reloaded or the list is navigated. 
But this obviously is strange when you for example have tag A, create or update tag B to have a relation to tag A, and from tags B page click through to tag A and it doesn't show that it is linked to tag B.
2021-09-09 04:58:43 +00:00
for _, s := range pendingParent {
for _, orphanTagJSON := range s {
if err := t.txnManager.WithTxn(ctx, func(r models.Repository) error {
return t.ImportTag(orphanTagJSON, nil, true, r.Tag())
}); err != nil {
logger.Errorf("[tags] <%s> failed to create: %s", orphanTagJSON.Name, err.Error())
continue
}
}
}
2019-02-09 12:30:49 +00:00
logger.Info("[tags] import complete")
}
Tag hierarchy (#1519) * Add migration script for tag relations table * Expand hierarchical filter features Expand the features of the hierarchical multi input filter with support for using a relations table, which only has parent_id and child_id columns, and support adding an additional intermediate table to join on, for example for scenes and tags which are linked by the scenes_tags table as well. * Add hierarchical filtering for tags * Add hierarchical tags support to scene markers Refactor filtering of scene markers to filterBuilder and in the process add support for hierarchical tags as well. * List parent and child tags on tag details page * Support setting parent and child tags Add support for setting parent and child tags during tag creation and tag updates. * Validate no loops are created in tags hierarchy * Update tag merging to support tag hierarcy * Add unit tests for tags.EnsureUniqueHierarchy * Fix applying recursive to with clause The SQL `RECURSIVE` of a `WITH` clause only needs to be applied once, imediately after the `WITH`. So this fixes the query building to do just that, automatically applying the `RECURSIVE` keyword when any added with clause is added as recursive. * Rename hierarchical root id column * Rewrite hierarchical filtering for performance Completely rewrite the hierarchical filtering to optimize for performance. Doing the recursive query in combination with a complex query seems to break SQLite optimizing some things which means that the recursive part might be 2,5 second slower than adding a static `VALUES()` list. This is mostly noticable in case of the tag hierarchy where setting an exclusion with any depth (or depth: all) being applied has this performance impact of 2,5 second. "Include" also suffered this issue, but some rewritten query by joining in the *_tags table in one pass and applying a `WHERE x IS NOT NULL` filter did seem to optimize that case. But that optimization isn't applied to the `IS NULL` filter of "exclude". 
Running a simple query beforehand to get all (recursive) items and then applying them to the query doesn't have this performance penalty. * Remove UI references to child studios and tags * Add parents to tag export * Support importing of parent relationship for tags * Assign stable ids to parent / child badges * Silence Apollo warning on parents/children fields on tags Silence warning triggered by Apollo GraphQL by explicitly instructing it to use the incoming parents/children values. By default it already does this, but it triggers a warning as it might be unintended that it uses the incoming values (instead of for example merging both arrays). Setting merge to false still applies the same behaviour (use only incoming values) but silences the warning as it's explicitly configured to work like this. * Rework detecting unique tag hierarchy Completely rework the unique tag hierarchy to detect invalid hierarchies for which a tag is "added in the middle". So when there are tags A <- B and A <- C, you could previously edit tag B and add tag C as a sub tag without it being noticed as parent A being applied twice (to tag C). While afterwards saving tag C would fail as tag A was applied as parent twice. The updated code correctly detects this scenario as well. Furthermore the error messaging has been reworked a bit and the message now mentions both the direct parent / sub tag as well as the tag which would results in the error. So in aboves example it would now show the message that tag C can't be applied because tag A already is a parent. * Update relations on cached tags when needed Update the relations on cached tags when a tag is created / updated / deleted so these always reflect the correct state. Otherwise (re)opening a tag might still show the old relations untill the page is fully reloaded or the list is navigated. 
But this obviously is strange when you for example have tag A, create or update tag B to have a relation to tag A, and from tags B page click through to tag A and it doesn't show that it is linked to tag B.
2021-09-09 04:58:43 +00:00
// ImportTag imports one tag. When fail is false a missing parent tag is an
// error (so the caller can defer the tag); when fail is true the configured
// MissingRefBehaviour applies. After a successful import, tags pending on
// this tag as a parent are created recursively.
//
// Bug fix: when a child import fails with ParentTagNotExistError, the CHILD
// is re-queued under its missing parent. Previously tagJSON (the already
// imported parent) was appended instead, silently dropping the child and
// re-importing the parent.
func (t *ImportTask) ImportTag(tagJSON *jsonschema.Tag, pendingParent map[string][]*jsonschema.Tag, fail bool, readerWriter models.TagReaderWriter) error {
	importer := &tag.Importer{
		ReaderWriter:        readerWriter,
		Input:               *tagJSON,
		MissingRefBehaviour: t.MissingRefBehaviour,
	}

	// first phase: return error if parent does not exist
	if !fail {
		importer.MissingRefBehaviour = models.ImportMissingRefEnumFail
	}

	if err := performImport(importer, t.DuplicateBehaviour); err != nil {
		return err
	}

	for _, childTagJSON := range pendingParent[tagJSON.Name] {
		if err := t.ImportTag(childTagJSON, pendingParent, fail, readerWriter); err != nil {
			if parentError, ok := err.(tag.ParentTagNotExistError); ok {
				// re-queue the failing child (not tagJSON) under its missing parent
				pendingParent[parentError.MissingParent()] = append(pendingParent[parentError.MissingParent()], childTagJSON)
				continue
			}

			return fmt.Errorf("failed to create child tag <%s>: %s", childTagJSON.Name, err.Error())
		}
	}

	// delete the entry from the map so that we know it's not left over
	delete(pendingParent, tagJSON.Name)

	return nil
}
2019-02-09 12:30:49 +00:00
func (t *ImportTask) ImportScrapedItems(ctx context.Context) {
if err := t.txnManager.WithTxn(ctx, func(r models.Repository) error {
logger.Info("[scraped sites] importing")
qb := r.ScrapedItem()
sqb := r.Studio()
currentTime := time.Now()
for i, mappingJSON := range t.scraped {
index := i + 1
logger.Progressf("[scraped sites] %d of %d", index, len(t.mappings.Scenes))
newScrapedItem := models.ScrapedItem{
Title: sql.NullString{String: mappingJSON.Title, Valid: true},
Description: sql.NullString{String: mappingJSON.Description, Valid: true},
URL: sql.NullString{String: mappingJSON.URL, Valid: true},
Date: models.SQLiteDate{String: mappingJSON.Date, Valid: true},
Rating: sql.NullString{String: mappingJSON.Rating, Valid: true},
Tags: sql.NullString{String: mappingJSON.Tags, Valid: true},
Models: sql.NullString{String: mappingJSON.Models, Valid: true},
Episode: sql.NullInt64{Int64: int64(mappingJSON.Episode), Valid: true},
GalleryFilename: sql.NullString{String: mappingJSON.GalleryFilename, Valid: true},
GalleryURL: sql.NullString{String: mappingJSON.GalleryURL, Valid: true},
VideoFilename: sql.NullString{String: mappingJSON.VideoFilename, Valid: true},
VideoURL: sql.NullString{String: mappingJSON.VideoURL, Valid: true},
CreatedAt: models.SQLiteTimestamp{Timestamp: currentTime},
UpdatedAt: models.SQLiteTimestamp{Timestamp: t.getTimeFromJSONTime(mappingJSON.UpdatedAt)},
}
2019-02-09 12:30:49 +00:00
studio, err := sqb.FindByName(mappingJSON.Studio, false)
if err != nil {
logger.Errorf("[scraped sites] failed to fetch studio: %s", err.Error())
}
if studio != nil {
newScrapedItem.StudioID = sql.NullInt64{Int64: int64(studio.ID), Valid: true}
}
2019-02-09 12:30:49 +00:00
_, err = qb.Create(newScrapedItem)
if err != nil {
logger.Errorf("[scraped sites] <%s> failed to create: %s", newScrapedItem.Title.String, err.Error())
}
2019-02-09 12:30:49 +00:00
}
return nil
}); err != nil {
2019-02-09 12:30:49 +00:00
logger.Errorf("[scraped sites] import failed to commit: %s", err.Error())
}
2019-02-09 12:30:49 +00:00
logger.Info("[scraped sites] import complete")
}
func (t *ImportTask) ImportScenes(ctx context.Context) {
logger.Info("[scenes] importing")
2019-02-09 12:30:49 +00:00
for i, mappingJSON := range t.mappings.Scenes {
2019-02-09 12:30:49 +00:00
index := i + 1
logger.Progressf("[scenes] %d of %d", index, len(t.mappings.Scenes))
2019-02-09 12:30:49 +00:00
2020-09-15 07:28:53 +00:00
sceneJSON, err := t.json.getScene(mappingJSON.Checksum)
2019-02-09 12:30:49 +00:00
if err != nil {
logger.Infof("[scenes] <%s> json parse failure: %s", mappingJSON.Checksum, err.Error())
continue
}
2020-08-06 01:21:14 +00:00
sceneHash := mappingJSON.Checksum
if err := t.txnManager.WithTxn(ctx, func(r models.Repository) error {
readerWriter := r.Scene()
tagWriter := r.Tag()
galleryWriter := r.Gallery()
movieWriter := r.Movie()
performerWriter := r.Performer()
studioWriter := r.Studio()
markerWriter := r.SceneMarker()
sceneImporter := &scene.Importer{
ReaderWriter: readerWriter,
Input: *sceneJSON,
Path: mappingJSON.Path,
FileNamingAlgorithm: t.fileNamingAlgorithm,
MissingRefBehaviour: t.MissingRefBehaviour,
GalleryWriter: galleryWriter,
MovieWriter: movieWriter,
PerformerWriter: performerWriter,
StudioWriter: studioWriter,
TagWriter: tagWriter,
2019-02-09 12:30:49 +00:00
}
if err := performImport(sceneImporter, t.DuplicateBehaviour); err != nil {
return err
}
// import the scene markers
for _, m := range sceneJSON.Markers {
markerImporter := &scene.MarkerImporter{
SceneID: sceneImporter.ID,
Input: m,
MissingRefBehaviour: t.MissingRefBehaviour,
ReaderWriter: markerWriter,
TagWriter: tagWriter,
}
if err := performImport(markerImporter, t.DuplicateBehaviour); err != nil {
return err
}
}
2019-02-09 12:30:49 +00:00
return nil
}); err != nil {
logger.Errorf("[scenes] <%s> import failed: %s", sceneHash, err.Error())
2019-02-09 12:30:49 +00:00
}
}
logger.Info("[scenes] import complete")
}
// ImportImages imports every image listed in the mappings file using one
// transaction per image. A JSON parse failure skips the image; an import
// failure is logged and the loop continues.
func (t *ImportTask) ImportImages(ctx context.Context) {
	logger.Info("[images] importing")

	total := len(t.mappings.Images)
	for i, mapping := range t.mappings.Images {
		logger.Progressf("[images] %d of %d", i+1, total)

		imageJSON, err := t.json.getImage(mapping.Checksum)
		if err != nil {
			logger.Infof("[images] <%s> json parse failure: %s", mapping.Checksum, err.Error())
			continue
		}

		txnErr := t.txnManager.WithTxn(ctx, func(r models.Repository) error {
			imageImporter := &image.Importer{
				ReaderWriter:        r.Image(),
				Input:               *imageJSON,
				Path:                mapping.Path,
				MissingRefBehaviour: t.MissingRefBehaviour,
				GalleryWriter:       r.Gallery(),
				PerformerWriter:     r.Performer(),
				StudioWriter:        r.Studio(),
				TagWriter:           r.Tag(),
			}

			return performImport(imageImporter, t.DuplicateBehaviour)
		})
		if txnErr != nil {
			logger.Errorf("[images] <%s> import failed: %s", mapping.Checksum, txnErr.Error())
		}
	}

	logger.Info("[images] import complete")
}
// currentLocation is the process's local time zone, captured once at package
// init; timestamps parsed from JSON are normalised into it.
var currentLocation = time.Now().Location()

// getTimeFromJSONTime converts a JSON timestamp to a time.Time, substituting
// the current time when the JSON value is the zero time. The result is
// expressed in currentLocation when one is available.
//
// The previous four-way nested if/else duplicated both the zero-time fallback
// and the location conversion; this straight-line form is equivalent.
func (t *ImportTask) getTimeFromJSONTime(jsonTime models.JSONTime) time.Time {
	result := jsonTime.Time
	if jsonTime.IsZero() {
		result = time.Now()
	}

	// NOTE(review): time.Time.Location is documented never to return nil, so
	// this guard should always be true; kept as a defensive no-op.
	if currentLocation != nil {
		result = result.In(currentLocation)
	}

	return result
}