stash/internal/manager/task_import.go

package manager

import (
	"archive/zip"
	"context"
	"database/sql"
	"errors"
	"fmt"
	"io"
	"os"
	"path/filepath"
	"time"

	"github.com/99designs/gqlgen/graphql"

	"github.com/stashapp/stash/pkg/fsutil"
	"github.com/stashapp/stash/pkg/gallery"
	"github.com/stashapp/stash/pkg/image"
	"github.com/stashapp/stash/pkg/logger"
	"github.com/stashapp/stash/pkg/models"
	"github.com/stashapp/stash/pkg/models/json"
	"github.com/stashapp/stash/pkg/models/jsonschema"
	"github.com/stashapp/stash/pkg/models/paths"
	"github.com/stashapp/stash/pkg/movie"
	"github.com/stashapp/stash/pkg/performer"
	"github.com/stashapp/stash/pkg/scene"
	"github.com/stashapp/stash/pkg/studio"
	"github.com/stashapp/stash/pkg/tag"
)
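
// ImportTask imports a previously exported metadata set, optionally supplied
// as an uploaded zip file, into the database.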
type ImportTask struct {
	txnManager Repository
	json       jsonUtils

	BaseDir             string
	TmpZip              string
	Reset               bool
	DuplicateBehaviour  ImportDuplicateEnum
	MissingRefBehaviour models.ImportMissingRefEnum

	scraped             []jsonschema.ScrapedItem
	fileNamingAlgorithm models.HashAlgorithm
}
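
// ImportObjectsInput holds the parameters of an import request.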
type ImportObjectsInput struct {
	File                graphql.Upload              `json:"file"`
	DuplicateBehaviour  ImportDuplicateEnum         `json:"duplicateBehaviour"`
	MissingRefBehaviour models.ImportMissingRefEnum `json:"missingRefBehaviour"`
}
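
// CreateImportTask builds an ImportTask from the given input, copying any
// uploaded file into a temporary directory as import.zip so that it can be
// extracted when the task starts.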
func CreateImportTask(a models.HashAlgorithm, input ImportObjectsInput) (*ImportTask, error) {
	baseDir, err := instance.Paths.Generated.TempDir("import")
	if err != nil {
		logger.Errorf("error creating temporary directory for import: %v", err)
		return nil, err
	}

	tmpZip := ""
	if input.File.File != nil {
		tmpZip = filepath.Join(baseDir, "import.zip")
		out, err := os.Create(tmpZip)
		if err != nil {
			return nil, err
		}

		_, err = io.Copy(out, input.File.File)
		out.Close()
		if err != nil {
			return nil, err
		}
	}

	return &ImportTask{
		txnManager:          GetInstance().Repository,
		BaseDir:             baseDir,
		TmpZip:              tmpZip,
		Reset:               false,
		DuplicateBehaviour:  input.DuplicateBehaviour,
		MissingRefBehaviour: input.MissingRefBehaviour,
		fileNamingAlgorithm: a,
	}, nil
}

func (t *ImportTask) GetDescription() string {
	return "Importing..."
}
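
// Start runs the import: it extracts the uploaded zip file if one was
// provided, then imports each object type in dependency order - tags,
// performers, studios, movies and files before the galleries, scraped items,
// scenes and images that reference them.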
func (t *ImportTask) Start(ctx context.Context) {
	if t.TmpZip != "" {
		defer func() {
			err := fsutil.RemoveDir(t.BaseDir)
			if err != nil {
				logger.Errorf("error removing directory %s: %v", t.BaseDir, err)
			}
		}()

		if err := t.unzipFile(); err != nil {
			logger.Errorf("error unzipping provided file for import: %v", err)
			return
		}
	}

	t.json = jsonUtils{
		json: *paths.GetJSONPaths(t.BaseDir),
	}

	// set default behaviour if not provided
	if !t.DuplicateBehaviour.IsValid() {
		t.DuplicateBehaviour = ImportDuplicateEnumFail
	}
	if !t.MissingRefBehaviour.IsValid() {
		t.MissingRefBehaviour = models.ImportMissingRefEnumFail
	}

	scraped, _ := t.json.getScraped()
	if scraped == nil {
		logger.Warn("missing scraped json")
	}
	t.scraped = scraped

	if t.Reset {
		err := t.txnManager.Reset()
		if err != nil {
			logger.Errorf("error resetting database: %v", err)
			return
		}
	}

	t.ImportTags(ctx)
	t.ImportPerformers(ctx)
	t.ImportStudios(ctx)
	t.ImportMovies(ctx)
	t.ImportFiles(ctx)
	t.ImportGalleries(ctx)
	t.ImportScrapedItems(ctx)
	t.ImportScenes(ctx)
	t.ImportImages(ctx)
}
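
// unzipFile extracts the temporary zip file into the task's base directory,
// removing the zip file itself once extraction is finished.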
func (t *ImportTask) unzipFile() error {
	defer func() {
		err := os.Remove(t.TmpZip)
		if err != nil {
			logger.Errorf("error removing temporary zip file %s: %v", t.TmpZip, err)
		}
	}()

	// now we can read the zip file
	r, err := zip.OpenReader(t.TmpZip)
	if err != nil {
		return err
	}
	defer r.Close()

	for _, f := range r.File {
		fn := filepath.Join(t.BaseDir, f.Name)

		if f.FileInfo().IsDir() {
			if err := os.MkdirAll(fn, os.ModePerm); err != nil {
				logger.Warnf("couldn't create directory %v while unzipping import file: %v", fn, err)
			}
			continue
		}

		if err := os.MkdirAll(filepath.Dir(fn), os.ModePerm); err != nil {
			return err
		}

		o, err := os.OpenFile(fn, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
		if err != nil {
			return err
		}

		i, err := f.Open()
		if err != nil {
			o.Close()
			return err
		}

		if _, err := io.Copy(o, i); err != nil {
			o.Close()
			i.Close()
			return err
		}

		o.Close()
		i.Close()
	}

	return nil
}
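
// ImportPerformers imports each performer JSON file from the export's
// performers directory, using one transaction per file.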
func (t *ImportTask) ImportPerformers(ctx context.Context) {
	logger.Info("[performers] importing")

	path := t.json.json.Performers
	files, err := os.ReadDir(path)
	if err != nil {
		if !errors.Is(err, os.ErrNotExist) {
			logger.Errorf("[performers] failed to read performers directory: %v", err)
		}

		return
	}

	for i, fi := range files {
		index := i + 1
		performerJSON, err := jsonschema.LoadPerformerFile(filepath.Join(path, fi.Name()))
		if err != nil {
			logger.Errorf("[performers] failed to read json: %v", err)
			continue
		}

		logger.Progressf("[performers] %d of %d", index, len(files))

		if err := t.txnManager.WithTxn(ctx, func(ctx context.Context) error {
			r := t.txnManager
			readerWriter := r.Performer

			importer := &performer.Importer{
				ReaderWriter: readerWriter,
				TagWriter:    r.Tag,
				Input:        *performerJSON,
			}

			return performImport(ctx, importer, t.DuplicateBehaviour)
		}); err != nil {
			logger.Errorf("[performers] <%s> import failed: %v", fi.Name(), err)
		}
	}

	logger.Info("[performers] import complete")
}
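
// ImportStudios imports studios in two phases: a studio whose parent does not
// exist yet is queued until the parent has been created, and any studios still
// orphaned afterwards are imported with a warning.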
func (t *ImportTask) ImportStudios(ctx context.Context) {
	pendingParent := make(map[string][]*jsonschema.Studio)

	logger.Info("[studios] importing")

	path := t.json.json.Studios
	files, err := os.ReadDir(path)
	if err != nil {
		if !errors.Is(err, os.ErrNotExist) {
			logger.Errorf("[studios] failed to read studios directory: %v", err)
		}

		return
	}

	for i, fi := range files {
		index := i + 1
		studioJSON, err := jsonschema.LoadStudioFile(filepath.Join(path, fi.Name()))
		if err != nil {
			logger.Errorf("[studios] failed to read json: %v", err)
			continue
		}

		logger.Progressf("[studios] %d of %d", index, len(files))

		if err := t.txnManager.WithTxn(ctx, func(ctx context.Context) error {
			return t.ImportStudio(ctx, studioJSON, pendingParent, t.txnManager.Studio)
		}); err != nil {
			if errors.Is(err, studio.ErrParentStudioNotExist) {
				// add to the pending parent list so that it is created after the parent
				s := pendingParent[studioJSON.ParentStudio]
				s = append(s, studioJSON)
				pendingParent[studioJSON.ParentStudio] = s
				continue
			}

			logger.Errorf("[studios] <%s> failed to create: %v", fi.Name(), err)
			continue
		}
	}

	// create the leftover studios, warning for missing parents
	if len(pendingParent) > 0 {
		logger.Warnf("[studios] importing studios with missing parents")

		for _, s := range pendingParent {
			for _, orphanStudioJSON := range s {
				if err := t.txnManager.WithTxn(ctx, func(ctx context.Context) error {
					return t.ImportStudio(ctx, orphanStudioJSON, nil, t.txnManager.Studio)
				}); err != nil {
					logger.Errorf("[studios] <%s> failed to create: %v", orphanStudioJSON.Name, err)
					continue
				}
			}
		}
	}

	logger.Info("[studios] import complete")
}
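
// ImportStudio imports a single studio. While pendingParent is non-nil, a
// missing parent studio is reported as an error so the caller can defer the
// studio; once a studio is created, any children queued behind it are
// imported as well.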
func (t *ImportTask) ImportStudio(ctx context.Context, studioJSON *jsonschema.Studio, pendingParent map[string][]*jsonschema.Studio, readerWriter studio.NameFinderCreatorUpdater) error {
	importer := &studio.Importer{
		ReaderWriter:        readerWriter,
		Input:               *studioJSON,
		MissingRefBehaviour: t.MissingRefBehaviour,
	}

	// first phase: return error if parent does not exist
	if pendingParent != nil {
		importer.MissingRefBehaviour = models.ImportMissingRefEnumFail
	}

	if err := performImport(ctx, importer, t.DuplicateBehaviour); err != nil {
		return err
	}

	// now create the studios pending this studio's creation
	s := pendingParent[studioJSON.Name]
	for _, childStudioJSON := range s {
		// map is nil since we're not checking parent studios at this point
		if err := t.ImportStudio(ctx, childStudioJSON, nil, readerWriter); err != nil {
			return fmt.Errorf("failed to create child studio <%s>: %v", childStudioJSON.Name, err)
		}
	}

	// delete the entry from the map so that we know it's not left over
	delete(pendingParent, studioJSON.Name)

	return nil
}
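
// ImportMovies imports each movie JSON file from the export's movies
// directory, using one transaction per file.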
func (t *ImportTask) ImportMovies(ctx context.Context) {
	logger.Info("[movies] importing")

	path := t.json.json.Movies
	files, err := os.ReadDir(path)
	if err != nil {
		if !errors.Is(err, os.ErrNotExist) {
			logger.Errorf("[movies] failed to read movies directory: %v", err)
		}

		return
	}

	for i, fi := range files {
		index := i + 1
		movieJSON, err := jsonschema.LoadMovieFile(filepath.Join(path, fi.Name()))
		if err != nil {
			logger.Errorf("[movies] failed to read json: %v", err)
			continue
		}

		logger.Progressf("[movies] %d of %d", index, len(files))

		if err := t.txnManager.WithTxn(ctx, func(ctx context.Context) error {
			r := t.txnManager
			readerWriter := r.Movie
			studioReaderWriter := r.Studio

			movieImporter := &movie.Importer{
				ReaderWriter:        readerWriter,
				StudioWriter:        studioReaderWriter,
				Input:               *movieJSON,
				MissingRefBehaviour: t.MissingRefBehaviour,
			}

			return performImport(ctx, movieImporter, t.DuplicateBehaviour)
		}); err != nil {
			logger.Errorf("[movies] <%s> import failed: %v", fi.Name(), err)
			continue
		}
	}

	logger.Info("[movies] import complete")
}
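
// ImportFiles imports files and folders, deferring any file contained in a
// zip file until the zip file itself has been imported.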
func (t *ImportTask) ImportFiles(ctx context.Context) {
	logger.Info("[files] importing")

	path := t.json.json.Files
	files, err := os.ReadDir(path)
	if err != nil {
		if !errors.Is(err, os.ErrNotExist) {
			logger.Errorf("[files] failed to read files directory: %v", err)
		}

		return
	}

	pendingParent := make(map[string][]jsonschema.DirEntry)

	for i, fi := range files {
		index := i + 1
		fileJSON, err := jsonschema.LoadFileFile(filepath.Join(path, fi.Name()))
		if err != nil {
			logger.Errorf("[files] failed to read json: %v", err)
			continue
		}

		logger.Progressf("[files] %d of %d", index, len(files))

		if err := t.txnManager.WithTxn(ctx, func(ctx context.Context) error {
			return t.ImportFile(ctx, fileJSON, pendingParent)
		}); err != nil {
			if errors.Is(err, errZipFileNotExist) {
				// add to the pending parent list so that it is created after the parent
				s := pendingParent[fileJSON.DirEntry().ZipFile]
				s = append(s, fileJSON)
				pendingParent[fileJSON.DirEntry().ZipFile] = s
				continue
			}

			logger.Errorf("[files] <%s> failed to create: %v", fi.Name(), err)
			continue
		}
	}

	// create the leftover files, warning for missing zip files
	if len(pendingParent) > 0 {
		logger.Warnf("[files] importing files with missing zip files")

		for _, s := range pendingParent {
			for _, orphanFileJSON := range s {
				if err := t.txnManager.WithTxn(ctx, func(ctx context.Context) error {
					return t.ImportFile(ctx, orphanFileJSON, nil)
				}); err != nil {
					logger.Errorf("[files] <%s> failed to create: %v", orphanFileJSON.DirEntry().Path, err)
					continue
				}
			}
		}
	}

	logger.Info("[files] import complete")
}
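
// ImportFile imports a single file or folder, then imports any files that
// were queued behind it as their containing zip file.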
func (t *ImportTask) ImportFile(ctx context.Context, fileJSON jsonschema.DirEntry, pendingParent map[string][]jsonschema.DirEntry) error {
	r := t.txnManager
	readerWriter := r.File

	fileImporter := &fileFolderImporter{
		ReaderWriter: readerWriter,
		FolderStore:  r.Folder,
		Input:        fileJSON,
	}

	// ignore duplicate files - don't overwrite
	if err := performImport(ctx, fileImporter, ImportDuplicateEnumIgnore); err != nil {
		return err
	}

	// now create the files pending this file's creation
	s := pendingParent[fileJSON.DirEntry().Path]
	for _, childFileJSON := range s {
		// map is nil since we're not checking zip files at this point
		if err := t.ImportFile(ctx, childFileJSON, nil); err != nil {
			return fmt.Errorf("failed to create child file <%s>: %v", childFileJSON.DirEntry().Path, err)
		}
	}

	// delete the entry from the map so that we know it's not left over
	delete(pendingParent, fileJSON.DirEntry().Path)

	return nil
}
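
// ImportGalleries imports each gallery JSON file from the export's galleries
// directory, using one transaction per file.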
func (t *ImportTask) ImportGalleries(ctx context.Context) {
	logger.Info("[galleries] importing")

	path := t.json.json.Galleries
	files, err := os.ReadDir(path)
	if err != nil {
		if !errors.Is(err, os.ErrNotExist) {
			logger.Errorf("[galleries] failed to read galleries directory: %v", err)
		}

		return
	}

	for i, fi := range files {
		index := i + 1
		galleryJSON, err := jsonschema.LoadGalleryFile(filepath.Join(path, fi.Name()))
		if err != nil {
			logger.Errorf("[galleries] failed to read json: %v", err)
			continue
		}

		logger.Progressf("[galleries] %d of %d", index, len(files))

		if err := t.txnManager.WithTxn(ctx, func(ctx context.Context) error {
			r := t.txnManager
			readerWriter := r.Gallery
			tagWriter := r.Tag
			performerWriter := r.Performer
			studioWriter := r.Studio

			galleryImporter := &gallery.Importer{
				ReaderWriter:        readerWriter,
				FolderFinder:        r.Folder,
				FileFinder:          r.File,
				PerformerWriter:     performerWriter,
				StudioWriter:        studioWriter,
				TagWriter:           tagWriter,
				Input:               *galleryJSON,
				MissingRefBehaviour: t.MissingRefBehaviour,
			}

			return performImport(ctx, galleryImporter, t.DuplicateBehaviour)
		}); err != nil {
			logger.Errorf("[galleries] <%s> import failed to commit: %v", fi.Name(), err)
			continue
		}
	}

	logger.Info("[galleries] import complete")
}
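
// ImportTags imports tags, deferring a tag whose parent tag does not exist
// yet until the parent has been created.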
func (t *ImportTask) ImportTags(ctx context.Context) {
	pendingParent := make(map[string][]*jsonschema.Tag)

	logger.Info("[tags] importing")

	path := t.json.json.Tags
	files, err := os.ReadDir(path)
	if err != nil {
		if !errors.Is(err, os.ErrNotExist) {
			logger.Errorf("[tags] failed to read tags directory: %v", err)
		}

		return
	}

	for i, fi := range files {
		index := i + 1
		tagJSON, err := jsonschema.LoadTagFile(filepath.Join(path, fi.Name()))
		if err != nil {
			logger.Errorf("[tags] failed to read json: %v", err)
			continue
		}

		logger.Progressf("[tags] %d of %d", index, len(files))

		if err := t.txnManager.WithTxn(ctx, func(ctx context.Context) error {
			return t.ImportTag(ctx, tagJSON, pendingParent, false, t.txnManager.Tag)
		}); err != nil {
			var parentError tag.ParentTagNotExistError
			if errors.As(err, &parentError) {
				pendingParent[parentError.MissingParent()] = append(pendingParent[parentError.MissingParent()], tagJSON)
				continue
			}

			logger.Errorf("[tags] <%s> failed to import: %v", fi.Name(), err)
			continue
		}
	}

	for _, s := range pendingParent {
		for _, orphanTagJSON := range s {
			if err := t.txnManager.WithTxn(ctx, func(ctx context.Context) error {
				return t.ImportTag(ctx, orphanTagJSON, nil, true, t.txnManager.Tag)
			}); err != nil {
				logger.Errorf("[tags] <%s> failed to create: %v", orphanTagJSON.Name, err)
				continue
			}
		}
	}

	logger.Info("[tags] import complete")
}
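
// ImportTag imports a single tag. When fail is false, a missing parent tag is
// reported as an error so the caller can defer the tag; once a tag is
// created, any children queued behind it are imported as well.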
func (t *ImportTask) ImportTag(ctx context.Context, tagJSON *jsonschema.Tag, pendingParent map[string][]*jsonschema.Tag, fail bool, readerWriter tag.NameFinderCreatorUpdater) error {
Tag hierarchy (#1519) * Add migration script for tag relations table * Expand hierarchical filter features Expand the features of the hierarchical multi input filter with support for using a relations table, which only has parent_id and child_id columns, and support adding an additional intermediate table to join on, for example for scenes and tags which are linked by the scenes_tags table as well. * Add hierarchical filtering for tags * Add hierarchical tags support to scene markers Refactor filtering of scene markers to filterBuilder and in the process add support for hierarchical tags as well. * List parent and child tags on tag details page * Support setting parent and child tags Add support for setting parent and child tags during tag creation and tag updates. * Validate no loops are created in tags hierarchy * Update tag merging to support tag hierarcy * Add unit tests for tags.EnsureUniqueHierarchy * Fix applying recursive to with clause The SQL `RECURSIVE` of a `WITH` clause only needs to be applied once, imediately after the `WITH`. So this fixes the query building to do just that, automatically applying the `RECURSIVE` keyword when any added with clause is added as recursive. * Rename hierarchical root id column * Rewrite hierarchical filtering for performance Completely rewrite the hierarchical filtering to optimize for performance. Doing the recursive query in combination with a complex query seems to break SQLite optimizing some things which means that the recursive part might be 2,5 second slower than adding a static `VALUES()` list. This is mostly noticable in case of the tag hierarchy where setting an exclusion with any depth (or depth: all) being applied has this performance impact of 2,5 second. "Include" also suffered this issue, but some rewritten query by joining in the *_tags table in one pass and applying a `WHERE x IS NOT NULL` filter did seem to optimize that case. But that optimization isn't applied to the `IS NULL` filter of "exclude". Running a simple query beforehand to get all (recursive) items and then applying them to the query doesn't have this performance penalty. * Remove UI references to child studios and tags * Add parents to tag export * Support importing of parent relationship for tags * Assign stable ids to parent / child badges * Silence Apollo warning on parents/children fields on tags Silence warning triggered by Apollo GraphQL by explicitly instructing it to use the incoming parents/children values. By default it already does this, but it triggers a warning as it might be unintended that it uses the incoming values (instead of for example merging both arrays). Setting merge to false still applies the same behaviour (use only incoming values) but silences the warning as it's explicitly configured to work like this. * Rework detecting unique tag hierarchy Completely rework the unique tag hierarchy to detect invalid hierarchies for which a tag is "added in the middle". So when there are tags A <- B and A <- C, you could previously edit tag B and add tag C as a sub tag without it being noticed as parent A being applied twice (to tag C). While afterwards saving tag C would fail as tag A was applied as parent twice. The updated code correctly detects this scenario as well. Furthermore the error messaging has been reworked a bit and the message now mentions both the direct parent / sub tag as well as the tag which would results in the error. So in aboves example it would now show the message that tag C can't be applied because tag A already is a parent. 
* Update relations on cached tags when needed Update the relations on cached tags when a tag is created / updated / deleted so these always reflect the correct state. Otherwise (re)opening a tag might still show the old relations untill the page is fully reloaded or the list is navigated. But this obviously is strange when you for example have tag A, create or update tag B to have a relation to tag A, and from tags B page click through to tag A and it doesn't show that it is linked to tag B.
2021-09-09 04:58:43 +00:00
importer := &tag.Importer{
ReaderWriter: readerWriter,
Input: *tagJSON,
MissingRefBehaviour: t.MissingRefBehaviour,
}
// first phase: return error if parent does not exist
if !fail {
importer.MissingRefBehaviour = models.ImportMissingRefEnumFail
}
if err := performImport(ctx, importer, t.DuplicateBehaviour); err != nil {
return err
}
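
	// Now that this tag exists, import any child tags that were queued
	// up waiting for it as their parent.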
for _, childTagJSON := range pendingParent[tagJSON.Name] {
if err := t.ImportTag(ctx, childTagJSON, pendingParent, fail, readerWriter); err != nil {
var parentError tag.ParentTagNotExistError
if errors.As(err, &parentError) {
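				// The child is still missing another parent; re-queue it under
				// that parent so it is retried once that parent is imported.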
				pendingParent[parentError.MissingParent()] = append(pendingParent[parentError.MissingParent()], childTagJSON)
continue
}
return fmt.Errorf("failed to create child tag <%s>: %s", childTagJSON.Name, err.Error())
}
}
delete(pendingParent, tagJSON.Name)
return nil
}
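
// ImportScrapedItems imports the scraped-item entries from the export inside
// a single transaction, resolving each entry's studio by name.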
func (t *ImportTask) ImportScrapedItems(ctx context.Context) {
if err := t.txnManager.WithTxn(ctx, func(ctx context.Context) error {
logger.Info("[scraped sites] importing")
r := t.txnManager
qb := r.ScrapedItem
sqb := r.Studio
currentTime := time.Now()
for i, mappingJSON := range t.scraped {
index := i + 1
logger.Progressf("[scraped sites] %d of %d", index, len(t.scraped))
newScrapedItem := models.ScrapedItem{
Title: sql.NullString{String: mappingJSON.Title, Valid: true},
Description: sql.NullString{String: mappingJSON.Description, Valid: true},
URL: sql.NullString{String: mappingJSON.URL, Valid: true},
Date: models.SQLiteDate{String: mappingJSON.Date, Valid: true},
Rating: sql.NullString{String: mappingJSON.Rating, Valid: true},
Tags: sql.NullString{String: mappingJSON.Tags, Valid: true},
Models: sql.NullString{String: mappingJSON.Models, Valid: true},
Episode: sql.NullInt64{Int64: int64(mappingJSON.Episode), Valid: true},
GalleryFilename: sql.NullString{String: mappingJSON.GalleryFilename, Valid: true},
GalleryURL: sql.NullString{String: mappingJSON.GalleryURL, Valid: true},
VideoFilename: sql.NullString{String: mappingJSON.VideoFilename, Valid: true},
VideoURL: sql.NullString{String: mappingJSON.VideoURL, Valid: true},
CreatedAt: models.SQLiteTimestamp{Timestamp: currentTime},
UpdatedAt: models.SQLiteTimestamp{Timestamp: t.getTimeFromJSONTime(mappingJSON.UpdatedAt)},
}
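
			// Link the scraped item to an existing studio matched by name.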
studio, err := sqb.FindByName(ctx, mappingJSON.Studio, false)
if err != nil {
logger.Errorf("[scraped sites] failed to fetch studio: %s", err.Error())
}
if studio != nil {
newScrapedItem.StudioID = sql.NullInt64{Int64: int64(studio.ID), Valid: true}
}
_, err = qb.Create(ctx, newScrapedItem)
if err != nil {
logger.Errorf("[scraped sites] <%s> failed to create: %s", newScrapedItem.Title.String, err.Error())
}
}
return nil
}); err != nil {
		logger.Errorf("[scraped sites] import failed to commit: %v", err)
}
logger.Info("[scraped sites] import complete")
}
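
// ImportScenes imports each scene JSON file from the export's scenes
// directory. Each scene and its markers are imported in their own
// transaction, so one failed file only skips that scene.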
func (t *ImportTask) ImportScenes(ctx context.Context) {
logger.Info("[scenes] importing")
path := t.json.json.Scenes
files, err := os.ReadDir(path)
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
logger.Errorf("[scenes] failed to read scenes directory: %v", err)
}
return
}
for i, fi := range files {
index := i + 1
logger.Progressf("[scenes] %d of %d", index, len(files))
sceneJSON, err := jsonschema.LoadSceneFile(filepath.Join(path, fi.Name()))
if err != nil {
logger.Infof("[scenes] <%s> json parse failure: %s", fi.Name(), err.Error())
2019-02-09 12:30:49 +00:00
continue
}
if err := t.txnManager.WithTxn(ctx, func(ctx context.Context) error {
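			// Gather the stores used by the scene importer to resolve references.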
r := t.txnManager
readerWriter := r.Scene
tagWriter := r.Tag
galleryWriter := r.Gallery
movieWriter := r.Movie
performerWriter := r.Performer
studioWriter := r.Studio
markerWriter := r.SceneMarker
sceneImporter := &scene.Importer{
ReaderWriter: readerWriter,
Input: *sceneJSON,
FileFinder: r.File,
FileNamingAlgorithm: t.fileNamingAlgorithm,
MissingRefBehaviour: t.MissingRefBehaviour,
GalleryFinder: galleryWriter,
MovieWriter: movieWriter,
PerformerWriter: performerWriter,
StudioWriter: studioWriter,
TagWriter: tagWriter,
}
if err := performImport(ctx, sceneImporter, t.DuplicateBehaviour); err != nil {
return err
}
// import the scene markers
for _, m := range sceneJSON.Markers {
markerImporter := &scene.MarkerImporter{
SceneID: sceneImporter.ID,
Input: m,
MissingRefBehaviour: t.MissingRefBehaviour,
ReaderWriter: markerWriter,
TagWriter: tagWriter,
}
if err := performImport(ctx, markerImporter, t.DuplicateBehaviour); err != nil {
return err
}
}
return nil
}); err != nil {
logger.Errorf("[scenes] <%s> import failed: %s", fi.Name(), err.Error())
2019-02-09 12:30:49 +00:00
}
}
logger.Info("[scenes] import complete")
}
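
// ImportImages imports each image JSON file from the export's images
// directory, one transaction per image.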
func (t *ImportTask) ImportImages(ctx context.Context) {
logger.Info("[images] importing")
path := t.json.json.Images
files, err := os.ReadDir(path)
if err != nil {
if !errors.Is(err, os.ErrNotExist) {
logger.Errorf("[images] failed to read images directory: %v", err)
}
return
}
for i, fi := range files {
index := i + 1
logger.Progressf("[images] %d of %d", index, len(files))
imageJSON, err := jsonschema.LoadImageFile(filepath.Join(path, fi.Name()))
if err != nil {
logger.Infof("[images] <%s> json parse failure: %s", fi.Name(), err.Error())
continue
}
if err := t.txnManager.WithTxn(ctx, func(ctx context.Context) error {
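			// Gather the stores used by the image importer to resolve references.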
r := t.txnManager
readerWriter := r.Image
tagWriter := r.Tag
galleryWriter := r.Gallery
performerWriter := r.Performer
studioWriter := r.Studio
imageImporter := &image.Importer{
ReaderWriter: readerWriter,
FileFinder: r.File,
Input: *imageJSON,
MissingRefBehaviour: t.MissingRefBehaviour,
GalleryFinder: galleryWriter,
PerformerWriter: performerWriter,
StudioWriter: studioWriter,
TagWriter: tagWriter,
}
return performImport(ctx, imageImporter, t.DuplicateBehaviour)
}); err != nil {
logger.Errorf("[images] <%s> import failed: %s", fi.Name(), err.Error())
}
}
logger.Info("[images] import complete")
}
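
// currentLocation is the local time zone, captured once and reused when
// converting timestamps from the JSON export.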
var currentLocation = time.Now().Location()
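
// getTimeFromJSONTime converts a JSON timestamp to local time, falling back
// to the current time when the value is zero.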
func (t *ImportTask) getTimeFromJSONTime(jsonTime json.JSONTime) time.Time {
	result := jsonTime.Time
	if jsonTime.IsZero() {
		result = time.Now()
	}

	if currentLocation != nil {
		result = result.In(currentLocation)
	}

	return result
}