From 9351a0b2a46db4029b42c5424052df6caa6ef23e Mon Sep 17 00:00:00 2001 From: WithoutPants <53250216+WithoutPants@users.noreply.github.com> Date: Fri, 23 Dec 2022 09:15:27 +1100 Subject: [PATCH] Add anonymise database task (#3186) --- graphql/documents/mutations/metadata.graphql | 4 + graphql/schema/schema.graphql | 3 + graphql/schema/types/metadata.graphql | 4 + internal/api/resolver_mutation_metadata.go | 52 ++ pkg/sqlite/anonymise.go | 836 ++++++++++++++++++ pkg/sqlite/anonymise_test.go | 39 + pkg/sqlite/database.go | 41 +- pkg/utils/func.go | 12 + .../Settings/Tasks/DataManagementTasks.tsx | 65 +- .../Settings/Tasks/SettingsTasksPanel.tsx | 16 +- ui/v2.5/src/core/StashService.ts | 6 + ui/v2.5/src/docs/en/Changelog/v0190.md | 1 + ui/v2.5/src/locales/en-GB.json | 7 +- 13 files changed, 1078 insertions(+), 8 deletions(-) create mode 100644 pkg/sqlite/anonymise.go create mode 100644 pkg/sqlite/anonymise_test.go create mode 100644 pkg/utils/func.go diff --git a/graphql/documents/mutations/metadata.graphql b/graphql/documents/mutations/metadata.graphql index 068665d9f..0d6486bed 100644 --- a/graphql/documents/mutations/metadata.graphql +++ b/graphql/documents/mutations/metadata.graphql @@ -41,3 +41,7 @@ mutation MigrateHashNaming { mutation BackupDatabase($input: BackupDatabaseInput!) { backupDatabase(input: $input) } + +mutation AnonymiseDatabase($input: AnonymiseDatabaseInput!) { + anonymiseDatabase(input: $input) +} diff --git a/graphql/schema/schema.graphql b/graphql/schema/schema.graphql index 959e52b99..6f17704bb 100644 --- a/graphql/schema/schema.graphql +++ b/graphql/schema/schema.graphql @@ -281,6 +281,9 @@ type Mutation { metadataIdentify(input: IdentifyMetadataInput!): ID! """Migrate generated files for the current hash naming""" migrateHashNaming: ID! + + """Anonymise the database in a separate file. Optionally returns a link to download the database file""" + anonymiseDatabase(input: AnonymiseDatabaseInput!): String """Reload scrapers""" reloadScrapers: Boolean! diff --git a/graphql/schema/types/metadata.graphql b/graphql/schema/types/metadata.graphql index 96784ee9d..bf3ee2566 100644 --- a/graphql/schema/types/metadata.graphql +++ b/graphql/schema/types/metadata.graphql @@ -263,6 +263,10 @@ input BackupDatabaseInput { download: Boolean } +input AnonymiseDatabaseInput { + download: Boolean +} + enum SystemStatusEnum { SETUP NEEDS_MIGRATION diff --git a/internal/api/resolver_mutation_metadata.go b/internal/api/resolver_mutation_metadata.go index 040dc9fc1..6b0eba66f 100644 --- a/internal/api/resolver_mutation_metadata.go +++ b/internal/api/resolver_mutation_metadata.go @@ -156,3 +156,55 @@ func (r *mutationResolver) BackupDatabase(ctx context.Context, input BackupDatab return nil, nil } + +func (r *mutationResolver) AnonymiseDatabase(ctx context.Context, input AnonymiseDatabaseInput) (*string, error) { + // if download is true, then backup to temporary file and return a link + download := input.Download != nil && *input.Download + mgr := manager.GetInstance() + database := mgr.Database + var outPath string + if download { + if err := fsutil.EnsureDir(mgr.Paths.Generated.Downloads); err != nil { + return nil, fmt.Errorf("could not create backup directory %v: %w", mgr.Paths.Generated.Downloads, err) + } + f, err := os.CreateTemp(mgr.Paths.Generated.Downloads, "anonymous*.sqlite") + if err != nil { + return nil, err + } + + outPath = f.Name() + f.Close() + } else { + backupDirectoryPath := mgr.Config.GetBackupDirectoryPathOrDefault() + if backupDirectoryPath != "" { + if err := fsutil.EnsureDir(backupDirectoryPath); err != nil { + return nil, fmt.Errorf("could not create backup directory %v: %w", backupDirectoryPath, err) + } + } + outPath = database.AnonymousDatabasePath(backupDirectoryPath) + } + + err := database.Anonymise(outPath) + if err != nil { + logger.Errorf("Error anonymising database: %v", err) + return nil, err + } + + if download { + downloadHash, err := mgr.DownloadStore.RegisterFile(outPath, "", false) + if err != nil { + return nil, fmt.Errorf("error registering file for download: %w", err) + } + logger.Debugf("Generated anonymised file %s with hash %s", outPath, downloadHash) + + baseURL, _ := ctx.Value(BaseURLCtxKey).(string) + + fn := filepath.Base(database.DatabaseBackupPath("")) + ret := baseURL + "/downloads/" + downloadHash + "/" + fn + return &ret, nil + } else { + logger.Infof("Successfully anonymised database to: %s", outPath) + } + + return nil, nil +} diff --git a/pkg/sqlite/anonymise.go b/pkg/sqlite/anonymise.go new file mode 100644 index 000000000..d980b8d72 --- /dev/null +++ b/pkg/sqlite/anonymise.go @@ -0,0 +1,836 @@ +package sqlite + +import ( + "context" + "crypto/rand" + "database/sql" + "fmt" + "math/big" + "path/filepath" + "strings" + "unicode" + + "github.com/doug-martin/goqu/v9" + "github.com/doug-martin/goqu/v9/exp" + "github.com/jmoiron/sqlx" + "github.com/stashapp/stash/pkg/logger" + "github.com/stashapp/stash/pkg/txn" + "github.com/stashapp/stash/pkg/utils" +) + +const ( + letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + hex = "0123456789abcdef" +) + +type Anonymiser struct { + *Database +} + +func NewAnonymiser(db *Database, outPath string) (*Anonymiser, error) { + if _, err := db.db.Exec(fmt.Sprintf(`VACUUM INTO "%s"`, outPath)); err != nil { + return nil, fmt.Errorf("vacuuming into %s: %w", outPath, err) + } + + newDB := NewDatabase() + if err := newDB.Open(outPath); err != nil { + return nil, fmt.Errorf("opening %s: %w", outPath, err) + } + + return &Anonymiser{Database: newDB}, nil +} + +func (db *Anonymiser) Anonymise(ctx context.Context) error { + if err := func() error { + defer db.Close() + + return utils.Do([]func() error{ + func() error { return db.deleteBlobs() }, + func() error { return db.deleteStashIDs() }, + func() error { return db.anonymiseFolders(ctx) }, + func() error { return db.anonymiseFiles(ctx) }, + func() error { return db.anonymiseFingerprints(ctx) }, + func() error { return db.anonymiseScenes(ctx) }, + func() error { return db.anonymiseImages(ctx) }, + func() error { return db.anonymiseGalleries(ctx) }, + func() error { return db.anonymisePerformers(ctx) }, + func() error { return db.anonymiseStudios(ctx) }, + func() error { return db.anonymiseTags(ctx) }, + func() error { return db.anonymiseMovies(ctx) }, + func() error { db.optimise(); return nil }, + }) + }(); err != nil { + // delete the database + _ = db.Remove() + + return err + } + + return nil +} + +func (db *Anonymiser) truncateTable(tableName string) error { + _, err := db.db.Exec("DELETE FROM " + tableName) + return err +} + +func (db *Anonymiser) deleteBlobs() error { + return utils.Do([]func() error{ + func() error { return db.truncateTable("scenes_cover") }, + func() error { return db.truncateTable("movies_images") }, + func() error { return db.truncateTable("performers_image") }, + func() error { return db.truncateTable("studios_image") }, + func() error { return db.truncateTable("tags_image") }, + }) +} + +func (db *Anonymiser) deleteStashIDs() error { + return utils.Do([]func() error{ + func() error { return db.truncateTable("scene_stash_ids") }, + func() error { return db.truncateTable("studio_stash_ids") }, + func() error { return db.truncateTable("performer_stash_ids") }, + }) +} + +func (db *Anonymiser) anonymiseFolders(ctx context.Context) error { + logger.Infof("Anonymising folders") + return txn.WithTxn(ctx, db, func(ctx context.Context) error { + return db.anonymiseFoldersRecurse(ctx, 0, "") + }) +} + +func (db *Anonymiser) anonymiseFoldersRecurse(ctx context.Context, parentFolderID int, parentPath string) error { + table := folderTableMgr.table + + stmt := dialect.Update(table) + + if parentFolderID == 0 { + stmt = stmt.Set(goqu.Record{"path": goqu.Cast(table.Col(idColumn), "VARCHAR")}).Where(table.Col("parent_folder_id").IsNull()) + } else { + stmt = stmt.Prepared(true).Set(goqu.Record{ + "path": goqu.L("? || ? || id", parentPath, string(filepath.Separator)), + }).Where(table.Col("parent_folder_id").Eq(parentFolderID)) + } + + if _, err := exec(ctx, stmt); err != nil { + return fmt.Errorf("anonymising %s: %w", table.GetTable(), err) + } + + // now recurse to sub-folders + query := dialect.From(table).Select(table.Col(idColumn), table.Col("path")) + if parentFolderID == 0 { + query = query.Where(table.Col("parent_folder_id").IsNull()) + } else { + query = query.Where(table.Col("parent_folder_id").Eq(parentFolderID)) + } + + const single = false + return queryFunc(ctx, query, single, func(rows *sqlx.Rows) error { + var id int + var path string + if err := rows.Scan(&id, &path); err != nil { + return err + } + + return db.anonymiseFoldersRecurse(ctx, id, path) + }) +} + +func (db *Anonymiser) anonymiseFiles(ctx context.Context) error { + logger.Infof("Anonymising files") + return txn.WithTxn(ctx, db, func(ctx context.Context) error { + table := fileTableMgr.table + stmt := dialect.Update(table).Set(goqu.Record{"basename": goqu.Cast(table.Col(idColumn), "VARCHAR")}) + + if _, err := exec(ctx, stmt); err != nil { + return fmt.Errorf("anonymising %s: %w", table.GetTable(), err) + } + + return nil + }) +} + +func (db *Anonymiser) anonymiseFingerprints(ctx context.Context) error { + logger.Infof("Anonymising fingerprints") + table := fingerprintTableMgr.table + lastID := 0 + lastType := "" + total := 0 + const logEvery = 10000 + + for gotSome := true; gotSome; { + if err := txn.WithTxn(ctx, db, func(ctx context.Context) error { + query := dialect.From(table).Select( + table.Col(fileIDColumn), + table.Col("type"), + table.Col("fingerprint"), + ).Where(goqu.L("(file_id, type)").Gt(goqu.L("(?, ?)", lastID, lastType))).Limit(1000) + + gotSome = false + + const single = false + return queryFunc(ctx, query, single, func(rows *sqlx.Rows) error { + var ( + id int + typ string + fingerprint string + ) + + if err := rows.Scan( + &id, + &typ, + &fingerprint, + ); err != nil { + return err + } + + if err := db.anonymiseFingerprint(ctx, table, "fingerprint", fingerprint); err != nil { + return err + } + + lastID = id + lastType = typ + + gotSome = true + total++ + + if total%logEvery == 0 { + logger.Infof("Anonymised %d fingerprints", total) + } + + return nil + }) + }); err != nil { + return err + } + } + + return nil +} + +func (db *Anonymiser) anonymiseScenes(ctx context.Context) error { + logger.Infof("Anonymising scenes") + table := sceneTableMgr.table + lastID := 0 + total := 0 + const logEvery = 10000 + + for gotSome := true; gotSome; { + if err := txn.WithTxn(ctx, db, func(ctx context.Context) error { + query := dialect.From(table).Select( + table.Col(idColumn), + table.Col("title"), + table.Col("details"), + table.Col("url"), + table.Col("code"), + table.Col("director"), + ).Where(table.Col(idColumn).Gt(lastID)).Limit(1000) + + gotSome = false + + const single = false + return queryFunc(ctx, query, single, func(rows *sqlx.Rows) error { + var ( + id int + title sql.NullString + details sql.NullString + url sql.NullString + code sql.NullString + director sql.NullString + ) + + if err := rows.Scan( + &id, + &title, + &details, + &url, + &code, + &director, + ); err != nil { + return err + } + + set := goqu.Record{} + + // if title set set new title + db.obfuscateNullString(set, "title", title) + db.obfuscateNullString(set, "details", details) + db.obfuscateNullString(set, "url", url) + + if len(set) > 0 { + stmt := dialect.Update(table).Set(set).Where(table.Col(idColumn).Eq(id)) + + if _, err := exec(ctx, stmt); err != nil { + return fmt.Errorf("anonymising %s: %w", table.GetTable(), err) + } + } + + if code.Valid { + if err := db.anonymiseText(ctx, table, "code", code.String); err != nil { + return err + } + } + + if director.Valid { + if err := db.anonymiseText(ctx, table, "director", director.String); err != nil { + return err + } + } + + lastID = id + gotSome = true + total++ + + if total%logEvery == 0 { + logger.Infof("Anonymised %d scenes", total) + } + + return nil + }) + }); err != nil { + return err + } + } + + return nil +} + +func (db *Anonymiser) anonymiseImages(ctx context.Context) error { + logger.Infof("Anonymising images") + table := imageTableMgr.table + lastID := 0 + total := 0 + const logEvery = 10000 + + for gotSome := true; gotSome; { + if err := txn.WithTxn(ctx, db, func(ctx context.Context) error { + query := dialect.From(table).Select( + table.Col(idColumn), + table.Col("title"), + table.Col("url"), + ).Where(table.Col(idColumn).Gt(lastID)).Limit(1000) + + gotSome = false + + const single = false + return queryFunc(ctx, query, single, func(rows *sqlx.Rows) error { + var ( + id int + title sql.NullString + url sql.NullString + ) + + if err := rows.Scan( + &id, + &title, + &url, + ); err != nil { + return err + } + + set := goqu.Record{} + db.obfuscateNullString(set, "title", title) + db.obfuscateNullString(set, "url", url) + + if len(set) > 0 { + stmt := dialect.Update(table).Set(set).Where(table.Col(idColumn).Eq(id)) + + if _, err := exec(ctx, stmt); err != nil { + return fmt.Errorf("anonymising %s: %w", table.GetTable(), err) + } + } + + lastID = id + gotSome = true + total++ + + if total%logEvery == 0 { + logger.Infof("Anonymised %d images", total) + } + + return nil + }) + }); err != nil { + return err + } + } + + return nil +} + +func (db *Anonymiser) anonymiseGalleries(ctx context.Context) error { + logger.Infof("Anonymising galleries") + table := galleryTableMgr.table + lastID := 0 + total := 0 + const logEvery = 10000 + + for gotSome := true; gotSome; { + if err := txn.WithTxn(ctx, db, func(ctx context.Context) error { + query := dialect.From(table).Select( + table.Col(idColumn), + table.Col("title"), + table.Col("details"), + ).Where(table.Col(idColumn).Gt(lastID)).Limit(1000) + + gotSome = false + + const single = false + return queryFunc(ctx, query, single, func(rows *sqlx.Rows) error { + var ( + id int + title sql.NullString + details sql.NullString + ) + + if err := rows.Scan( + &id, + &title, + &details, + ); err != nil { + return err + } + + set := goqu.Record{} + db.obfuscateNullString(set, "title", title) + db.obfuscateNullString(set, "details", details) + + if len(set) > 0 { + stmt := dialect.Update(table).Set(set).Where(table.Col(idColumn).Eq(id)) + + if _, err := exec(ctx, stmt); err != nil { + return fmt.Errorf("anonymising %s: %w", table.GetTable(), err) + } + } + + lastID = id + gotSome = true + total++ + + if total%logEvery == 0 { + logger.Infof("Anonymised %d galleries", total) + } + + return nil + }) + }); err != nil { + return err + } + } + + return nil +} + +func (db *Anonymiser) anonymisePerformers(ctx context.Context) error { + logger.Infof("Anonymising performers") + table := performerTableMgr.table + lastID := 0 + total := 0 + const logEvery = 10000 + + for gotSome := true; gotSome; { + if err := txn.WithTxn(ctx, db, func(ctx context.Context) error { + query := dialect.From(table).Select( + table.Col(idColumn), + table.Col("name"), + table.Col("details"), + table.Col("url"), + table.Col("twitter"), + table.Col("instagram"), + table.Col("tattoos"), + table.Col("piercings"), + ).Where(table.Col(idColumn).Gt(lastID)).Limit(1000) + + gotSome = false + + const single = false + return queryFunc(ctx, query, single, func(rows *sqlx.Rows) error { + var ( + id int + name sql.NullString + details sql.NullString + url sql.NullString + twitter sql.NullString + instagram sql.NullString + tattoos sql.NullString + piercings sql.NullString + ) + + if err := rows.Scan( + &id, + &name, + &details, + &url, + &twitter, + &instagram, + &tattoos, + &piercings, + ); err != nil { + return err + } + + set := goqu.Record{} + db.obfuscateNullString(set, "name", name) + db.obfuscateNullString(set, "details", details) + db.obfuscateNullString(set, "url", url) + db.obfuscateNullString(set, "twitter", twitter) + db.obfuscateNullString(set, "instagram", instagram) + db.obfuscateNullString(set, "tattoos", tattoos) + db.obfuscateNullString(set, "piercings", piercings) + + if len(set) > 0 { + stmt := dialect.Update(table).Set(set).Where(table.Col(idColumn).Eq(id)) + + if _, err := exec(ctx, stmt); err != nil { + return fmt.Errorf("anonymising %s: %w", table.GetTable(), err) + } + } + + lastID = id + gotSome = true + total++ + + if total%logEvery == 0 { + logger.Infof("Anonymised %d performers", total) + } + + return nil + }) + }); err != nil { + return err + } + } + + if err := db.anonymiseAliases(ctx, goqu.T(performersAliasesTable), "performer_id"); err != nil { + return err + } + + return nil +} + +func (db *Anonymiser) anonymiseStudios(ctx context.Context) error { + logger.Infof("Anonymising studios") + table := studioTableMgr.table + lastID := 0 + total := 0 + const logEvery = 10000 + + for gotSome := true; gotSome; { + if err := txn.WithTxn(ctx, db, func(ctx context.Context) error { + query := dialect.From(table).Select( + table.Col(idColumn), + table.Col("name"), + table.Col("url"), + table.Col("details"), + ).Where(table.Col(idColumn).Gt(lastID)).Limit(1000) + + gotSome = false + + const single = false + return queryFunc(ctx, query, single, func(rows *sqlx.Rows) error { + var ( + id int + name sql.NullString + url sql.NullString + details sql.NullString + ) + + if err := rows.Scan( + &id, + &name, + &url, + &details, + ); err != nil { + return err + } + + set := goqu.Record{} + db.obfuscateNullString(set, "name", name) + db.obfuscateNullString(set, "url", url) + db.obfuscateNullString(set, "details", details) + + if len(set) > 0 { + stmt := dialect.Update(table).Set(set).Where(table.Col(idColumn).Eq(id)) + + if _, err := exec(ctx, stmt); err != nil { + return fmt.Errorf("anonymising %s: %w", table.GetTable(), err) + } + } + + lastID = id + gotSome = true + total++ + + // TODO - anonymise studio aliases + + if total%logEvery == 0 { + logger.Infof("Anonymised %d studios", total) + } + + return nil + }) + }); err != nil { + return err + } + } + + if err := db.anonymiseAliases(ctx, goqu.T(studioAliasesTable), "studio_id"); err != nil { + return err + } + + return nil +} + +func (db *Anonymiser) anonymiseAliases(ctx context.Context, table exp.IdentifierExpression, idColumn string) error { + lastID := 0 + lastAlias := "" + total := 0 + const logEvery = 10000 + + for gotSome := true; gotSome; { + if err := txn.WithTxn(ctx, db, func(ctx context.Context) error { + query := dialect.From(table).Select( + table.Col(idColumn), + table.Col("alias"), + ).Where(goqu.L("(" + idColumn + ", alias)").Gt(goqu.L("(?, ?)", lastID, lastAlias))).Limit(1000) + + gotSome = false + + const single = false + return queryFunc(ctx, query, single, func(rows *sqlx.Rows) error { + var ( + id int + alias sql.NullString + ) + + if err := rows.Scan( + &id, + &alias, + ); err != nil { + return err + } + + set := goqu.Record{} + db.obfuscateNullString(set, "alias", alias) + + if len(set) > 0 { + stmt := dialect.Update(table).Set(set).Where( + table.Col(idColumn).Eq(id), + table.Col("alias").Eq(alias), + ) + + if _, err := exec(ctx, stmt); err != nil { + return fmt.Errorf("anonymising %s: %w", table.GetTable(), err) + } + } + + lastID = id + lastAlias = alias.String + gotSome = true + total++ + + if total%logEvery == 0 { + logger.Infof("Anonymised %d %s aliases", total, table.GetTable()) + } + + return nil + }) + }); err != nil { + return err + } + } + + return nil +} + +func (db *Anonymiser) anonymiseTags(ctx context.Context) error { + logger.Infof("Anonymising tags") + table := tagTableMgr.table + lastID := 0 + total := 0 + const logEvery = 10000 + + for gotSome := true; gotSome; { + if err := txn.WithTxn(ctx, db, func(ctx context.Context) error { + query := dialect.From(table).Select( + table.Col(idColumn), + table.Col("name"), + table.Col("description"), + ).Where(table.Col(idColumn).Gt(lastID)).Limit(1000) + + gotSome = false + + const single = false + return queryFunc(ctx, query, single, func(rows *sqlx.Rows) error { + var ( + id int + name sql.NullString + description sql.NullString + ) + + if err := rows.Scan( + &id, + &name, + &description, + ); err != nil { + return err + } + + set := goqu.Record{} + db.obfuscateNullString(set, "name", name) + db.obfuscateNullString(set, "description", description) + + if len(set) > 0 { + stmt := dialect.Update(table).Set(set).Where(table.Col(idColumn).Eq(id)) + + if _, err := exec(ctx, stmt); err != nil { + return fmt.Errorf("anonymising %s: %w", table.GetTable(), err) + } + } + + lastID = id + gotSome = true + total++ + + if total%logEvery == 0 { + logger.Infof("Anonymised %d tags", total) + } + + return nil + }) + }); err != nil { + return err + } + } + + if err := db.anonymiseAliases(ctx, goqu.T(tagAliasesTable), "tag_id"); err != nil { + return err + } + + return nil +} + +func (db *Anonymiser) anonymiseMovies(ctx context.Context) error { + logger.Infof("Anonymising movies") + table := movieTableMgr.table + lastID := 0 + total := 0 + const logEvery = 10000 + + for gotSome := true; gotSome; { + if err := txn.WithTxn(ctx, db, func(ctx context.Context) error { + query := dialect.From(table).Select( + table.Col(idColumn), + table.Col("name"), + table.Col("aliases"), + table.Col("synopsis"), + table.Col("url"), + table.Col("director"), + ).Where(table.Col(idColumn).Gt(lastID)).Limit(1000) + + gotSome = false + + const single = false + return queryFunc(ctx, query, single, func(rows *sqlx.Rows) error { + var ( + id int + name sql.NullString + aliases sql.NullString + synopsis sql.NullString + url sql.NullString + director sql.NullString + ) + + if err := rows.Scan( + &id, + &name, + &aliases, + &synopsis, + &url, + &director, + ); err != nil { + return err + } + + set := goqu.Record{} + db.obfuscateNullString(set, "name", name) + db.obfuscateNullString(set, "aliases", aliases) + db.obfuscateNullString(set, "synopsis", synopsis) + db.obfuscateNullString(set, "url", url) + db.obfuscateNullString(set, "director", director) + + if len(set) > 0 { + stmt := dialect.Update(table).Set(set).Where(table.Col(idColumn).Eq(id)) + + if _, err := exec(ctx, stmt); err != nil { + return fmt.Errorf("anonymising %s: %w", table.GetTable(), err) + } + } + + lastID = id + gotSome = true + total++ + + if total%logEvery == 0 { + logger.Infof("Anonymised %d movies", total) + } + + return nil + }) + }); err != nil { + return err + } + } + + return nil +} + +func (db *Anonymiser) anonymiseText(ctx context.Context, table exp.IdentifierExpression, column string, value string) error { + set := goqu.Record{} + set[column] = db.obfuscateString(value, letters) + + stmt := dialect.Update(table).Set(set).Where(table.Col(column).Eq(value)) + + if _, err := exec(ctx, stmt); err != nil { + return fmt.Errorf("anonymising %s: %w", column, err) + } + + return nil +} + +func (db *Anonymiser) anonymiseFingerprint(ctx context.Context, table exp.IdentifierExpression, column string, value string) error { + set := goqu.Record{} + set[column] = db.obfuscateString(value, hex) + + stmt := dialect.Update(table).Set(set).Where(table.Col(column).Eq(value)) + + if _, err := exec(ctx, stmt); err != nil { + return fmt.Errorf("anonymising %s: %w", column, err) + } + + return nil +} + +func (db *Anonymiser) obfuscateNullString(out goqu.Record, column string, in sql.NullString) { + if in.Valid { + out[column] = db.obfuscateString(in.String, letters) + } +} + +func (db *Anonymiser) obfuscateString(in string, dict string) string { + out := strings.Builder{} + for _, c := range in { + if unicode.IsSpace(c) { + out.WriteRune(c) + } else { + num, err := rand.Int(rand.Reader, big.NewInt(int64(len(dict)))) + if err != nil { + panic("error generating random number") + } + + out.WriteByte(dict[num.Int64()]) + } + } + + return out.String() +} diff --git a/pkg/sqlite/anonymise_test.go b/pkg/sqlite/anonymise_test.go new file mode 100644 index 000000000..868224eef --- /dev/null +++ b/pkg/sqlite/anonymise_test.go @@ -0,0 +1,39 @@ +//go:build integration +// +build integration + +package sqlite_test + +import ( + "context" + "os" + "testing" + + "github.com/stashapp/stash/pkg/sqlite" +) + +func TestAnonymiser_Anonymise(t *testing.T) { + f, err := os.CreateTemp("", "*.sqlite") + if err != nil { + t.Errorf("Could not create temporary file: %v", err) + return + } + + f.Close() + defer os.Remove(f.Name()) + + // use existing database + anonymiser, err := sqlite.NewAnonymiser(db, f.Name()) + if err != nil { + t.Errorf("Could not create anonymiser: %v", err) + return + } + + if err := anonymiser.Anonymise(context.Background()); err != nil { + t.Errorf("Could not anonymise: %v", err) + return + } + + t.Logf("Anonymised database written to %s", f.Name()) + + // TODO - ensure anonymous +} diff --git a/pkg/sqlite/database.go b/pkg/sqlite/database.go index 63a427ed8..c4d8bea31 100644 --- a/pkg/sqlite/database.go +++ b/pkg/sqlite/database.go @@ -212,7 +212,7 @@ func (db *Database) open(disableForeignKeys bool) (*sqlx.DB, error) { return conn, nil } -func (db *Database) Reset() error { +func (db *Database) Remove() error { databasePath := db.dbPath err := db.Close() @@ -236,6 +236,15 @@ func (db *Database) Reset() error { } } + return nil +} + +func (db *Database) Reset() error { + databasePath := db.dbPath + if err := db.Remove(); err != nil { + return err + } + if err := db.Open(databasePath); err != nil { return fmt.Errorf("[reset DB] unable to initialize: %w", err) } @@ -265,6 +274,16 @@ func (db *Database) Backup(backupPath string) error { return nil } +func (db *Database) Anonymise(outPath string) error { + anon, err := NewAnonymiser(db, outPath) + + if err != nil { + return err + } + + return anon.Anonymise(context.Background()) +} + func (db *Database) RestoreFromBackup(backupPath string) error { logger.Infof("Restoring backup database %s into %s", backupPath, db.dbPath) return os.Rename(backupPath, db.dbPath) @@ -293,6 +312,16 @@ func (db *Database) DatabaseBackupPath(backupDirectoryPath string) string { return fn } +func (db *Database) AnonymousDatabasePath(backupDirectoryPath string) string { + fn := fmt.Sprintf("%s.anonymous.%d.%s", filepath.Base(db.dbPath), db.schemaVersion, time.Now().Format("20060102_150405")) + + if backupDirectoryPath != "" { + return filepath.Join(backupDirectoryPath, fn) + } + + return fn +} + func (db *Database) Version() uint { return db.schemaVersion } @@ -383,8 +412,14 @@ func (db *Database) RunMigrations() error { } // optimize database after migration + db.optimise() + + return nil +} + +func (db *Database) optimise() { logger.Info("Optimizing database") - _, err = db.db.Exec("ANALYZE") + _, err := db.db.Exec("ANALYZE") if err != nil { logger.Warnf("error while performing post-migration optimization: %v", err) } @@ -392,8 +427,6 @@ func (db *Database) RunMigrations() error { if err != nil { logger.Warnf("error while performing post-migration vacuum: %v", err) } - - return nil } func (db *Database) runCustomMigrations(ctx context.Context, fns []customMigrationFunc) error { diff --git a/pkg/utils/func.go b/pkg/utils/func.go new file mode 100644 index 000000000..a84091721 --- /dev/null +++ b/pkg/utils/func.go @@ -0,0 +1,12 @@ +package utils + +// Do executes each function in the slice in order. If any function returns an error, it is returned immediately. +func Do(fn []func() error) error { + for _, f := range fn { + if err := f(); err != nil { + return err + } + } + + return nil +} diff --git a/ui/v2.5/src/components/Settings/Tasks/DataManagementTasks.tsx b/ui/v2.5/src/components/Settings/Tasks/DataManagementTasks.tsx index 663eaa77c..0f57afd3c 100644 --- a/ui/v2.5/src/components/Settings/Tasks/DataManagementTasks.tsx +++ b/ui/v2.5/src/components/Settings/Tasks/DataManagementTasks.tsx @@ -7,6 +7,7 @@ import { mutateBackupDatabase, mutateMetadataImport, mutateMetadataClean, + mutateAnonymiseDatabase, } from "src/core/StashService"; import { useToast } from "src/hooks"; import { downloadFile } from "src/utils"; @@ -149,10 +150,12 @@ const CleanOptions: React.FC = ({ interface IDataManagementTasks { setIsBackupRunning: (v: boolean) => void; + setIsAnonymiseRunning: (v: boolean) => void; } export const DataManagementTasks: React.FC = ({ setIsBackupRunning, + setIsAnonymiseRunning, }) => { const intl = useIntl(); const Toast = useToast(); @@ -259,7 +262,7 @@ export const DataManagementTasks: React.FC = ({ Toast.success({ content: intl.formatMessage( { id: "config.tasks.added_job_to_queue" }, - { operation_name: intl.formatMessage({ id: "actions.backup" }) } + { operation_name: intl.formatMessage({ id: "actions.export" }) } ), }); } catch (err) { @@ -286,6 +289,25 @@ export const DataManagementTasks: React.FC = ({ } } + async function onAnonymise(download?: boolean) { + try { + setIsAnonymiseRunning(true); + const ret = await mutateAnonymiseDatabase({ + download, + }); + + // download the result + if (download && ret.data && ret.data.anonymiseDatabase) { + const link = ret.data.anonymiseDatabase; + downloadFile(link); + } + } catch (e) { + Toast.error(e); + } finally { + setIsAnonymiseRunning(false); + } + } + return ( {renderImportAlert()} @@ -361,7 +383,7 @@ export const DataManagementTasks: React.FC = ({ type="submit" onClick={() => onExport()} > - + … @@ -433,6 +455,45 @@ export const DataManagementTasks: React.FC = ({ + + + [origFilename].anonymous.sqlite.[schemaVersion].[YYYYMMDD_HHMMSS] + + ), + } + )} + > + + + + + + + + { const intl = useIntl(); const [isBackupRunning, setIsBackupRunning] = useState(false); + const [isAnonymiseRunning, setIsAnonymiseRunning] = useState(false); if (isBackupRunning) { return ( @@ -18,6 +19,16 @@ export const SettingsTasksPanel: React.FC = () => { ); } + if (isAnonymiseRunning) { + return ( + + ); + } + return (
@@ -28,7 +39,10 @@ export const SettingsTasksPanel: React.FC = () => {

- +
diff --git a/ui/v2.5/src/core/StashService.ts b/ui/v2.5/src/core/StashService.ts index b48233a89..7aa6040a5 100644 --- a/ui/v2.5/src/core/StashService.ts +++ b/ui/v2.5/src/core/StashService.ts @@ -1234,6 +1234,12 @@ export const mutateBackupDatabase = (input: GQL.BackupDatabaseInput) => variables: { input }, }); +export const mutateAnonymiseDatabase = (input: GQL.AnonymiseDatabaseInput) => + client.mutate({ + mutation: GQL.AnonymiseDatabaseDocument, + variables: { input }, + }); + export const mutateStashBoxBatchPerformerTag = ( input: GQL.StashBoxBatchPerformerTagInput ) => diff --git a/ui/v2.5/src/docs/en/Changelog/v0190.md b/ui/v2.5/src/docs/en/Changelog/v0190.md index ffde96fb9..2bae90887 100644 --- a/ui/v2.5/src/docs/en/Changelog/v0190.md +++ b/ui/v2.5/src/docs/en/Changelog/v0190.md @@ -5,6 +5,7 @@ * Added URL and Date fields to Images. ([#3015](https://github.com/stashapp/stash/pull/3015)) * Added support for plugins to add injected CSS and Javascript to the UI. ([#3195](https://github.com/stashapp/stash/pull/3195)) * Added disambiguation field to Performers, to differentiate between performers with the same name. ([#3113](https://github.com/stashapp/stash/pull/3113)) +* Added Anonymise task to generate an anonymised version of the database. ([#3186](https://github.com/stashapp/stash/pull/3186)) ### 🎨 Improvements * Changed performer aliases to be a list, rather than a string field. ([#3113](https://github.com/stashapp/stash/pull/3113)) diff --git a/ui/v2.5/src/locales/en-GB.json b/ui/v2.5/src/locales/en-GB.json index 21b19547b..76f28ccfb 100644 --- a/ui/v2.5/src/locales/en-GB.json +++ b/ui/v2.5/src/locales/en-GB.json @@ -6,6 +6,7 @@ "add_to_entity": "Add to {entityType}", "allow": "Allow", "allow_temporarily": "Allow temporarily", + "anonymise": "Anonymise", "apply": "Apply", "auto_tag": "Auto Tag", "backup": "Backup", @@ -32,10 +33,11 @@ "delete_stashid": "Delete StashID", "disallow": "Disallow", "download": "Download", + "download_anonymised": "Download anonymised", "download_backup": "Download Backup", "edit": "Edit", "edit_entity": "Edit {entityType}", - "export": "Export…", + "export": "Export", "export_all": "Export all…", "find": "Find", "finish": "Finish", @@ -346,6 +348,9 @@ }, "tasks": { "added_job_to_queue": "Added {operation_name} to job queue", + "anonymising_database": "Anonymising database", + "anonymise_and_download": "Makes an anonymised copy of the database and downloads the resulting file.", + "anonymise_database": "Makes a copy of the database to the backups directory, anonymising all sensitive data. This can then be provided to others for troubleshooting and debugging purposes. The original database is not modified. Anonymised database uses the filename format {filename_format}.", "auto_tag": { "auto_tagging_all_paths": "Auto Tagging all paths", "auto_tagging_paths": "Auto Tagging the following paths"