From d0f2c97a04b53cc30a20f76568e807df291171f9 Mon Sep 17 00:00:00 2001 From: Hydrus Network Developer Date: Wed, 23 Oct 2024 15:26:37 -0500 Subject: [PATCH] Version 595 --- db/help my db is broke.txt | 9 +- docs/changelog.md | 127 ++++--- docs/developer_api.md | 8 +- docs/duplicates_auto_resolution.md | 47 +++ docs/filetypes.md | 2 +- docs/getting_started_downloading.md | 10 +- docs/getting_started_more_tags.md | 2 +- docs/getting_started_tags.md | 2 +- docs/old_changelog.html | 54 +++ hydrus/client/ClientData.py | 32 ++ hydrus/client/ClientOptions.py | 3 +- hydrus/client/ClientRendering.py | 4 +- hydrus/client/ClientStrings.py | 45 ++- hydrus/client/ClientUgoiraHandling.py | 51 ++- hydrus/client/db/ClientDB.py | 73 ++++ hydrus/client/db/ClientDBFilesDuplicates.py | 2 +- .../ClientDBFilesDuplicatesAutoResolution.py | 46 +-- hydrus/client/duplicates/ClientDuplicates.py | 10 +- ...s.py => ClientDuplicatesAutoResolution.py} | 109 +++++- hydrus/client/gui/ClientGUI.py | 26 ++ hydrus/client/gui/ClientGUIStringPanels.py | 9 +- hydrus/client/gui/canvas/ClientGUICanvas.py | 26 +- .../client/gui/canvas/ClientGUICanvasMedia.py | 11 +- .../ClientGUIDuplicateActions.py} | 0 .../ClientGUIDuplicatesAutoResolution.py | 353 ++++++++++++++++++ hydrus/client/gui/duplicates/__init__.py | 0 .../client/gui/exporting/ClientGUIExport.py | 1 + hydrus/client/gui/lists/ClientGUIListBoxes.py | 42 ++- .../gui/lists/ClientGUIListBoxesData.py | 2 +- .../gui/lists/ClientGUIListConstants.py | 40 ++ hydrus/client/gui/lists/ClientGUIListCtrl.py | 43 +-- .../client/gui/media/ClientGUIMediaMenus.py | 16 +- .../gui/media/ClientGUIMediaModalActions.py | 37 ++ .../gui/media/ClientGUIMediaSimpleActions.py | 3 +- .../pages/ClientGUIManagementController.py | 4 +- .../gui/pages/ClientGUIManagementPanels.py | 27 ++ .../gui/pages/ClientGUIMediaResultsPanel.py | 26 +- hydrus/client/gui/pages/ClientGUIPages.py | 47 ++- .../gui/panels/ClientGUIManageOptionsPanel.py | 15 +- 
.../client/gui/search/ClientGUIACDropdown.py | 105 ++++-- .../gui/search/ClientGUISearchPanels.py | 2 +- .../client/importing/ClientImportFileSeeds.py | 85 ++++- .../importing/ClientImportSimpleURLs.py | 11 +- .../importing/options/FileImportOptions.py | 5 +- .../importing/options/TagImportOptions.py | 21 +- hydrus/client/media/ClientMedia.py | 36 +- .../api/ClientLocalServerResourcesGetFiles.py | 5 +- .../search/ClientSearchFavouriteSearches.py | 38 +- hydrus/core/HydrusConstants.py | 5 +- hydrus/core/HydrusData.py | 2 +- hydrus/core/HydrusLists.py | 23 ++ hydrus/core/HydrusNumbers.py | 29 -- hydrus/core/HydrusSerialisable.py | 8 +- hydrus/core/files/HydrusUgoiraHandling.py | 57 +-- hydrus/test/TestClientAPI.py | 2 + hydrus/test/TestClientParsing.py | 16 +- hydrus/test/TestHydrusSerialisable.py | 16 + .../docker/client/supervisord.conf | 2 +- static/x.svg | 4 +- static/x_white.svg | 3 + 60 files changed, 1452 insertions(+), 387 deletions(-) create mode 100644 docs/duplicates_auto_resolution.md rename hydrus/client/duplicates/{ClientAutoDuplicates.py => ClientDuplicatesAutoResolution.py} (66%) rename hydrus/client/gui/{ClientGUIDuplicates.py => duplicates/ClientGUIDuplicateActions.py} (100%) create mode 100644 hydrus/client/gui/duplicates/ClientGUIDuplicatesAutoResolution.py create mode 100644 hydrus/client/gui/duplicates/__init__.py create mode 100644 static/x_white.svg diff --git a/db/help my db is broke.txt b/db/help my db is broke.txt index ad2d93e0..6e73cab3 100644 --- a/db/help my db is broke.txt +++ b/db/help my db is broke.txt @@ -77,7 +77,7 @@ And when you are ready to close the shell cleanly, go: .exit -It can be slow. A few MB a second is typical on an HDD (SSDs obviously faster), so expect a 10GB file to take a while. If it takes hours and hours, and your Task Manager suggests only 50KB/s read, consider again if your hard drive is healthy or not. +It can be slow. 
A few MB a second is typical on an HDD (SSDs obviously faster), so expect a 10GB file to take a while. A 60GB mappings.db may take two hours. If it takes way way too long, and your Task Manager suggests only 50KB/s read, consider again if your hard drive is healthy or not. Please note that newer versions of SQLite support a second check command: @@ -97,6 +97,7 @@ PRAGMA integrity_check; PRAGMA integrity_check; .exit +(this one can take ages simply because of the size of the file, so only do it if you really need to check; if you know it is broke, just move on to cloning now, no need to waste time confirming it) .open client.mappings.db PRAGMA integrity_check; .exit @@ -118,7 +119,7 @@ This instructs the database to copy itself to a new file. When it comes across d And wait a bit. It'll report its progress as it tries to copy your db. It will be slow. Remember to go '.exit' once it is done to close the shell neatly. -If the clone says some errors like 'subtags_fts4_content already exists' but keeps on working, don't worry about it! That isn't a real error. Same if you get an error about a missing 'sqlite_stat1' table. +If the clone says some errors involving 'fts' or 'fts4', like 'subtags_fts4_content already exists', but it keeps on working, don't worry about it! That probably isn't a real error, and even if it is, 'fts' stuff will be automatically fixed on the next boot. Same if you get any errors about 'sqlite_stat1' table. Once it is done, the cloned file may be significantly smaller than the original. 50% reduction in size is typical. This means some data has been lost. If this is in client.caches.db, client.master.db or client.mappings.db, it is probably fine (particularly if you sync to a tag repository like the PTR), as we can regenerate that data with some work once you are working again. @@ -140,6 +141,8 @@ If a different file is broken, then use of these: Do not delete your original files for now. Just rename them to 'old' and move them somewhere safe. 
Also be careful not to lose track of which is which (filesize and creation date can help here)! Make sure your new cloned files are all named right and then try running the client! +You do not have to do an integrity_check after you clone. If you know your current hard drive is good, you can assume a clone works to create a 'clean' database. + ** repair ** This command tries to fix a database file in place. I do not recommend it as it works very very slowly. While it may be able to recover some rows a clones would lose, it cannot fix everything and may leave you with a database that is still malformed, so you'll want to run integrity_check again and do a clone if needed. @@ -250,4 +253,4 @@ If you do not have a backup routine, this is the time to sort it out. If you can Check out the hydrus 'getting started with installing/updating/backing up' help for some more info about routines and programs like FreeFileSync. -https://hydrusnetwork.github.io/hydrus/after_disaster.html \ No newline at end of file +https://hydrusnetwork.github.io/hydrus/after_disaster.html diff --git a/docs/changelog.md b/docs/changelog.md index 8a3bd664..b8ffe03d 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -7,6 +7,73 @@ title: Changelog !!! note This is the new changelog, only the most recent builds. For all versions, see the [old changelog](old_changelog.html). +## [Version 595](https://github.com/hydrusnetwork/hydrus/releases/tag/v595) + +### ugoiras + +* thanks to a user who put in a lot of work, we finally have Ugoira rendering! all ugoiras will now animate using the hydrus native animation player. if the ugoira has json timing data in its zip (those downloaded with PixivUtil and gallery-dl will!), we will use that, but if it is just a zip of images (which is most older ugoiras you'll see in the wild), it'll check a couple of note names for the timing data, and, failing that, will assign a default 125ms per frame fallback. 
ugoiras without internal timing data will currently get no 'duration' metadata property, but right-clicking on them will show their note-based or simulated duration on the file info line +* all existing ugoiras will be metadata rescanned and thumbnail regenned on update +* technical info here: https://hydrusnetwork.github.io/hydrus/filetypes.html#ugoira +* ugoira metadata and thumbnail generation is cleaner +* a bug in ugoira thumbnail selection, when the file contains non-image files, is fixed +* a future step will be to write a special hook into the hydrus downloader engine to recognise ugoiras (typically on Pixiv) and splice the timing data into the zip on download, at which point we'll finally be able to turn on Ugoira downloading on Pixiv on our end. for now, please check out PixivUtil or gallery-dl to get rich Ugoiras +* I'd like to bake the simulated or note-based durations into the database somehow, as I don't like the underlying media object thinking these things have no duration, but it'll need more thought + +### misc + +* all multi-column lists now sort string columns in a caseless manner. a subscription called 'Tents' will now slot between 'sandwiches' and 'umbrellas' +* in 'favourite searches', the 'folder' name now has hacky nested folder support. just put '/' in the folder name and it'll make nested submenus. in future this will be implemented with a nicer tree widget +* file logs now load faster in a couple of ways, which should speed up UI session and subscriptions dialog load. previously, there were two rounds of URL normalisation on URL file import object load, one wasteful and one fixable with a cache; these are now dealt with. thanks to the users who sent in profiles of the subscriptions dialog opening; let me know how things seem now (hopefully this fixes/relieves #1612) +* added 'Swap in common resolution labels' to `options->media viewer`. 
this lets you turn off the '1080p' and '4k'-style label swap-ins for common resolutions on file descriptor strings +* the 'are you sure you want to exit the client? 3 pages say "I am still importing"' popup now says the page names, and in a pretty way, and it shows multiple messages nicer +* the primary 'sort these tags in a human way m8' routine now uses unicode tech to sort things like ß better +* the String Converter can decode 'hex' and 'base64' again (so you can now do '68656c6c6f20776f726c64' or 'aGVsbG8gd29ybGQ=' to 'hello world'). these functions were a holdover from hash parsing in the python 2 times, but I've brushed them off and cleared out the 'what if we put raw bytes in the parsing system bro' nonsense we used to have to deal with. these types are now explicitly UTF-8. I also added a couple unit tests for them +* fixed an options initialisation bug where setting two files in the duplicate filter as 'not related' was updating the A file to have the B file's file modified time if that was earlier!! if you have files in this category, you will be asked on update if you want to reset their file modified date back to what is actually on disk (the duplicate merge would not have overwritten this; this only happens if you edit the time in the times dialog by hand). a unit test now checks this situation. sorry for the trouble, and thank you to the user who noticed and reported this +* the hydrus Docker package now sets the 'hydrus' process to `autorestart=unexpected`. I understand this makes `file->exit` stick without an automatic restart. it seems like commanding the whole Docker image to shut down still causes a near-instant unclean exit (some SIGTERM thing isn't being caught right, I think), but `file->exit` should now be doable beforehand. 
we will keep working here + +### more OR preds + +* the new 'replace selected with their OR' and the original 'add an OR of the selected' are now mutually exclusive, depending on whether the current selection is entirely in the active search list +* added 'start an OR with selected', which opens the 'edit OR predicate' panel on the current selection. this works if you only select one item, too +* added 'dissolve selected into single predicates', when you select only OR predicates. it does the opposite of the 'replace' +* the new OR menu gubbins is now in its own separated menu section on the tag right-click +* the indent for OR sub preds is moved up from two spaces to four + +### urls + +* wrote some help about the 'force page refetch' checkboxes in 'tag import options' here: https://hydrusnetwork.github.io/hydrus/getting_started_downloading.html#force_page_fetch +* added a new submenu `urls->force metadata refetch` that lets you quickly and automatically create a new urls downloader page with the selected files' 'x URL Class' urls with the tag import options set to the respective URLs' default but with these checkboxes all set for you. we finally have a simple answer to 'I messed up my tag parse, I need to redownload these files to get the tags'! +* the urls menu offers the 'for x url class' even when only one file is selected now. crazy files with fifty of the same url class can now be handled + +### duplicates auto-resolution + +* wrote some placeholder UI for the new system. anyone who happens to be in advanced mode will see another tab on duplicate filter pages. you can poke around if you like, but it is mostly just blank lists that aren't plugged into anything +* wrote some placeholder help too. same deal, just a placeholder that you have to look for to find that I'll keep working on +* I still feel good about the duplicates auto-resolution system. 
there is much more work to do, but I'll keep iterating and fleshing things out + +### client api + +* the new `/get_files/file_path` command now returns the `filetype` and `size` of the file +* updated the Client API help and unit tests for this +* client api version is now 73 + +### new build stuff + +* the library updates we've been testing the past few weeks have gone well, so I am rolling them into the normal builds for everyone. the libraries that do 'fetch stuff from the internet' and 'help python manage its packages' are being updated because of some security problems that I don't think matter for us at all (there's some persistent https verification thing in requests that I know we don't care about, and a malicious URL exploit in setuptools that only matters if you are using it to download packages, which, as I understand, we don't), but we are going to be good and update anyway +* `requests` is updated from `2.31.0` to `2.32.3` +* `setuptools` is updated from `69.1.1` to `70.3.0` +* `PyInstaller` is updated from `6.2` to `6.7` for Windows and Linux to handle the new `setuptools` +* there do not appear to be any update conflicts with dlls or anything, so just update like you normally do. I don't think the new pyinstaller will have problems with older/weirder Windows, but let me know if you run into anything +* users who run from source may like to reinstall their venvs after pulling to get the new libraries too + +### boring cleanup + +* refactored `ClientGUIDuplicates` to a new `duplicates` gui module and renamed it to `ClientGUIDuplicateActions` +* harmonised some duplicates auto-resolution terminology across the client to exactly that form. 
not auto-duplicates or duplicate auto resolution, but 'duplicates auto-resolution' +* fixed some bad help link anchors +* clarified a couple things in the 'help my db is broke.txt' document +* updated the new x.svg to a black version; it looks a bit better in light & dark styles + ## [Version 594](https://github.com/hydrusnetwork/hydrus/releases/tag/v594) ### misc @@ -330,63 +397,3 @@ title: Changelog * added `/add_urls/migrate_files` to copy files to new local file domains (essentially doing _files->add to_ from the thumbnail menu) * with (I think) all multiple local file service capabilities added to the Client API, issue #251 is finally ticked off * client api version is now 68 - -## [Version 585](https://github.com/hydrusnetwork/hydrus/releases/tag/v585) - -### the new asynchronous siblings and parent dialogs - -* the `tags->manage tag siblings/parents` dialogs now load quickly. rather than fetching all known pairs on every open, they now only load pertinent pairs as they are needed. if you type in tag A in the left or right side, all the pairs that involve A directly or point to a pair that involves A directly or indirectly are loaded in the background (usually so fast it seems instant). the dialog can still do 'ah, that would cause a conflict, what do you want to do?' logic, but it only fetches what it needs -* the main edit operations in this dialog are now 'asynchronous', which means there is actually a short delay between the action firing and the UI updating. most of the time it is so fast it isn't noticeable, and in general because of other cleanup it tends to be faster about everything it does -* the dialogs now have a sticky workspace 'memory'. when you type tags in, the dialog still shows the related rows as normal, but now it does not clear those rows away once you actually enter those new pairs. the 'workspace' shows anything related to anything you have typed until you hit the new 'wipe workspace' button, which will reset back to a blank view. 
I hope this makes it less frustrating to work on a large group--it now stays in view the whole time, rather than the 'current' stuff jumping in and out of view vs the pending/petitioned as you type and submit stuff. the 'wipe workspace' button also has the current workspace tags in its tooltip -* the 'show all pairs' checkbox remains. it may well take twenty seconds to load up the hundreds of thousands of pairs from the PTR, but you can do it -* also added is a 'show pending and petitioned groups', which will load up anything waiting to be uploaded to a tag repository, and all related pairs -* when a user with 'modify siblings/parents' adds a pair, the auto-assigned 'reason' is now "Entered by a janitor.' (previously it was the enigmatic "admin") -* some misc layout improvements aross the board. the green/red text at the top is compressed; the 'num pairs' now shows the current number of pairs count; there are more rows for the pairs list, fewer for the input list; and the pairs list eats up all new expand space -* a great amount of misc code cleanup in all these panels and systems, and most of the logic is shared between both sibling and parent dialogs. a lot of janky old stuff is cleared up! -* these dialogs are better about showing invalid, duplicated, or loop-causing pairs. the idea is to show you everything as-is in storage so you can better directly edit problems out (previously, I am pretty sure it was sometimes collapsing stuff and obscuring problems) -* the 'manage tag parents' dialog now auto-petitions new loops when entering pairs (it was just siblings before) -* this tech now works on multiple potential loops, rather than just the first -* the 'manage tag parents' dialog now detects pre-existing loops in the database record and warns about this when trying to enter pairs that join the loop (it was just siblings before) -* this tech works better and now detects multiple loops, including completely invalid records that nonetheless exist (e.g. 
`a->b, a->c` siblings that point to more than one locations), and when it reports them, it now reports them all in one dialog, and it shows the actual `a->b->c->d` route that forms the loop -* a bad final 'do not allow loop-inputs' backstop check in the main pair-add routine is removed--it was not helping - -### misc - -* hitting escape on any taglist will now deselect all tags -* added 'Do not allow mouse media drag-panning when the media has duration' to the _options->media viewer_ page. if you often misclick and pan when scrubbing through videos, try it out! -* the media viewer's top hover window no longer shows every 'added-to' time for all the local file services; it was spammy, so it now just says 'imported: (time)'. the related 'hide uninteresting import time' option is retired. I also removed the 'archived: (time)' label, so this is now pretty much just 'imported, modified'. if I bring detailed times back to the file summary, it'll be part of a more flexible system. note that all these timestamps are still available in the media top-row flyout menu -* the file log and gallery log now copy their urls/sources on a ctrl+c hit. also, the 'copy' right-click commands here also no longer unhelpfully double-newline-separates rows -* a `StringConverter` edit panel now throws up a yes/no confirmation if you try to ok on a regex substitution that seems to match a group in the pattern but has an empty string in the 'replacement' box -* updated the 'test' versions of OpenCV (4.10.0.84), Pyside6 (6.7.2), and python-mpv (1.0.7). 
I'll be testing these myself, and devving with them, mostly to iron out some Qt 6.7.x stuff we've seen, and then put out a future release with them -* added a note to the default_mpv.conf to say 'try commenting out the audio normalisation line if you get mpv problems and are on Arch' -* added different example launch paths to the 'external programs' options panel depending on the current OS -* added a note about running with `QT_QPA_PLATFORM=xcb` on Wayland to the install help -* refactored the `ClientGUIFileSeedCache` and `ClientGUIGallerySeedLog` files, which do the file and gallery log panels, up to the 'gui.importing' module -* thanks to a user, added a new darkmode 'Nord' stylesheet - -### fixes - -* fixed 'scrub invalidity' in the manage logins dialog--sorry, it was a stupid typo from the recent multiple-column list rework. also, this button is now only enabled if the login script is active -* fixed a bug opening the 'migrate files' dialog when you have no files! -* I force-added `Accept-Language: en-US,en;q=0.5` to the client's default http headers for pixiv.net. this appears to get the API to give us English tags again. let me know if this completely screws anything up -* updated the 'do we have enough disk space to do this transaction?' test to check for double the destination disk amount. thanks to the user who helped navigate this--regardless of temp dir work, when you do a vacuum or other gigantic single transaction, there is a very brief period as the transaction commits when either the stuffed WAL journal or (for a vacuum) cloned db file exists at the same time in the same folder as the original db file. I also updated the text in the 'review vacuum data' window to talk about this a bit. 
good luck vacuuming your client.mappings.db file bros -* improved the error handling when a sidecar import fails--it now says the original file path in the report -* improved failure-recovery of unicode decoding (usually used in webpage parsing) when the given text includes errors and the encoding is `ISO-8859-1` (or the encoding is unparseable and `requests` falls back to it) and/or if `chardet` is not available -* I hacked the menubar padding back to something sensible on the new 'windows11' style int Qt 6.7.x. for whatever reason, this new style adds about 15px of padding/margin to each menubar menu button. I am aware the collect-by combobox is still busted in this style--let me know if you spot anything else! btw switching from 'windows11' to 'windowsvista' seems to make all the menubar menus transparent, let's go -* improved the layout of the 'edit client api access key permissions' panel. it wasn't vertically expanding before -* fixed up some keypress handling in taglists. some stuff that was being swallowed or promoted unintentionally is fixed -* thanks to a user, fixed a weird bug in the 'repair missing file storage locations' boot repair dialog where it would always say you only had missing thumbs -* also thanks to that user, the 'repair missing file storage locations' dialog now checks `client_files` and `thumbnails` subdirectories when trying to auto-discover with the 'add a possibly correct location' action - -### some hash-sorting stuff - -* _you can probably ignore this section, don't worry about it_ -* you can now sort by blurhash. this works at the database level too, when mixed with system:limit -* when sorting by pixel hash, a file search with system:limit now pre-sorts by pixel hash before the limit clips the resultset -* when sorting by pixel hash or blurhash, the files with no such hash (e.g. 
audio files) are now always put at the end -* searching many tens of thousands of files and sorting by hash, pixel hash, or blurhash is now just a tiny bit faster - -### client api - -* the new `/manage_services/get_pending_counts` command now includes the 'Services Object' in its response -* the client api version is now 67 diff --git a/docs/developer_api.md b/docs/developer_api.md index e80e6d09..41767eb5 100644 --- a/docs/developer_api.md +++ b/docs/developer_api.md @@ -2193,11 +2193,13 @@ Only use one. As with metadata fetching, you may only use the hash argument if y ``` Response: -: The actual path to the file on the host system. +: The actual path to the file on the host system. Filetype and size are included for convenience. ``` json title="Example response" { - "path" : "D:\hydrus_files\f7f\7f30c113810985b69014957c93bc25e8eb4cf3355dae36d8b9d011d8b0cf623a.jpg" + "path" : "D:\hydrus_files\f7f\7f30c113810985b69014957c93bc25e8eb4cf3355dae36d8b9d011d8b0cf623a.jpg", + "filetype" : "image/jpeg", + "size" : 95237 } ``` @@ -2584,7 +2586,7 @@ If there are no potential duplicate groups in the search, this returns an empty ### **POST `/manage_file_relationships/remove_potentials`** { id="manage_file_relationships_remove_potentials" } -Remove all potential pairs that any of the given files are a part of. If you hit [/manage\_file\_relationships/get\_file\_relationships](#get-manage_file_relationshipsget_file_relationships) after this on any of these files, they will have no potential relationships, and any hashes that were potential to them before will no longer, conversely, refer to these files as potentials. +Remove all potential pairs that any of the given files are a part of. 
If you hit [/manage\_file\_relationships/get\_file\_relationships](#manage_file_relationships_get_file_relationships) after this on any of these files, they will have no potential relationships, and any hashes that were potential to them before will no longer, conversely, refer to these files as potentials. Restricted access: : YES. Manage File Relationships permission needed. diff --git a/docs/duplicates_auto_resolution.md b/docs/duplicates_auto_resolution.md new file mode 100644 index 00000000..5ea00142 --- /dev/null +++ b/docs/duplicates_auto_resolution.md @@ -0,0 +1,47 @@ +--- +title: Filtering Duplicates Automatically +--- + +## Hey, this is a draft for a system that is not yet working, you can ignore it for now + +# the problem with duplicates processing + +The duplicates filter can be pretty tedious to work with. Pairs that have trivial differences are easy to resolve, but working through dozens of obvious resizes or pixel duplicates that all follow the same pattern can get boring. + +If only there were some way to automate common situations! We could have hydrus solve these trivial duplicates in the background, leaving us with less, more interesting work to do. + +## duplicates auto-resolution + +_This is a new system that I am still developing. The plan is to roll out a hardcoded rule that resolves jpeg and png pixel dupes and then iterate on the UI and workflow to let users add their own custom rules. If you try it, let me know how you find things!_ + +So, let's start with a simple and generally non-controversial example: pixel duplicate jpegs and pngs. When you save a jpeg, you get some 'fuzzy' artifacts, but when you save a png, it is always pixel perfect. Thus, when you have a normal jpeg and a png that are pixel duplicates, you _know_, for certain, that the png is a copy of the jpeg. 
This happens most often when someone is posting from one application to another, or with a phone, and rather than uploading the source jpeg, they do 'copy image' and paste that into the upload box--the browser creates the accursed 'Clipboard.png', and we are thus overwhelmed with spam. + +In this case, we always want to keep the (almost always smaller) jpeg and ditch the (bloated, derived) png, which in the duplicates system would be: + +- A two-part duplicates search, for 'system:filetype is jpg' and 'system:filetype is png', with 'must be pixel dupes'. +- Arranging 'the jpeg is A, the png is B' +- Sending the normal duplicate action of 'set A as better than B, and delete B'. + +Let's check out the 'auto-resolution' tab under the duplicates filtering page: + +(image) + +The auto-resolution system lets you have multiple 'rules'. Each represents a search, a way of testing pairs, and then an action. Let's check the edit dialog: + +(image of edit rules) + +(image of edit rule, png vs jpeg) + +Note that this adds the 'system:height/width > 128' predicates as a failsafe to ensure we are checking real images in this case, not tiny 16x16 icons where there might be a legitimate accidental jpeg/png pixel dupe, and where the decision on what to keep is not so simple. Automated systems are powerful magic wands, and we should always be careful waving them around. + +Talk about metadata conditional objects here. + +Talk about the pair Comparator stuff, 4x filesize and so on. Might be more UI, so maybe a picture of the sub-panel. + +Hydrus will work these rules in its normal background maintenance time. You can force them to work a bit harder if you want to catch up somewhere, but normally you can just leave them alone and they'll stay up to date with new imports. + +## future + +I will expand the Metadata Conditional to cover more tests, including most of the hooks in the duplicates filter summaries, like 'this has exif data'. 
And, assuming the trivial cases go well, I'd like to push toward less-certain comparisons and have some sort of tools for 'A is at least 99.7% similar to B', which will help with resize comparisons and differentiating dupes from alternates. + +I'd also eventually like auto-resolution to apply to files _as_ they are imported, so, in the vein of 'previously deleted', you could have an instant import result of 'duplicate discarded: (rule name)'. diff --git a/docs/filetypes.md b/docs/filetypes.md index 2d9c33bf..897d86f7 100644 --- a/docs/filetypes.md +++ b/docs/filetypes.md @@ -43,7 +43,7 @@ The filetype for a file can be overridden with `manage -> force filetype` in the If there are no frame durations provided hydrus will assume each frame should last 125ms. Hydrus will look inside the zip for a file called `animation.json` and try to parse it as the 2 most common metadata formats that PixivUtil and gallery-dl generate. The Ugoira file will only have a duration in the database if it contains a valid `animation.json`. -When played hydrus will first attempt to use the `animation.json` file but if it can't it will look for notes containing frame delays. First it looks for a note named `ugoira json` and attempts to read it like the `animation.json`, it then looks for a note called `ugoira frame delay array` which should be a note containing a simple JSON array, for example: `#!json [90, 90, 40, 90]`. +When played hydrus will first attempt to use the `animation.json` file, but if that does not exist, it will look for notes containing frame delays. First it looks for a note named `ugoira json` and attempts to read it like the `animation.json`, it then looks for a note called `ugoira frame delay array` which should be a note containing a simple JSON array, for example: `#!json [90, 90, 40, 90]`. 
## Video diff --git a/docs/getting_started_downloading.md b/docs/getting_started_downloading.md index c3b69c9b..02420e22 100644 --- a/docs/getting_started_downloading.md +++ b/docs/getting_started_downloading.md @@ -134,7 +134,7 @@ A few of the options have more information if you hover over them. : See [multiple file services](advanced_multiple_local_file_services.md), an advanced feature. **post import actions** -: See the [files section on filtering](getting_started_files.md#inbox-and-archive) for the first option, the other two have information if you hover over them. +: See the [files section on filtering](getting_started_files.md#inbox_and_archive) for the first option, the other two have information if you hover over them. ### Tag Parsing By default, hydrus now starts with a local tag service called 'downloader tags' and it will parse (get) all the tags from normal gallery sites and put them in this service. You don't have to do anything, you will get some decent tags. As you use the client, you will figure out which tags you like and where you want them. On the downloader page, click `import options`: @@ -154,6 +154,14 @@ The blacklist button will let you skip downloading files that have certain tags !!! warning The file limit and import options on the upper panel of a gallery or watcher page, if changed, will only apply to **new** queries. If you want to change the options for an existing queue, either do so on its highlight panel below or use the 'set options to queries' button. +#### Force Page Fetch + +By default, hydrus will not revisit web pages or API endpoints for URLs it knows A) refer to one known file only, and B) that file is already in your database or has previously been deleted. The way it navigates this can be a complicated mix of hash and URL data, and in certain logical situations hydrus will determine its own records are untrustworthy and decide to check the source again. 
This saves bandwidth and time as you run successive queries that include the same results. You should not disable the capability for normal operation. + +But if you mess up your tag import options somewhere and need to re-run a download with forced tag re-fetching, how to do it? + +At the moment, this is in tag import options, the `force page fetch even if...` checkboxes. You can either set up a one-time downloader page with specific tag import options that check both of these checkboxes and then paste URLs in, or you can right-click a selection of thumbnails and have hydrus create the page for you under the _urls->force metadata refetch_ menu. Once you are done with the downloader page, delete it and do not use it for normal jobs--again, this method of downloading is inefficient and should not be used for repeating, long-term, or speculative jobs. Only use it to fill in specific holes. + ### Note Parsing Hydrus also parses 'notes' from some sites. This is a young feature, and a little advanced at times, but it generally means the comments that artists leave on certain gallery sites, or something like a tweet text. Notes are editable by you and appear in a hovering window on the right side of the media viewer. diff --git a/docs/getting_started_more_tags.md b/docs/getting_started_more_tags.md index e696b44f..72fbf91f 100644 --- a/docs/getting_started_more_tags.md +++ b/docs/getting_started_more_tags.md @@ -32,7 +32,7 @@ Each tag service comes with its own tags, siblings and parents. The intent is to use this service for tags you yourself want to add. ### Downloader tags -The default [tag parse target](getting_started_downloading.md#parsing). Tags of things you download will end up here unless you change the settings. It's probably a good idea to set up some tag blacklists for tags you don't want. +The default place for tags coming from downloaders. Tags of things you download will end up here unless you change the settings.
It is a good idea to set up some tag blacklists for tags you do not want. ## Tag repositories diff --git a/docs/getting_started_tags.md b/docs/getting_started_tags.md index 1aefb221..abe777c1 100644 --- a/docs/getting_started_tags.md +++ b/docs/getting_started_tags.md @@ -11,7 +11,7 @@ So, you have some files imported. Let's give them some tags so we can find them [FAQ: what is a tag?](faq.md#tags) -Your client starts with two [local tags services](getting_started_tags.md#tag_services), called 'my tags' and 'downloader tags' which keep all of their file->tag mappings in your client's database where only you can see them. 'my tags' is a good place to practise. +Your client starts with two [local tags services](getting_started_more_tags.md#tag_services), called 'my tags' and 'downloader tags' which keep all of their file->tag mappings in your client's database where only you can see them. 'my tags' is a good place to practise. Select a file and press F3 to open the _manage tags dialog_: diff --git a/docs/old_changelog.html b/docs/old_changelog.html index 71e91e71..9761b858 100644 --- a/docs/old_changelog.html +++ b/docs/old_changelog.html @@ -34,6 +34,60 @@

changelog