From 608787e4555fad23498badceba420206c0546b79 Mon Sep 17 00:00:00 2001 From: Hydrus Network Developer Date: Wed, 20 Mar 2024 17:28:45 -0500 Subject: [PATCH 1/7] Revert "Fix for damaged URL definitions" This reverts commit 71420cefd2f1bde425a0135c0b4b3debe8ae4609. --- hydrus/client/networking/ClientNetworkingURLClass.py | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/hydrus/client/networking/ClientNetworkingURLClass.py b/hydrus/client/networking/ClientNetworkingURLClass.py index 31508427..57dfcb84 100644 --- a/hydrus/client/networking/ClientNetworkingURLClass.py +++ b/hydrus/client/networking/ClientNetworkingURLClass.py @@ -1049,15 +1049,7 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): num_total_path_components = len( self._path_components ) num_required_parameters = len( [ 1 for parameter in self._parameters if parameter.GetDefaultValue() is None ] ) num_total_parameters = len( self._parameters ) - - try: - - len_example_url = len( self.Normalise( self._example_url, ephemeral_ok = True ) ) - - except: - - len_example_url = len( self._example_url ) - + len_example_url = len( self.Normalise( self._example_url, ephemeral_ok = True ) ) return ( num_required_path_components, num_total_path_components, num_required_parameters, num_total_parameters, len_example_url ) From 9ab079a80f4aab656b11f8a62a3f5f17f5c13469 Mon Sep 17 00:00:00 2001 From: Hydrus Network Developer Date: Wed, 20 Mar 2024 17:28:51 -0500 Subject: [PATCH 2/7] Revert "Fixing URL comparison issue" This reverts commit 1da4a80ac2703e71340e74c80e9a5b755917f715. 
--- .../client/importing/ClientImportFileSeeds.py | 26 ------------------- 1 file changed, 26 deletions(-) diff --git a/hydrus/client/importing/ClientImportFileSeeds.py b/hydrus/client/importing/ClientImportFileSeeds.py index 37970441..6f274b5e 100644 --- a/hydrus/client/importing/ClientImportFileSeeds.py +++ b/hydrus/client/importing/ClientImportFileSeeds.py @@ -133,18 +133,6 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): self.file_seed_data = file_seed_data self.file_seed_data_for_comparison = file_seed_data - if self.file_seed_type == FILE_SEED_TYPE_URL: - - try: - - self.file_seed_data_for_comparison = CG.client_controller.network_engine.domain_manager.NormaliseURL( self.file_seed_data ) - - except: - - pass - - - self.created = HydrusTime.GetNow() self.modified = self.created self.source_time = None @@ -310,20 +298,6 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): serialisable_hashes ) = serialisable_info - self.file_seed_data_for_comparison = self.file_seed_data - - if self.file_seed_type == FILE_SEED_TYPE_URL: - - try: - - self.file_seed_data_for_comparison = CG.client_controller.network_engine.domain_manager.NormaliseURL( self.file_seed_data ) - - except: - - pass - - - self._external_filterable_tags = set( serialisable_external_filterable_tags ) self._external_additional_service_keys_to_tags = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_external_additional_service_keys_to_tags ) From 4ae4e66a062d1cf4398ca45fa5fceaf0456b3714 Mon Sep 17 00:00:00 2001 From: Hydrus Network Developer Date: Wed, 20 Mar 2024 17:28:54 -0500 Subject: [PATCH 3/7] Revert "Version 567" This reverts commit 35eca25409cbb85e86e236b55bf84ca4ffbc7261. 
--- docs/PTR.md | 2 +- docs/changelog.md | 75 ++-- docs/old_changelog.html | 32 -- hydrus/client/ClientOptions.py | 1 - hydrus/client/db/ClientDB.py | 25 -- hydrus/client/gui/ClientGUI.py | 2 +- hydrus/client/gui/ClientGUIDownloaders.py | 331 +++++++--------- hydrus/client/gui/ClientGUIFileSeedCache.py | 15 +- hydrus/client/gui/ClientGUIGallerySeedLog.py | 3 +- hydrus/client/gui/ClientGUIMediaActions.py | 2 +- hydrus/client/gui/ClientGUIMediaMenus.py | 5 +- .../gui/ClientGUIScrolledPanelsManagement.py | 8 +- hydrus/client/gui/ClientGUITags.py | 31 +- .../gui/lists/ClientGUIListConstants.py | 2 +- .../networking/ClientGUINetworkJobControl.py | 3 +- .../gui/search/ClientGUIPredicatesSingle.py | 41 +- hydrus/client/gui/widgets/ClientGUICommon.py | 21 +- .../client/importing/ClientImportFileSeeds.py | 14 +- .../importing/ClientImportGallerySeeds.py | 2 +- .../client/importing/ClientImportWatchers.py | 4 +- hydrus/client/metadata/ClientTags.py | 31 +- .../networking/ClientNetworkingDomain.py | 20 +- .../networking/ClientNetworkingFunctions.py | 87 ++-- .../client/networking/ClientNetworkingGUG.py | 70 ++-- .../networking/ClientNetworkingURLClass.py | 375 +++--------------- .../ClientSearchParseSystemPredicates.py | 14 +- hydrus/core/HydrusConstants.py | 2 +- hydrus/core/HydrusSerialisable.py | 1 - hydrus/core/files/HydrusFileHandling.py | 8 +- .../core/files/HydrusOfficeOpenXMLHandling.py | 20 +- .../HydrusNetworkVariableHandling.py | 2 - hydrus/external/SystemPredicateParser.py | 20 +- hydrus/test/TestClientNetworking.py | 67 +--- hydrus/test/TestClientTags.py | 22 +- hydrus/test/TestController.py | 2 +- 35 files changed, 423 insertions(+), 937 deletions(-) diff --git a/docs/PTR.md b/docs/PTR.md index 91a0cc58..06f43f77 100644 --- a/docs/PTR.md +++ b/docs/PTR.md @@ -16,7 +16,7 @@ The easiest method is to use the built in function, found under `help -> add the Once you are connected, Hydrus will proceed to download and then process the update files. 
The progress of this can be seen under `services -> review services -> remote -> tag repositories -> public tag repository`. Here you can view its status, your account (the default account is a shared public account. Currently only janitors and the administrator have personal accounts), tag status, and how synced you are. Being behind on the sync by a certain amount makes you unable to push tags and petitions until you are caught up again. !!! note "QuickSync 2" - If you are starting out with a completely fresh client, you can instead download a fully pre-synced client [here](https://breadthread.duckdns.org/) Though a little out of date, it will nonetheless save time. Some settings may differ from the defaults of an official installation. + If you are starting out with a completely fresh client, you can instead download a fully pre-synced client [here](https://breadthread.gay/) Though a little out of date, it will nonetheless save time. Some settings may differ from the defaults of an official installation. ## How does it work? For something to end up on the PTR it has to be pushed there. Tags can either be entered into the tag service manually by the user through the `manage tags` window, or be routed there by a parser when downloading files. See [parsing tags](getting_started_downloading.md). Once tags have been entered into the PTR tag service they are pending until pushed. This is indicated by the `pending ()` that will appear between `tags` and `help` in the menu bar. Here you can chose to either push your changes to the PTR or discard them. diff --git a/docs/changelog.md b/docs/changelog.md index 2faafde9..9b4b9b1c 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -7,45 +7,6 @@ title: Changelog !!! note This is the new changelog, only the most recent builds. For all versions, see the [old changelog](old_changelog.html). 
-## [Version 567](https://github.com/hydrusnetwork/hydrus/releases/tag/v567) - -### user contributions - -* thanks to a user, the new docx, pptx, and xlsx support is improved, with better thumbnails (better ratio, better icon itself, and sometimes an actual preview thumbnail for pptx), better file detection (fewer false positives with stuff like ppt templates), and word count for docx and pptx. I am queueing everyone's existing docx and pptx files for a metadata rescan and thumbnail regen on update -* thanks to a user, the cbz scanner now ignores the `__MACOSX` folder -* thanks to a user, setting the Qt style in *options->style* should be more reliable (fixing some name case sensitivity issues) -* thanks to a user, there's a new 'default' dark mode QSS stylesheet that has nicer valid/invalid colours. we'll build on this and try to detect dark mode better in future and auto-switch to this as the base when the application is in dark mode. - -### misc - -* added a 'tag in reverse' checkbox to the new incremental tagger panel. this simply applies the given iterator to the last file first and then works backwards, e.g. 5, 4, 3, 2, 1 for start=1, step=1 on five files -* all _new_ system:url predicates will have slightly different (standardised) labels, and all these labels should parse correctly in the system predicate parser if you copy/paste -* you should now be able to enter 'system:has url matching regex (regex with upper case)' and 'system:has url (url with upper case)' and it'll propage through parsing. this definitely has not™ broken any other predicate parsing. 
you can enter url class names with upper case if you want, but url class names should now match regardless of letter case -* you can now open the 'extra info' button (up top of a media viewer) on a jpeg if that jpeg has no exif or other human-readable metadata (to see just the progressive and subsampling info) -* added a new EXPERIMENTAL checkbox to _options->tag presentation_ that will replace emojis and other unicode symbol garbage with □. if you have crazy rendering for emoji stuff, try it out -* the tag summary generators that make thumbnail banners now wash their tags through the 'render tag for user' system, which will apply this new emoji rule and 'replace underscores with spaces' -* updated the QuickSync link to its new home at https://breadthread.duckdns.org/ - -### URL storage/display changes - -* today I correct a foolish decision I made when I first implemented the hydrus downloader engine--handling and storing URLs internally as 'pretty' decoded text, rather than with the proper ugly '%20" stuff you sometimes see. this improves support for weird URLs and makes some behind the scenes things simpler. you do not need to make any changes, but there is a chance some particularly funky URLs will redownload once more if your subscription runs into them again (this change breaks some 'known url' checking logic, since what is stored is now slightly different, but this 99% doesn't affect Post URLs, so no big worries) -* so, URLs are no longer decoded in the normalisation step. they are now saved in the file log as their proper actual 'what is sent to the server' encoded text. it will display in UI as the pretty version, but if you copy to clipboard, you get the data version--pretty much how your web browser address bar works. I have made it show 'pretty' in the file log and search log lists, 'copy url' menu labels, and hyperlink tooltips, but in the more technical 'manage GUGs' and so on, it shows the data version. 
let me know if I have forgotten to display them pretty anywhere! -* when you paste a URL, some new normalisation tech tries to figure out if it is pre-encoded or not -* there's also some GUG work. when you enter a query text like `male/female` or `blonde_hair%20blue_eyes`, some new logic tries to infer whether what you entered is encoded or not. it should handle pretty much everything well unless you have a single-tag query with a legit percent character in the middle (in which case you'll have to enter `%25` instead, but we'll see if it ever happens) -* these changes simplify the url parsing routine, eliminating plenty of nonsense hackery I've inserted over the years to make things like `6+girls blonde_hair`/`6%2Bgirls+blonde_hair` work with a merged system. this has mostly been a delicate cleanup job; long planned, finally triggered - -### ephemeral URL parameters - -* I was going to roll out 'ephemeral token' parameters, and I basically had it done, but I realised late that I was being stupid in a brand new way, basically expanding the whitelist when turning off the blacklist was a nicer solution. I will work on this more next week, I think ultimately making it so Post URLs are not clipped of undefined parameters before they are is sent to the server, just like for Gallery URLs. I will separately introduce 'I just need to add some random hex in this parameter to tell this cache I want the original' under different tech -* so, I did some behind the scenes URL filtering tech, and file import objects handle full and stripped down versions of Post URLs, but it doesn't do much yet - -### boring cleanup - -* I cleaned up some URL Class code -* the URL Class has a new buddy 'Parameter' class to handle param testing -* rewrote how the query string of a URL is deconstructed and scanned against your parameters. 
less chance of edge-case errors/merges and easier to expand in future -* brushed up the URL Class unit tests to account for the above changes and added new tests for encoding, ephemeral, and default parameter values (which must have been missed a long time ago) - ## [Version 566](https://github.com/hydrusnetwork/hydrus/releases/tag/v566) ### incremental tagging @@ -445,3 +406,39 @@ title: Changelog * some 'number of tags' queries should be a little faster * the 'tag suggestions' options page has a bit of brushed up UI and some new explanation labels * unified the various thumbnail generation error reporting for all the different filetypes. it should also print the file's hash, too, since most of these error contexts only have a temporary path to talk about at this stage, which isn't useful after the fact + +## [Version 557](https://github.com/hydrusnetwork/hydrus/releases/tag/v557) + +### misc + +* optimised large tag filter edit UI. you can now paste 5,000 items into an empty tag filter blacklist in less than a second, and if you have a big tag filter, removing or adding one thing is now instant (previously, this stuff would lag 4 seconds or more, sometimes multiple minutes!!) +* the ugoira 'num frames' counting method now discludes files ending in .js/.json, to catch future bundling of frame timings +* the cbz scanning tech should now recognise cbzs with four or fewer pages +* a legacy 'is this image all good?' check that happens on PIL-loading is now gone. this improves rendering for a variety of truncated files and clarifies some error messages (previously, this thing was just failing silently) +* fixed the delete file pre-flight logic so users on the non-advanced delete dialog can now delete repository updates. previously, they saw the menu entry, but hitting it was a no-op + +### better hash predicate parsing + +* `system:hash` labels are a little different now. they'll say `system:hash (md5) is abcd...`, with the algorithm after the "hash". 
hash is omitted for sha256 (the hydrus default). this eases parsing +* `system:similar to data` labels are a little different. they'll say 'distance' instead of 'max hamming', and the number and type of hashes they hold, and if they hold only pixel hashes, the distance is not stated +* `system:hash` predicate parsing is now more flexible. you can put the hash type pretty much anywhere now. +* `system:similar to` and `system:similar to data` predicate parsing is now more flexible. more combinations are allowed, and you can not include distance and it'll be fine +* these three hash predicates now copy to clipboard with all their hashes explicitly enumerated, making strings that are fully parsable! this is a big step forward in a completely sealed import-export predicate parsing loop; now I have the tech set up to export a different phrase to clipboard than what you see in the label, I just need the examples of where it goes wrong. if there is a system predicate that copies to clipboard in a way that won't parse back, let me know and I'll see if I can fix it. +* added more unit tests for this parsing + +### documentation and cleanup + +* wrote a guide on how to install 'Git for Windows' for the 'running from source' help. although most of the settings in its marathon 12-page install wizard can be left as default, the technical questions can be intimidating, so I've written them all out for a nice simple install. also brushed up some of the surrounding help here +* added a warning to the regular 'installing and updating' help regarding the danger of test-running extract releases before updating (you can overwrite your database by accident) +* thanks to a user, the filetypes help document is updated with Ugoira and CBZ info +* all the 'HydrusFiletypeHandling' files are refactored to a new 'files' module. there's a bunch of them these days! 
+* the hydrus.core.images module is moved beneath this 'files' module too +* the file log list panel right-click menu now says 'open URLs'/'open files' locations' depending on whether you are looking at a URL import log or local HDD import log + +### client api + +* the `file_metadata` call now returns `filetype_forced` and, if so, also `original_mime` to talk about the new forced filetype system +* the client api help and unit tests are updated to test this is working ok +* fixed a typo that was causing too much work in the updated file info manager call (and was often returning 'null' results for half-cached `file_metadata` requests with `only_return_basic_information=true`) +* thanks to a user, the `/add_urls/get_url_info` Client API call now has a cache timeout of ten minutes, and the `/add_urls/get_url_files` call now has a timeout of 30 seconds if all the files are 'already in db'. this should automatically reduce some overhead for several programs that talk to the Client API a lot about URLs +* the client api version is now 58 diff --git a/docs/old_changelog.html b/docs/old_changelog.html index ff8dd63f..2d474a2c 100644 --- a/docs/old_changelog.html +++ b/docs/old_changelog.html @@ -34,38 +34,6 @@

changelog

    -
  • -

    version 567

    -
      -
    • user contributions

    • -
    • thanks to a user, the new docx, pptx, and xlsx support is improved, with better thumbnails (better ratio, better icon itself, and sometimes an actual preview thumbnail for pptx), better file detection (fewer false positives with stuff like ppt templates), and word count for docx and pptx. I am queueing everyone's existing docx and pptx files for a metadata rescan and thumbnail regen on update
    • -
    • thanks to a user, the cbz scanner now ignores the `__MACOSX` folder
    • -
    • thanks to a user, setting the Qt style in *options->style* should be more reliable (fixing some name case sensitivity issues)
    • -
    • thanks to a user, there's a new 'default' dark mode QSS stylesheet that has nicer valid/invalid colours. we'll build on this and try to detect dark mode better in future and auto-switch to this as the base when the application is in dark mode.
    • -
    • misc

    • -
    • added a 'tag in reverse' checkbox to the new incremental tagger panel. this simply applies the given iterator to the last file first and then works backwards, e.g. 5, 4, 3, 2, 1 for start=1, step=1 on five files
    • -
    • all _new_ system:url predicates will have slightly different (standardised) labels, and all these labels should parse correctly in the system predicate parser if you copy/paste
    • -
    • you should now be able to enter 'system:has url matching regex (regex with upper case)' and 'system:has url (url with upper case)' and it'll propagate through parsing. this definitely has not™ broken any other predicate parsing. you can enter url class names with upper case if you want, but url class names should now match regardless of letter case
    • -
    • you can now open the 'extra info' button (up top of a media viewer) on a jpeg if that jpeg has no exif or other human-readable metadata (to see just the progressive and subsampling info)
    • -
    • added a new EXPERIMENTAL checkbox to _options->tag presentation_ that will replace emojis and other unicode symbol garbage with □. if you have crazy rendering for emoji stuff, try it out
    • -
    • the tag summary generators that make thumbnail banners now wash their tags through the 'render tag for user' system, which will apply this new emoji rule and 'replace underscores with spaces'
    • -
    • updated the QuickSync link to its new home at https://breadthread.duckdns.org/
    • -
    • URL storage/display changes

    • -
    • today I correct a foolish decision I made when I first implemented the hydrus downloader engine--handling and storing URLs internally as 'pretty' decoded text, rather than with the proper ugly '%20' stuff you sometimes see. this improves support for weird URLs and makes some behind the scenes things simpler. you do not need to make any changes, but there is a chance some particularly funky URLs will redownload once more if your subscription runs into them again (this change breaks some 'known url' checking logic, since what is stored is now slightly different, but this 99% doesn't affect Post URLs, so no big worries)
    • -
    • so, URLs are no longer decoded in the normalisation step. they are now saved in the file log as their proper actual 'what is sent to the server' encoded text. it will display in UI as the pretty version, but if you copy to clipboard, you get the data version--pretty much how your web browser address bar works. I have made it show 'pretty' in the file log and search log lists, 'copy url' menu labels, and hyperlink tooltips, but in the more technical 'manage GUGs' and so on, it shows the data version. let me know if I have forgotten to display them pretty anywhere!
    • -
    • when you paste a URL, some new normalisation tech tries to figure out if it is pre-encoded or not
    • -
    • there's also some GUG work. when you enter a query text like `male/female` or `blonde_hair%20blue_eyes`, some new logic tries to infer whether what you entered is encoded or not. it should handle pretty much everything well unless you have a single-tag query with a legit percent character in the middle (in which case you'll have to enter `%25` instead, but we'll see if it ever happens)
    • -
    • these changes simplify the url parsing routine, eliminating plenty of nonsense hackery I've inserted over the years to make things like `6+girls blonde_hair`/`6%2Bgirls+blonde_hair` work with a merged system. this has mostly been a delicate cleanup job; long planned, finally triggered
    • -
    • ephemeral URL parameters

    • -
    • I was going to roll out 'ephemeral token' parameters, and I basically had it done, but I realised late that I was being stupid in a brand new way, basically expanding the whitelist when turning off the blacklist was a nicer solution. I will work on this more next week, I think ultimately making it so Post URLs are not clipped of undefined parameters before they are sent to the server, just like for Gallery URLs. I will separately introduce 'I just need to add some random hex in this parameter to tell this cache I want the original' under different tech
    • -
    • so, I did some behind the scenes URL filtering tech, and file import objects handle full and stripped down versions of Post URLs, but it doesn't do much yet
    • -
    • boring cleanup

    • -
    • I cleaned up some URL Class code
    • -
    • the URL Class has a new buddy 'Parameter' class to handle param testing
    • -
    • rewrote how the query string of a URL is deconstructed and scanned against your parameters. less chance of edge-case errors/merges and easier to expand in future
    • -
    • brushed up the URL Class unit tests to account for the above changes and added new tests for encoding, ephemeral, and default parameter values (which must have been missed a long time ago)
    • -
    -
  • version 566

      diff --git a/hydrus/client/ClientOptions.py b/hydrus/client/ClientOptions.py index c2a8a614..f9e18c07 100644 --- a/hydrus/client/ClientOptions.py +++ b/hydrus/client/ClientOptions.py @@ -164,7 +164,6 @@ class ClientOptions( HydrusSerialisable.SerialisableBase ): self._dictionary[ 'booleans' ][ 'show_number_namespaces' ] = True self._dictionary[ 'booleans' ][ 'show_subtag_number_namespaces' ] = True self._dictionary[ 'booleans' ][ 'replace_tag_underscores_with_spaces' ] = False - self._dictionary[ 'booleans' ][ 'replace_tag_emojis_with_boxes' ] = False self._dictionary[ 'booleans' ][ 'verify_regular_https' ] = True diff --git a/hydrus/client/db/ClientDB.py b/hydrus/client/db/ClientDB.py index a526ca92..ee583575 100644 --- a/hydrus/client/db/ClientDB.py +++ b/hydrus/client/db/ClientDB.py @@ -10413,31 +10413,6 @@ class DB( HydrusDB.HydrusDB ): - if version == 566: - - try: - - table_join = self.modules_files_storage.GetTableJoinLimitedByFileDomain( self.modules_services.combined_local_file_service_id, 'files_info', HC.CONTENT_STATUS_CURRENT ) - - hash_ids = self._STL( self._Execute( 'SELECT hash_id FROM {} WHERE mime IN {};'.format( table_join, HydrusData.SplayListForDB( [ HC.APPLICATION_DOCX ] ) ) ) ) - - self.modules_files_maintenance_queue.AddJobs( hash_ids, ClientFiles.REGENERATE_FILE_DATA_JOB_FILE_METADATA ) - - hash_ids = self._STL( self._Execute( 'SELECT hash_id FROM {} WHERE mime IN {};'.format( table_join, HydrusData.SplayListForDB( [ HC.APPLICATION_PPTX ] ) ) ) ) - - self.modules_files_maintenance_queue.AddJobs( hash_ids, ClientFiles.REGENERATE_FILE_DATA_JOB_FILE_METADATA ) - self.modules_files_maintenance_queue.AddJobs( hash_ids, ClientFiles.REGENERATE_FILE_DATA_JOB_FORCE_THUMBNAIL ) - - except Exception as e: - - HydrusData.PrintException( e ) - - message = 'Trying to schedule a document metadata scan failed! Please let hydrus dev know!' 
- - self.pub_initial_message( message ) - - - self._controller.frame_splash_status.SetTitleText( 'updated db to v{}'.format( HydrusData.ToHumanInt( version + 1 ) ) ) self._Execute( 'UPDATE version SET version = ?;', ( version + 1, ) ) diff --git a/hydrus/client/gui/ClientGUI.py b/hydrus/client/gui/ClientGUI.py index d5501bdf..3f5e4bff 100644 --- a/hydrus/client/gui/ClientGUI.py +++ b/hydrus/client/gui/ClientGUI.py @@ -2213,7 +2213,7 @@ class FrameGUI( CAC.ApplicationCommandProcessorMixin, ClientGUITopLevelWindows.M additional_service_keys_to_tags = ClientTags.ServiceKeysToTags() - url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url, ephemeral_ok = True ) + url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url ) ( url_type, match_name, can_parse, cannot_parse_reason ) = self._controller.network_engine.domain_manager.GetURLParseCapability( url ) diff --git a/hydrus/client/gui/ClientGUIDownloaders.py b/hydrus/client/gui/ClientGUIDownloaders.py index 39259ecc..fb2f3f75 100644 --- a/hydrus/client/gui/ClientGUIDownloaders.py +++ b/hydrus/client/gui/ClientGUIDownloaders.py @@ -366,7 +366,7 @@ class EditGUGPanel( ClientGUIScrolledPanels.EditPanel ): example_url = gug.GetExampleURL() - example_url = CG.client_controller.network_engine.domain_manager.NormaliseURL( example_url, ephemeral_ok = True ) + example_url = CG.client_controller.network_engine.domain_manager.NormaliseURL( example_url ) self._example_url.setText( example_url ) @@ -707,7 +707,7 @@ class EditGUGsPanel( ClientGUIScrolledPanels.EditPanel ): try: - example_url = CG.client_controller.network_engine.domain_manager.NormaliseURL( example_url, ephemeral_ok = True ) + example_url = CG.client_controller.network_engine.domain_manager.NormaliseURL( example_url ) url_class = CG.client_controller.network_engine.domain_manager.GetURLClass( example_url ) @@ -913,96 +913,6 @@ class EditGUGsPanel( ClientGUIScrolledPanels.EditPanel ): return gugs - -class 
EditURLClassParameterFixedNamePanel( ClientGUIScrolledPanels.EditPanel ): - - def __init__( self, parent: QW.QWidget, parameter: ClientNetworkingURLClass.URLClassParameterFixedName, dupe_names ): - - # maybe graduate this guy to a 'any type of parameter' panel and have a dropdown and show/hide fixed name etc.. - - ClientGUIScrolledPanels.EditPanel.__init__( self, parent ) - - self._dupe_names = dupe_names - - self._fixed_name = QW.QLineEdit( self ) - self._fixed_name.setToolTip( 'The "key" of the key=value pair.' ) - - value_string_match_panel = ClientGUICommon.StaticBox( self, 'value' ) - - from hydrus.client.gui import ClientGUIStringPanels - - self._value_string_match = ClientGUIStringPanels.EditStringMatchPanel( value_string_match_panel, parameter.GetValueStringMatch() ) - self._value_string_match.setToolTip( 'If the value of the key=value pair matches this, the URL Class matches!' ) - - self._default_value = ClientGUICommon.NoneableTextCtrl( self ) - self._default_value.setToolTip( 'If the URL is missing this key=value pair, you can add it here, and the URL Class will still match and will normalise with this default value. This can be useful for gallery URLs that have an implicit page=1 or index=0 for their first result--sometimes it is better to make that stuff explicit.' 
) - - # - - self.SetValue( parameter ) - - # - - value_string_match_panel.Add( self._value_string_match, CC.FLAGS_EXPAND_BOTH_WAYS ) - - rows = [] - - rows.append( ( 'name: ', self._fixed_name ) ) - rows.append( value_string_match_panel ) - rows.append( ( 'default value: ', self._default_value ) ) - - gridbox = ClientGUICommon.WrapInGrid( self, rows, add_stretch_at_end = False, expand_single_widgets = True ) - - vbox = QP.VBoxLayout() - - QP.AddToLayout( vbox, gridbox, CC.FLAGS_EXPAND_SIZER_BOTH_WAYS ) - - self.widget().setLayout( vbox ) - - - def _GetValue( self ): - - name = self._fixed_name.text() - - value_string_match = self._value_string_match.GetValue() - default_value = self._default_value.GetValue() - - parameter = ClientNetworkingURLClass.URLClassParameterFixedName( - name = name, - value_string_match = value_string_match, - default_value = default_value - ) - - return parameter - - - def GetValue( self ): - - parameter = self._GetValue() - - name = parameter.GetName() - - if name == '': - - raise HydrusExceptions.VetoException( 'Sorry, you have to set a key/name!' ) - - - if name in self._dupe_names: - - raise HydrusExceptions.VetoException( 'Sorry, your key/name already exists, pick something else!' 
) - - - return parameter - - - def SetValue( self, parameter: ClientNetworkingURLClass.URLClassParameterFixedName ): - - self._fixed_name.setText( parameter.GetName() ) - self._value_string_match.SetValue( parameter.GetValueStringMatch() ) - self._default_value.SetValue( parameter.GetDefaultValue() ) - - - class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): def __init__( self, parent: QW.QWidget, url_class: ClientNetworkingURLClass.URLClass ): @@ -1022,14 +932,7 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): self._url_type.addItem( HC.url_type_string_lookup[ u_t ], u_t ) - url_type = url_class.GetURLType() - preferred_scheme = url_class.GetPreferredScheme() - netloc = url_class.GetNetloc() - path_components = url_class.GetPathComponents() - parameters = url_class.GetParameters() - api_lookup_converter = url_class.GetAPILookupConverter() - ( send_referral_url, referral_url_converter ) = url_class.GetReferralURLInfo() - example_url = url_class.GetExampleURL() + ( url_type, preferred_scheme, netloc, path_components, parameters, api_lookup_converter, send_referral_url, referral_url_converter, example_url ) = url_class.ToTuple() self._notebook = ClientGUICommon.BetterNotebook( self ) @@ -1228,12 +1131,6 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): self._example_url_classes = ClientGUICommon.BetterStaticText( self ) - self._ephemeral_normalised_url = QW.QLineEdit( self ) - self._ephemeral_normalised_url.setReadOnly( True ) - self._ephemeral_normalised_url.setToolTip( 'This is what will be sent to the server.' 
) - - self._ephemeral_normalised_url.setVisible( False ) - self._normalised_url = QW.QLineEdit( self ) self._normalised_url.setReadOnly( True ) @@ -1269,7 +1166,7 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): self._path_components.AddDatas( path_components ) - self._parameters.AddDatas( parameters ) + self._parameters.AddDatas( list( parameters.items() ) ) self._parameters.Sort() @@ -1364,8 +1261,8 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): rows.append( ( 'if matching by subdomain, keep it when normalising?: ', self._keep_matched_subdomains ) ) rows.append( ( 'alphabetise GET parameters when normalising?: ', self._alphabetise_get_parameters ) ) - rows.append( ( 'do not match on any extra path components?: ', self._no_more_path_components_than_this ) ) - rows.append( ( 'do not match on any extra parameters?: ', self._no_more_parameters_than_this ) ) + rows.append( ( 'do not allow any extra path components?: ', self._no_more_path_components_than_this ) ) + rows.append( ( 'do not allow any extra parameters?: ', self._no_more_parameters_than_this ) ) rows.append( ( 'keep fragment when normalising?: ', self._keep_fragment ) ) rows.append( ( 'post page can produce multiple files?: ', self._can_produce_multiple_files ) ) rows.append( ( 'associate a \'known url\' with resulting files?: ', self._should_be_associated_with_files ) ) @@ -1390,7 +1287,6 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): rows = [] rows.append( ( 'example url: ', self._example_url ) ) - #rows.append( ( 'url sent to the server: ', self._ephemeral_normalised_url ) ) rows.append( ( 'normalised url: ', self._normalised_url ) ) gridbox_2 = ClientGUICommon.WrapInGrid( self, rows ) @@ -1431,25 +1327,11 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): def _AddParameters( self ): - existing_names = self._GetExistingParameterNames() - - parameter = ClientNetworkingURLClass.URLClassParameterFixedName() - - with 
ClientGUITopLevelWindowsPanels.DialogEdit( self, 'edit parameter' ) as dlg: - - panel = EditURLClassParameterFixedNamePanel( dlg, parameter, existing_names ) - - dlg.SetPanel( panel ) + with ClientGUIDialogs.DialogTextEntry( self, 'edit the key', placeholder = 'key', allow_blank = False ) as dlg: if dlg.exec() == QW.QDialog.Accepted: - parameter = panel.GetValue() - - self._parameters.AddDatas( ( parameter, ) ) - - self._parameters.Sort() - - self._UpdateControls() + key = dlg.GetValue() else: @@ -1457,6 +1339,66 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): + existing_keys = self._GetExistingKeys() + + if key in existing_keys: + + ClientGUIDialogsMessage.ShowWarning( self, 'That key already exists!' ) + + return + + + string_match = ClientStrings.StringMatch() + + with ClientGUITopLevelWindowsPanels.DialogEdit( self, 'edit value' ) as dlg: + + from hydrus.client.gui import ClientGUIStringPanels + + panel = ClientGUIStringPanels.EditStringMatchPanel( dlg, string_match ) + + dlg.SetPanel( panel ) + + if dlg.exec() == QW.QDialog.Accepted: + + string_match = panel.GetValue() + + with ClientGUIDialogs.DialogTextEntry( self, 'Enter optional \'default\' value for this parameter, which will be filled in if missing. Leave blank for none (recommended).', allow_blank = True ) as dlg_default: + + if dlg_default.exec() == QW.QDialog.Accepted: + + default = dlg_default.GetValue() + + if default == '': + + default = None + + elif not string_match.Matches( default ): + + ClientGUIDialogsMessage.ShowWarning( self, 'That default does not match the given rule! Clearing it to none!' 
) + + default = None + + + else: + + return + + + + else: + + return + + + + data = ( key, ( string_match, default ) ) + + self._parameters.AddDatas( ( data, ) ) + + self._parameters.Sort() + + self._UpdateControls() + def _AddPathComponent( self ): @@ -1466,31 +1408,23 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): return self._EditPathComponent( ( string_match, default ) ) - def _ConvertParameterToListCtrlTuples( self, parameter: ClientNetworkingURLClass.URLClassParameterFixedName ): + def _ConvertParameterToListCtrlTuples( self, data ): - name = parameter.GetName() - value_string_match = parameter.GetValueStringMatch() + ( key, ( string_match, default ) ) = data - pretty_name = name - pretty_value_string_match = value_string_match.ToString() + pretty_key = key + pretty_string_match = string_match.ToString() - default_value = parameter.GetDefaultValue() - - if default_value is not None: + if default is not None: - pretty_value_string_match += f' (default "{default_value}")' + pretty_string_match += ' (default "' + default + '")' - if parameter.IsEphemeralToken(): - - pretty_value_string_match += ' (is ephemeral)' - + sort_key = pretty_key + sort_string_match = pretty_string_match - sort_name = pretty_name - sort_string_match = pretty_value_string_match - - display_tuple = ( pretty_name, pretty_value_string_match ) - sort_tuple = ( sort_name, sort_string_match ) + display_tuple = ( pretty_key, pretty_string_match ) + sort_tuple = ( sort_key, sort_string_match ) return ( display_tuple, sort_tuple ) @@ -1524,28 +1458,86 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): for parameter in selected_params: - existing_names = set( self._GetExistingParameterNames() ) + ( original_key, ( original_string_match, original_default ) ) = parameter - existing_names.discard( parameter.GetName() ) + with ClientGUIDialogs.DialogTextEntry( self, 'edit the key', default = original_key, allow_blank = False ) as dlg: + + if dlg.exec() == 
QW.QDialog.Accepted: + + key = dlg.GetValue() + + else: + + return + + + + if key != original_key: + + existing_keys = self._GetExistingKeys() + + if key in existing_keys: + + ClientGUIDialogsMessage.ShowWarning( self, 'That key already exists!' ) + + return + + with ClientGUITopLevelWindowsPanels.DialogEdit( self, 'edit value' ) as dlg: - panel = EditURLClassParameterFixedNamePanel( self, parameter, existing_names ) + from hydrus.client.gui import ClientGUIStringPanels + + panel = ClientGUIStringPanels.EditStringMatchPanel( dlg, original_string_match ) dlg.SetPanel( panel ) if dlg.exec() == QW.QDialog.Accepted: - edited_parameter = panel.GetValue() + string_match = panel.GetValue() - self._parameters.DeleteDatas( ( parameter, ) ) + if original_default is None: + + original_default = '' + - self._parameters.AddDatas( ( edited_parameter, ) ) + with ClientGUIDialogs.DialogTextEntry( self, 'Enter optional \'default\' value for this parameter, which will be filled in if missing. Leave blank for none (recommended).', default = original_default, allow_blank = True ) as dlg_default: + + if dlg_default.exec() == QW.QDialog.Accepted: + + default = dlg_default.GetValue() + + if default == '': + + default = None + + elif not string_match.Matches( default ): + + ClientGUIDialogsMessage.ShowWarning( self, 'That default does not match the given rule! Clearing it to none!' 
) + + default = None + + + else: + + return + + - edited_datas.append( edited_parameter ) + else: + + return + self._parameters.DeleteDatas( ( parameter, ) ) + + new_parameter = ( key, ( string_match, default ) ) + + self._parameters.AddDatas( ( new_parameter, ) ) + + edited_datas.append( new_parameter ) + self._parameters.SelectDatas( edited_datas ) @@ -1605,13 +1597,13 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): - def _GetExistingParameterNames( self ) -> typing.Set[ str ]: + def _GetExistingKeys( self ): - parameters = self._parameters.GetData() + params = self._parameters.GetData() - fixed_names = { parameter.GetName() for parameter in parameters if isinstance( parameter, ClientNetworkingURLClass.URLClassParameterFixedName ) } + keys = { key for ( key, string_match ) in params } - return fixed_names + return keys def _GetValue( self ): @@ -1622,7 +1614,7 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): preferred_scheme = self._preferred_scheme.GetValue() netloc = self._netloc.text() path_components = self._path_components.GetData() - parameters = self._parameters.GetData() + parameters = dict( self._parameters.GetData() ) has_single_value_parameters = self._has_single_value_parameters.isChecked() single_value_parameters_string_match = self._single_value_parameters_string_match.GetValue() header_overrides = self._header_overrides.GetValue() @@ -1704,16 +1696,11 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): - for parameter in self._parameters.GetData(): + for ( index, ( key, ( string_match, default ) ) ) in enumerate( self._parameters.GetData() ): - if isinstance( parameter, ClientNetworkingURLClass.URLClassParameterFixedName ): + if True in ( string_match.Matches( n ) for n in ( '0', '1', '10', '100', '42' ) ): - if True in ( parameter.MatchesValue( n ) for n in ( '0', '1', '10', '100', '42' ) ): - - name = parameter.GetName() - - choices.append( ( f'{name} parameter', ( 
ClientNetworkingURLClass.GALLERY_INDEX_TYPE_PARAMETER, name ) ) ) - + choices.append( ( key + ' parameter', ( ClientNetworkingURLClass.GALLERY_INDEX_TYPE_PARAMETER, key ) ) ) @@ -1791,21 +1778,6 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): self._normalised_url.setText( normalised ) - ephemeral_normalised = url_class.Normalise( example_url, ephemeral_ok = True ) - - if ephemeral_normalised != normalised: - - self._ephemeral_normalised_url.setText( ephemeral_normalised ) - - self._ephemeral_normalised_url.setEnabled( True ) - - else: - - self._ephemeral_normalised_url.setText( '' ) - - self._ephemeral_normalised_url.setEnabled( False ) - - self._referral_url_converter.SetExampleString( normalised ) self._api_lookup_converter.SetExampleString( normalised ) @@ -1909,7 +1881,6 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): self._example_url_classes.setText( 'Example does not match - '+reason ) self._example_url_classes.setObjectName( 'HydrusInvalid' ) - self._ephemeral_normalised_url.clear() self._normalised_url.clear() self._api_url.clear() diff --git a/hydrus/client/gui/ClientGUIFileSeedCache.py b/hydrus/client/gui/ClientGUIFileSeedCache.py index d21c2e34..d9a5cac5 100644 --- a/hydrus/client/gui/ClientGUIFileSeedCache.py +++ b/hydrus/client/gui/ClientGUIFileSeedCache.py @@ -32,7 +32,6 @@ from hydrus.client.importing import ClientImportFileSeeds from hydrus.client.importing.options import PresentationImportOptions from hydrus.client.metadata import ClientContentUpdates from hydrus.client.metadata import ClientTagSorting -from hydrus.client.networking import ClientNetworkingFunctions def ClearFileSeeds( win: QW.QWidget, file_seed_cache: ClientImportFileSeeds.FileSeedCache, statuses_to_remove ): @@ -359,7 +358,7 @@ class EditFileSeedCachePanel( ClientGUIScrolledPanels.EditPanel ): QP.CallAfter( self._UpdateText ) - def _ConvertFileSeedToListCtrlTuples( self, file_seed: ClientImportFileSeeds.FileSeed ): + def 
_ConvertFileSeedToListCtrlTuples( self, file_seed ): try: @@ -374,22 +373,14 @@ class EditFileSeedCachePanel( ClientGUIScrolledPanels.EditPanel ): pretty_file_seed_index = '--' - file_seed_data = file_seed.file_seed_data_for_comparison + file_seed_data = file_seed.file_seed_data status = file_seed.status added = file_seed.created modified = file_seed.modified source_time = file_seed.source_time note = file_seed.note - if file_seed.file_seed_type == ClientImportFileSeeds.FILE_SEED_TYPE_URL: - - pretty_file_seed_data = ClientNetworkingFunctions.ConvertURLToHumanString( file_seed_data ) - - else: - - pretty_file_seed_data = file_seed_data - - + pretty_file_seed_data = str( file_seed_data ) pretty_status = CC.status_string_lookup[ status ] if status != CC.STATUS_UNKNOWN else '' pretty_added = ClientTime.TimestampToPrettyTimeDelta( added ) pretty_modified = ClientTime.TimestampToPrettyTimeDelta( modified ) diff --git a/hydrus/client/gui/ClientGUIGallerySeedLog.py b/hydrus/client/gui/ClientGUIGallerySeedLog.py index bfe4b9ef..fbfe8e00 100644 --- a/hydrus/client/gui/ClientGUIGallerySeedLog.py +++ b/hydrus/client/gui/ClientGUIGallerySeedLog.py @@ -24,7 +24,6 @@ from hydrus.client.gui.lists import ClientGUIListConstants as CGLC from hydrus.client.gui.lists import ClientGUIListCtrl from hydrus.client.gui.widgets import ClientGUICommon from hydrus.client.importing import ClientImportGallerySeeds -from hydrus.client.networking import ClientNetworkingFunctions def ClearGallerySeeds( win: QW.QWidget, gallery_seed_log: ClientImportGallerySeeds.GallerySeedLog, statuses_to_remove, gallery_type_string ): @@ -298,7 +297,7 @@ class EditGallerySeedLogPanel( ClientGUIScrolledPanels.EditPanel ): note = gallery_seed.note pretty_gallery_seed_index = HydrusData.ToHumanInt( gallery_seed_index ) - pretty_url = ClientNetworkingFunctions.ConvertURLToHumanString( url ) + pretty_url = url pretty_status = CC.status_string_lookup[ status ] if status != CC.STATUS_UNKNOWN else '' pretty_added = 
ClientTime.TimestampToPrettyTimeDelta( added ) pretty_modified = ClientTime.TimestampToPrettyTimeDelta( modified ) diff --git a/hydrus/client/gui/ClientGUIMediaActions.py b/hydrus/client/gui/ClientGUIMediaActions.py index 25da4b39..430cc7cb 100644 --- a/hydrus/client/gui/ClientGUIMediaActions.py +++ b/hydrus/client/gui/ClientGUIMediaActions.py @@ -857,7 +857,7 @@ def ShowFileEmbeddedMetadata( win: QW.QWidget, media: ClientMedia.MediaSingleton - if exif_dict is None and file_text is None and len( extra_rows ) == 0: + if exif_dict is None and file_text is None: ClientGUIDialogsMessage.ShowWarning( win, 'Sorry, could not see any human-readable information in this file! Hydrus should have known this, so if this keeps happening, you may need to schedule a rescan of this info in file maintenance.' ) diff --git a/hydrus/client/gui/ClientGUIMediaMenus.py b/hydrus/client/gui/ClientGUIMediaMenus.py index c089798c..528ac207 100644 --- a/hydrus/client/gui/ClientGUIMediaMenus.py +++ b/hydrus/client/gui/ClientGUIMediaMenus.py @@ -18,7 +18,6 @@ from hydrus.client.gui import ClientGUIMedia from hydrus.client.gui import ClientGUIMenus from hydrus.client.media import ClientMedia from hydrus.client.media import ClientMediaManagers -from hydrus.client.networking import ClientNetworkingFunctions def AddDuplicatesMenu( win: QW.QWidget, menu: QW.QMenu, location_context: ClientLocation.LocationContext, focus_singleton: ClientMedia.Media, num_selected: int, collections_selected: bool ): @@ -380,7 +379,7 @@ def AddKnownURLsViewCopyMenu( win, menu, focus_media, selected_media = None ): else: - label = url_class.GetName() + ': ' + ClientNetworkingFunctions.ConvertURLToHumanString( url ) + label = url_class.GetName() + ': ' + url focus_matched_labels_and_urls.append( ( label, url ) ) @@ -391,7 +390,7 @@ def AddKnownURLsViewCopyMenu( win, menu, focus_media, selected_media = None ): focus_labels_and_urls = list( focus_matched_labels_and_urls ) - focus_labels_and_urls.extend( ( ( 
ClientNetworkingFunctions.ConvertURLToHumanString( url ), url ) for url in focus_unmatched_urls ) ) + focus_labels_and_urls.extend( ( ( url, url ) for url in focus_unmatched_urls ) ) # figure out which urls these selected files have diff --git a/hydrus/client/gui/ClientGUIScrolledPanelsManagement.py b/hydrus/client/gui/ClientGUIScrolledPanelsManagement.py index f89bcdf7..0ccc447b 100644 --- a/hydrus/client/gui/ClientGUIScrolledPanelsManagement.py +++ b/hydrus/client/gui/ClientGUIScrolledPanelsManagement.py @@ -4222,9 +4222,6 @@ class ManageOptionsPanel( ClientGUIScrolledPanels.ManagePanel ): self._replace_tag_underscores_with_spaces = QW.QCheckBox( render_panel ) - self._replace_tag_emojis_with_boxes = QW.QCheckBox( render_panel ) - self._replace_tag_emojis_with_boxes.setToolTip( 'This will replace emojis and weird symbols with □ in front-facing user views, in case you are getting crazy rendering. It may break some CJK punctuation.' ) - # namespace_colours_panel = ClientGUICommon.StaticBox( self, 'namespace colours' ) @@ -4242,7 +4239,6 @@ class ManageOptionsPanel( ClientGUIScrolledPanels.ManagePanel ): self._show_subtag_number_namespaces.setChecked( new_options.GetBoolean( 'show_subtag_number_namespaces' ) ) self._namespace_connector.setText( new_options.GetString( 'namespace_connector' ) ) self._replace_tag_underscores_with_spaces.setChecked( new_options.GetBoolean( 'replace_tag_underscores_with_spaces' ) ) - self._replace_tag_emojis_with_boxes.setChecked( new_options.GetBoolean( 'replace_tag_emojis_with_boxes' ) ) self._sibling_connector.setText( new_options.GetString( 'sibling_connector' ) ) self._fade_sibling_connector.setChecked( new_options.GetBoolean( 'fade_sibling_connector' ) ) self._sibling_connector_custom_namespace_colour.SetValue( new_options.GetNoneableString( 'sibling_connector_custom_namespace_colour' ) ) @@ -4291,7 +4287,6 @@ class ManageOptionsPanel( ClientGUIScrolledPanels.ManagePanel ): rows.append( ( 'OR connecting string: ', 
self._or_connector ) ) rows.append( ( 'Namespace for the colour of the OR connecting string: ', self._or_connector_custom_namespace_colour ) ) rows.append( ( 'EXPERIMENTAL: Replace all underscores with spaces: ', self._replace_tag_underscores_with_spaces ) ) - rows.append( ( 'EXPERIMENTAL: Replace all emojis with □: ', self._replace_tag_emojis_with_boxes ) ) gridbox = ClientGUICommon.WrapInGrid( render_panel, rows ) @@ -4399,7 +4394,6 @@ class ManageOptionsPanel( ClientGUIScrolledPanels.ManagePanel ): self._new_options.SetBoolean( 'show_subtag_number_namespaces', self._show_subtag_number_namespaces.isChecked() ) self._new_options.SetString( 'namespace_connector', self._namespace_connector.text() ) self._new_options.SetBoolean( 'replace_tag_underscores_with_spaces', self._replace_tag_underscores_with_spaces.isChecked() ) - self._new_options.SetBoolean( 'replace_tag_emojis_with_boxes', self._replace_tag_emojis_with_boxes.isChecked() ) self._new_options.SetString( 'sibling_connector', self._sibling_connector.text() ) self._new_options.SetBoolean( 'fade_sibling_connector', self._fade_sibling_connector.isChecked() ) @@ -5205,7 +5199,7 @@ class ManageURLsPanel( CAC.ApplicationCommandProcessorMixin, ClientGUIScrolledPa try: - normalised_url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url, ephemeral_ok = True ) + normalised_url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url ) normalised_urls.append( normalised_url ) diff --git a/hydrus/client/gui/ClientGUITags.py b/hydrus/client/gui/ClientGUITags.py index 27874909..bd971570 100644 --- a/hydrus/client/gui/ClientGUITags.py +++ b/hydrus/client/gui/ClientGUITags.py @@ -1976,10 +1976,6 @@ class IncrementalTaggingPanel( ClientGUIScrolledPanels.EditPanel ): initial_suffix = CG.client_controller.new_options.GetString( 'last_incremental_tagging_suffix' ) self._suffix.setText( initial_suffix ) - self._tag_in_reverse = QW.QCheckBox( self ) - tt = 'Tag the last file first and work 
backwards, e.g. for start=1, step=1 on five files, set 5, 4, 3, 2, 1.' - self._tag_in_reverse.setToolTip( tt ) - initial_start = self._GetInitialStart() self._start = ClientGUICommon.BetterSpinBox( self, initial = initial_start, min = -10000000, max = 10000000 ) @@ -2003,7 +1999,6 @@ class IncrementalTaggingPanel( ClientGUIScrolledPanels.EditPanel ): rows.append( ( 'step: ', self._step ) ) rows.append( ( 'prefix: ', self._prefix ) ) rows.append( ( 'suffix: ', self._suffix ) ) - rows.append( ( 'tag in reverse: ', self._tag_in_reverse ) ) gridbox = ClientGUICommon.WrapInGrid( self, rows ) @@ -2022,7 +2017,6 @@ class IncrementalTaggingPanel( ClientGUIScrolledPanels.EditPanel ): self._suffix.textChanged.connect( self._UpdateSuffix ) self._start.valueChanged.connect( self._UpdateSummary ) self._step.valueChanged.connect( self._UpdateSummary ) - self._tag_in_reverse.clicked.connect( self._UpdateSummary ) self._UpdateSummary() @@ -2058,14 +2052,7 @@ class IncrementalTaggingPanel( ClientGUIScrolledPanels.EditPanel ): result = [] - medias = list( self._medias ) - - if self._tag_in_reverse.isChecked(): - - medias.reverse() - - - for ( i, media ) in enumerate( medias ): + for ( i, media ) in enumerate( self._medias ): number = start + i * step @@ -2076,11 +2063,6 @@ class IncrementalTaggingPanel( ClientGUIScrolledPanels.EditPanel ): result.append( ( media, tag ) ) - if self._tag_in_reverse.isChecked(): - - result.reverse() - - return result @@ -2168,14 +2150,7 @@ class IncrementalTaggingPanel( ClientGUIScrolledPanels.EditPanel ): else: - if self._tag_in_reverse.isChecked(): - - tag_summary = medias_and_tags[0][1] + f' {HC.UNICODE_ELLIPSIS} ' + ', '.join( ( tag for ( media, tag ) in medias_and_tags[-3:] ) ) - - else: - - tag_summary = ', '.join( ( tag for ( media, tag ) in medias_and_tags[:3] ) ) + f' {HC.UNICODE_ELLIPSIS} ' + medias_and_tags[-1][1] - + tag_summary = ', '.join( ( tag for ( media, tag ) in medias_and_tags[:3] ) ) + f' {HC.UNICODE_ELLIPSIS} ' + 
medias_and_tags[-1][1] # @@ -6407,8 +6382,6 @@ class TagSummaryGenerator( HydrusSerialisable.SerialisableBase ): if namespace in self._interesting_namespaces: - subtag = ClientTags.RenderTag( subtag, render_for_user = True ) - namespaces_to_subtags[ namespace ].append( subtag ) diff --git a/hydrus/client/gui/lists/ClientGUIListConstants.py b/hydrus/client/gui/lists/ClientGUIListConstants.py index 0011f2c0..f1aa58b9 100644 --- a/hydrus/client/gui/lists/ClientGUIListConstants.py +++ b/hydrus/client/gui/lists/ClientGUIListConstants.py @@ -835,7 +835,7 @@ class COLUMN_LIST_URL_CLASS_PATH_COMPONENTS( COLUMN_LIST_DEFINITION ): column_list_type_name_lookup[ COLUMN_LIST_URL_CLASS_PATH_COMPONENTS.ID ] = 'url class path components' -register_column_type( COLUMN_LIST_URL_CLASS_PATH_COMPONENTS.ID, COLUMN_LIST_URL_CLASS_PATH_COMPONENTS.KEY, 'name', False, 14, True ) +register_column_type( COLUMN_LIST_URL_CLASS_PATH_COMPONENTS.ID, COLUMN_LIST_URL_CLASS_PATH_COMPONENTS.KEY, 'key', False, 14, True ) register_column_type( COLUMN_LIST_URL_CLASS_PATH_COMPONENTS.ID, COLUMN_LIST_URL_CLASS_PATH_COMPONENTS.VALUE, 'value', False, 45, True ) default_column_list_sort_lookup[ COLUMN_LIST_URL_CLASS_PATH_COMPONENTS.ID ] = ( COLUMN_LIST_URL_CLASS_PATH_COMPONENTS.KEY, True ) diff --git a/hydrus/client/gui/networking/ClientGUINetworkJobControl.py b/hydrus/client/gui/networking/ClientGUINetworkJobControl.py index 7b5bc4cc..b0dbde3f 100644 --- a/hydrus/client/gui/networking/ClientGUINetworkJobControl.py +++ b/hydrus/client/gui/networking/ClientGUINetworkJobControl.py @@ -18,7 +18,6 @@ from hydrus.client.gui import QtPorting as QP from hydrus.client.gui.networking import ClientGUINetwork from hydrus.client.gui.widgets import ClientGUICommon from hydrus.client.networking import ClientNetworkingContexts -from hydrus.client.networking import ClientNetworkingFunctions from hydrus.client.networking import ClientNetworkingJobs class NetworkJobControl( QW.QFrame ): @@ -108,7 +107,7 @@ class 
NetworkJobControl( QW.QFrame ): url = self._network_job.GetURL() - ClientGUIMenus.AppendMenuLabel( menu, ClientNetworkingFunctions.ConvertURLToHumanString( url ), copy_text = url, description = 'copy URL to the clipboard' ) + ClientGUIMenus.AppendMenuLabel( menu, url, description = 'copy URL to the clipboard' ) ClientGUIMenus.AppendSeparator( menu ) diff --git a/hydrus/client/gui/search/ClientGUIPredicatesSingle.py b/hydrus/client/gui/search/ClientGUIPredicatesSingle.py index 4eee2221..5ff14f46 100644 --- a/hydrus/client/gui/search/ClientGUIPredicatesSingle.py +++ b/hydrus/client/gui/search/ClientGUIPredicatesSingle.py @@ -1324,9 +1324,9 @@ class PanelPredicateSystemKnownURLsExactURL( PanelPredicateSystemSingle ): hbox = QP.HBoxLayout() - QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'system:'), CC.FLAGS_CENTER_PERPENDICULAR ) + QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'system:known url'), CC.FLAGS_CENTER_PERPENDICULAR ) QP.AddToLayout( hbox, self._operator, CC.FLAGS_CENTER_PERPENDICULAR ) - QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,' url '), CC.FLAGS_CENTER_PERPENDICULAR ) + QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'exact url:'), CC.FLAGS_CENTER_PERPENDICULAR ) QP.AddToLayout( hbox, self._exact_url, CC.FLAGS_EXPAND_BOTH_WAYS ) self.setLayout( hbox ) @@ -1348,11 +1348,11 @@ class PanelPredicateSystemKnownURLsExactURL( PanelPredicateSystemSingle ): if operator: - operator_description = 'has url ' + operator_description = 'has url: ' else: - operator_description = 'does not have url ' + operator_description = 'does not have url: ' rule_type = 'exact_match' @@ -1396,9 +1396,9 @@ class PanelPredicateSystemKnownURLsDomain( PanelPredicateSystemSingle ): hbox = QP.HBoxLayout() - QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'system:'), CC.FLAGS_CENTER_PERPENDICULAR ) + QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'system:known url'), CC.FLAGS_CENTER_PERPENDICULAR ) 
QP.AddToLayout( hbox, self._operator, CC.FLAGS_CENTER_PERPENDICULAR ) - QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,' url with domain '), CC.FLAGS_CENTER_PERPENDICULAR ) + QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'a url with domain:'), CC.FLAGS_CENTER_PERPENDICULAR ) QP.AddToLayout( hbox, self._domain, CC.FLAGS_EXPAND_BOTH_WAYS ) self.setLayout( hbox ) @@ -1420,11 +1420,11 @@ class PanelPredicateSystemKnownURLsDomain( PanelPredicateSystemSingle ): if operator: - operator_description = 'has url with domain ' + operator_description = 'has a url with domain: ' else: - operator_description = 'does not have url with domain ' + operator_description = 'does not have a url with domain: ' rule_type = 'domain' @@ -1466,9 +1466,9 @@ class PanelPredicateSystemKnownURLsRegex( PanelPredicateSystemSingle ): hbox = QP.HBoxLayout() - QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'system:'), CC.FLAGS_CENTER_PERPENDICULAR ) + QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'system:known url'), CC.FLAGS_CENTER_PERPENDICULAR ) QP.AddToLayout( hbox, self._operator, CC.FLAGS_CENTER_PERPENDICULAR ) - QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,' url that matches regex '), CC.FLAGS_CENTER_PERPENDICULAR ) + QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'a url that matches this regex:'), CC.FLAGS_CENTER_PERPENDICULAR ) QP.AddToLayout( hbox, self._regex, CC.FLAGS_EXPAND_BOTH_WAYS ) self.setLayout( hbox ) @@ -1504,11 +1504,11 @@ class PanelPredicateSystemKnownURLsRegex( PanelPredicateSystemSingle ): if operator: - operator_description = 'has url matching regex ' + operator_description = 'has a url matching regex: ' else: - operator_description = 'does not have url matching regex ' + operator_description = 'does not have a url matching regex: ' rule_type = 'regex' @@ -1558,9 +1558,9 @@ class PanelPredicateSystemKnownURLsURLClass( PanelPredicateSystemSingle ): hbox = QP.HBoxLayout() - QP.AddToLayout( hbox, 
ClientGUICommon.BetterStaticText(self,'system:'), CC.FLAGS_CENTER_PERPENDICULAR ) + QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'system:known url'), CC.FLAGS_CENTER_PERPENDICULAR ) QP.AddToLayout( hbox, self._operator, CC.FLAGS_CENTER_PERPENDICULAR ) - QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,' url matching class '), CC.FLAGS_CENTER_PERPENDICULAR ) + QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'url matching this class:'), CC.FLAGS_CENTER_PERPENDICULAR ) QP.AddToLayout( hbox, self._url_classes, CC.FLAGS_EXPAND_BOTH_WAYS ) self.setLayout( hbox ) @@ -1580,15 +1580,22 @@ class PanelPredicateSystemKnownURLsURLClass( PanelPredicateSystemSingle ): operator = self._operator.GetValue() + if operator: + + operator_description = 'has ' + + else: + + operator_description = 'does not have ' + + rule_type = 'url_class' url_class = self._url_classes.GetValue() rule = url_class - url_class_name = url_class.GetName() - - description = f'has url with class {url_class_name}' if operator else f'does not have url with class {url_class_name}' + description = operator_description + url_class.GetName() + ' url' predicates = ( ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( operator, rule_type, rule, description ) ), ) diff --git a/hydrus/client/gui/widgets/ClientGUICommon.py b/hydrus/client/gui/widgets/ClientGUICommon.py index 94d8ca6e..d8e7248a 100644 --- a/hydrus/client/gui/widgets/ClientGUICommon.py +++ b/hydrus/client/gui/widgets/ClientGUICommon.py @@ -22,7 +22,6 @@ from hydrus.client.gui import ClientGUIMenus from hydrus.client.gui import ClientGUIShortcuts from hydrus.client.gui import QtPorting as QP from hydrus.client.gui.widgets import ClientGUIColourPicker -from hydrus.client.networking import ClientNetworkingFunctions def AddGridboxStretchSpacer( win: QW.QWidget, layout: QW.QGridLayout ): @@ -31,7 +30,7 @@ def AddGridboxStretchSpacer( win: QW.QWidget, layout: QW.QGridLayout ): QP.AddToLayout( layout, 
widget, CC.FLAGS_CENTER_PERPENDICULAR_EXPAND_DEPTH ) -def WrapInGrid( parent, rows, expand_text = False, add_stretch_at_end = True, expand_single_widgets = False ): +def WrapInGrid( parent, rows, expand_text = False, add_stretch_at_end = True ): gridbox = QP.GridLayout( cols = 2 ) @@ -119,23 +118,10 @@ def WrapInGrid( parent, rows, expand_text = False, add_stretch_at_end = True, ex gridbox.next_col = 0 h_policy = QW.QSizePolicy.Expanding - - if expand_single_widgets: - - v_policy = QW.QSizePolicy.Expanding - - else: - - v_policy = QW.QSizePolicy.Fixed - + v_policy = QW.QSizePolicy.Fixed control.setSizePolicy( h_policy, v_policy ) - if expand_single_widgets: - - gridbox.setRowStretch( gridbox.rowCount() - 1, 1 ) - - if add_stretch_at_end: @@ -145,7 +131,6 @@ def WrapInGrid( parent, rows, expand_text = False, add_stretch_at_end = True, ex return gridbox - def WrapInText( control, parent, text, object_name = None ): hbox = QP.HBoxLayout() @@ -714,7 +699,7 @@ class BetterHyperLink( BetterStaticText ): self._url = url - self.setToolTip( ClientNetworkingFunctions.ConvertURLToHumanString( self._url ) ) + self.setToolTip( self._url ) self.setTextFormat( QC.Qt.RichText ) self.setTextInteractionFlags( QC.Qt.LinksAccessibleByMouse | QC.Qt.LinksAccessibleByKeyboard ) diff --git a/hydrus/client/importing/ClientImportFileSeeds.py b/hydrus/client/importing/ClientImportFileSeeds.py index 6f274b5e..6ac60445 100644 --- a/hydrus/client/importing/ClientImportFileSeeds.py +++ b/hydrus/client/importing/ClientImportFileSeeds.py @@ -131,7 +131,6 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): self.file_seed_type = file_seed_type self.file_seed_data = file_seed_data - self.file_seed_data_for_comparison = file_seed_data self.created = HydrusTime.GetNow() self.modified = self.created @@ -166,7 +165,7 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): def __hash__( self ): - return ( self.file_seed_type, self.file_seed_data_for_comparison ).__hash__() + return ( 
self.file_seed_type, self.file_seed_data ).__hash__() def __ne__( self, other ): @@ -186,7 +185,6 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): if self.file_seed_type == FILE_SEED_TYPE_URL: urls.discard( self.file_seed_data ) - urls.discard( self.file_seed_data_for_comparison ) if self._referral_url is not None: @@ -212,7 +210,6 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): if self.file_seed_type == FILE_SEED_TYPE_URL: all_primary_urls.add( self.file_seed_data ) - all_primary_urls.add( self.file_seed_data_for_comparison ) if self._referral_url is not None: @@ -887,7 +884,7 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): if self.file_seed_type == FILE_SEED_TYPE_URL: - urls.append( self.file_seed_data_for_comparison ) + urls.append( self.file_seed_data ) if file_url is not None: @@ -956,7 +953,7 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): if self.file_seed_type == FILE_SEED_TYPE_URL: - search_urls = ClientNetworkingFunctions.GetSearchURLs( self.file_seed_data_for_comparison ) + search_urls = ClientNetworkingFunctions.GetSearchURLs( self.file_seed_data ) search_file_seeds = [ FileSeed( FILE_SEED_TYPE_URL, search_url ) for search_url in search_urls ] @@ -1140,8 +1137,7 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): try: - self.file_seed_data = CG.client_controller.network_engine.domain_manager.NormaliseURL( self.file_seed_data, ephemeral_ok = True ) - self.file_seed_data_for_comparison = CG.client_controller.network_engine.domain_manager.NormaliseURL( self.file_seed_data ) + self.file_seed_data = CG.client_controller.network_engine.domain_manager.NormaliseURL( self.file_seed_data ) except HydrusExceptions.URLClassException: @@ -1711,7 +1707,7 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): if self.file_seed_type == FILE_SEED_TYPE_URL: - potentially_associable_urls.add( self.file_seed_data_for_comparison ) + potentially_associable_urls.add( self.file_seed_data ) domain = 
ClientNetworkingFunctions.ConvertURLIntoDomain( self.file_seed_data ) diff --git a/hydrus/client/importing/ClientImportGallerySeeds.py b/hydrus/client/importing/ClientImportGallerySeeds.py index 283365ed..284769ae 100644 --- a/hydrus/client/importing/ClientImportGallerySeeds.py +++ b/hydrus/client/importing/ClientImportGallerySeeds.py @@ -118,7 +118,7 @@ class GallerySeed( HydrusSerialisable.SerialisableBase ): try: - url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url, ephemeral_ok = True ) + url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url ) except HydrusExceptions.URLClassException: diff --git a/hydrus/client/importing/ClientImportWatchers.py b/hydrus/client/importing/ClientImportWatchers.py index 4c17603a..042ffc4a 100644 --- a/hydrus/client/importing/ClientImportWatchers.py +++ b/hydrus/client/importing/ClientImportWatchers.py @@ -243,7 +243,7 @@ class MultipleWatcherImport( HydrusSerialisable.SerialisableBase ): return None - url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url, ephemeral_ok = True ) + url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url ) with self._lock: @@ -1762,7 +1762,7 @@ class WatcherImport( HydrusSerialisable.SerialisableBase ): try: - url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url, ephemeral_ok = True ) + url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url ) except HydrusExceptions.URLClassException: diff --git a/hydrus/client/metadata/ClientTags.py b/hydrus/client/metadata/ClientTags.py index c50eaf55..c614a4c5 100644 --- a/hydrus/client/metadata/ClientTags.py +++ b/hydrus/client/metadata/ClientTags.py @@ -1,5 +1,4 @@ import collections -import re import typing from hydrus.core import HydrusSerialisable @@ -21,22 +20,6 @@ tag_display_str_lookup = { TAG_DISPLAY_DISPLAY_IDEAL : 'ideal display tags' } -emoji_pattern = re.compile("[" - u"\U0001F600-\U0001F64F" # emoticons - 
u"\U0001F300-\U0001F5FF" # symbols & pictographs - u"\U0001F680-\U0001F6FF" # transport & map symbols - u"\U0001F700-\U0001F77F" # alchemical symbols - u"\U0001F780-\U0001F7FF" # Geometric Shapes Extended - u"\U0001F800-\U0001F8FF" # Supplemental Arrows-C - u"\U0001F900-\U0001F9FF" # Supplemental Symbols and Pictographs - u"\U0001FA00-\U0001FA6F" # Chess Symbols - u"\U0001FA70-\U0001FAFF" # Symbols and Pictographs Extended-A - u"\U00002600-\U000026FF" # Miscellaneous Symbols - u"\U00002702-\U000027B0" # Dingbats - u"\U00003000-\U0000303F" # CJK Symbols and Punctuation - "]+(?:\U0000FE0F)?", # make the preding character a colourful emoji, decode this for an example: b'\xe2\x9b\x93\xef\xb8\x8f' -flags=re.UNICODE) - have_shown_invalid_tag_warning = False def RenderNamespaceForUser( namespace ): @@ -67,7 +50,7 @@ def RenderTag( tag, render_for_user: bool ): if namespace == '': - result = subtag + return subtag else: @@ -90,19 +73,9 @@ def RenderTag( tag, render_for_user: bool ): connector = ':' - result = namespace + connector + subtag + return namespace + connector + subtag - if render_for_user: - - if new_options.GetBoolean( 'replace_tag_emojis_with_boxes' ): - - result = emoji_pattern.sub( '□', result ) - - - - return result - class ServiceKeysToTags( HydrusSerialisable.SerialisableBase, collections.defaultdict ): diff --git a/hydrus/client/networking/ClientNetworkingDomain.py b/hydrus/client/networking/ClientNetworkingDomain.py index 9545bcea..7a9fe004 100644 --- a/hydrus/client/networking/ClientNetworkingDomain.py +++ b/hydrus/client/networking/ClientNetworkingDomain.py @@ -266,7 +266,7 @@ class NetworkDomainManager( HydrusSerialisable.SerialisableBase ): seen_url_classes.add( api_url_class ) - api_url = api_url_class.Normalise( api_url, ephemeral_ok = True ) + api_url = api_url_class.Normalise( api_url ) return ( api_url_class, api_url ) @@ -1336,15 +1336,13 @@ class NetworkDomainManager( HydrusSerialisable.SerialisableBase ): - def GetURLClassFromName( self, 
name: str ): + def GetURLClassFromName( self, name ): with self._lock: - name_search = name.casefold() - for url_class in self._url_classes: - if url_class.GetName().casefold() == name_search: + if url_class.GetName() == name: return url_class @@ -1510,7 +1508,7 @@ class NetworkDomainManager( HydrusSerialisable.SerialisableBase ): return True - def NormaliseURL( self, url, ephemeral_ok = False ): + def NormaliseURL( self, url ): with self._lock: @@ -1525,8 +1523,6 @@ class NetworkDomainManager( HydrusSerialisable.SerialisableBase ): path = p.path params = p.params - # this puts them all in alphabetical order - ( query_dict, single_value_parameters, param_order ) = ClientNetworkingFunctions.ConvertQueryTextToDict( p.query ) query = ClientNetworkingFunctions.ConvertQueryDictToText( query_dict, single_value_parameters ) @@ -1539,14 +1535,14 @@ class NetworkDomainManager( HydrusSerialisable.SerialisableBase ): else: - normalised_url = url_class.Normalise( url, ephemeral_ok = ephemeral_ok ) + normalised_url = url_class.Normalise( url ) return normalised_url - def NormaliseURLs( self, urls: typing.Collection[ str ], ephemeral_ok = False ) -> typing.List[ str ]: + def NormaliseURLs( self, urls: typing.Collection[ str ] ) -> typing.List[ str ]: normalised_urls = [] @@ -1554,14 +1550,14 @@ class NetworkDomainManager( HydrusSerialisable.SerialisableBase ): try: - normalised_url = self.NormaliseURL( url, ephemeral_ok = ephemeral_ok ) + normalised_url = self.NormaliseURL( url ) except HydrusExceptions.URLClassException: continue - normalised_urls.append( normalised_url ) + normalised_urls.append( url ) normalised_urls = HydrusData.DedupeList( normalised_urls ) diff --git a/hydrus/client/networking/ClientNetworkingFunctions.py b/hydrus/client/networking/ClientNetworkingFunctions.py index e056ec69..5440b75d 100644 --- a/hydrus/client/networking/ClientNetworkingFunctions.py +++ b/hydrus/client/networking/ClientNetworkingFunctions.py @@ -101,7 +101,6 @@ def ConvertHTTPToHTTPS( url 
): raise Exception( 'Given a url that did not have a scheme!' ) - def ConvertQueryDictToText( query_dict, single_value_parameters, param_order = None ): # we now do everything with requests, which does all the unicode -> %20 business naturally, phew @@ -143,7 +142,7 @@ def ConvertQueryDictToText( query_dict, single_value_parameters, param_order = N if key in query_dict: - params.append( f'{key}={query_dict[ key ]}' ) + params.append( '{}={}'.format( key, query_dict[ key ] ) ) @@ -154,11 +153,16 @@ def ConvertQueryDictToText( query_dict, single_value_parameters, param_order = N def ConvertQueryTextToDict( query_text ): - # in the old version of this func, we played silly games with character encoding. I made the foolish decision to try to handle/save URLs with %20 stuff decoded - # this lead to complexity with odd situations like '6+girls+skirt', which would come here encoded as '6%2Bgirls+skirt' - # I flipped back and forth and tried to preserve the encoding if it did stepped on x or did not change y, what a mess! + # we generally do not want quote characters, %20 stuff, in our urls. we would prefer properly formatted unicode - # I no longer do this. I will encode if there is no '%' in there already, which catches cases of humans pasting/typing an URL with something human, but only if it is non-destructive + # so, let's replace all keys and values with unquoted versions + # -but- + # we only replace if it is a completely reversable operation! 
+ # odd situations like '6+girls+skirt', which comes here encoded as '6%2Bgirls+skirt', shouldn't turn into '6+girls+skirt' + # so if there are a mix of encoded and non-encoded, we won't touch it here m8 + + # except these chars, which screw with GET arg syntax when unquoted + bad_chars = [ '&', '=', '/', '?', '#', ';', '+', ',' ] param_order = [] @@ -182,9 +186,23 @@ def ConvertQueryTextToDict( query_text ): continue - if '%' not in value: + try: - value = urllib.parse.quote( value, safe = '' ) + unquoted_value = urllib.parse.unquote( value ) + + if True not in ( bad_char in unquoted_value for bad_char in bad_chars ): + + requoted_value = urllib.parse.quote( unquoted_value ) + + if requoted_value == value: + + value = unquoted_value + + + + except: + + pass single_value_parameters.append( value ) @@ -194,14 +212,42 @@ def ConvertQueryTextToDict( query_text ): ( key, value ) = result - if '%' not in key: + try: - key = urllib.parse.quote( key, safe = '' ) + unquoted_key = urllib.parse.unquote( key ) + + if True not in ( bad_char in unquoted_key for bad_char in bad_chars ): + + requoted_key = urllib.parse.quote( unquoted_key ) + + if requoted_key == key: + + key = unquoted_key + + + + except: + + pass - if '%' not in value: + try: - value = urllib.parse.quote( value, safe = '' ) + unquoted_value = urllib.parse.unquote( value ) + + if True not in ( bad_char in unquoted_value for bad_char in bad_chars ): + + requoted_value = urllib.parse.quote( unquoted_value ) + + if requoted_value == value: + + value = unquoted_value + + + + except: + + pass param_order.append( key ) @@ -212,7 +258,6 @@ def ConvertQueryTextToDict( query_text ): return ( query_dict, single_value_parameters, param_order ) - def ConvertURLIntoDomain( url ): parser_result = ParseURL( url ) @@ -237,18 +282,6 @@ def ConvertURLIntoSecondLevelDomain( url ): return ConvertDomainIntoSecondLevelDomain( domain ) - -def ConvertURLToHumanString( url: str ) -> str: - - # ok so the idea here is that we want to 
store 'ugly' urls behind the scenes, with quoted %20 gubbins, but any time we present to the user, we want to convert all that to real (URL-invalid) characters - # although there are some caveats, we can pretty much just do a dequote on the whole string and it'll be fine most of the time mate - # if we have a unicode domain, we'll need to figure out 'punycode' decoding, but w/e for now - - pretty_url = urllib.parse.unquote( url ) - - return pretty_url - - def CookieDomainMatches( cookie, search_domain ): cookie_domain = cookie.domain @@ -291,10 +324,6 @@ def GetSearchURLs( url ): try: - ephemeral_normalised_url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url, ephemeral_ok = True ) - - search_urls.add( ephemeral_normalised_url ) - normalised_url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url ) search_urls.add( normalised_url ) diff --git a/hydrus/client/networking/ClientNetworkingGUG.py b/hydrus/client/networking/ClientNetworkingGUG.py index 8d7febf6..d0b72cd1 100644 --- a/hydrus/client/networking/ClientNetworkingGUG.py +++ b/hydrus/client/networking/ClientNetworkingGUG.py @@ -108,60 +108,46 @@ class GalleryURLGenerator( HydrusSerialisable.SerialisableBaseNamed ): raise HydrusExceptions.GUGException( 'Replacement phrase not in URL template!' ) - if '%' in query_text: - - # redundant test but leave it in for now - if ' ' in query_text or '% ' in query_text or query_text.endswith( '%' ): - - # there is probably a legit % character here that should be encoded - - search_terms = query_text.split( ' ' ) - - we_think_query_text_is_pre_encoded = False - - elif '%20' in query_text: - - # we are generally confident the user pasted a multi-tag query they copied from a notepad or something - - search_terms = query_text.split( '%20' ) - - # any % character entered here should be encoded as '%25' - we_think_query_text_is_pre_encoded = True - - else: - - # we simply do not know in this case. 
this is a single tag with a % not at the end, but it could be male%2Ffemale or it could be "120%120%hello", the hit new anime series - # assuming it is the former more often than the latter, we will not intrude on what the user sent here and cross our fingers - - search_terms = [ query_text ] - - we_think_query_text_is_pre_encoded = True - - - else: - - search_terms = query_text.split( ' ' ) - - # normal, not pre-encoded text - we_think_query_text_is_pre_encoded = False - + ( first_part, second_part ) = self._url_template.split( self._replacement_phrase, 1 ) - if not we_think_query_text_is_pre_encoded: + search_phrase_seems_to_go_in_path = '?' not in first_part + + search_terms = query_text.split( ' ' ) + + # if a user enters "%20" in a query, or any other percent-encoded char, we turn it into human here, lest it be re-quoted in a moment + # if a user enters "%25", i.e. "%", followed by some characters, then all bets are off + search_terms = [ urllib.parse.unquote( search_term ) for search_term in search_terms ] + + if search_phrase_seems_to_go_in_path: + + # encode all this gubbins since requests won't be able to do it + # this basically fixes e621 searches for 'male/female', which through some httpconf trickery are embedded in path but end up in a query, so need to be encoded right beforehand encoded_search_terms = [ urllib.parse.quote( search_term, safe = '' ) for search_term in search_terms ] else: - encoded_search_terms = search_terms + encoded_search_terms = [] + + for search_term in search_terms: + + # when the tags separator is '+' but the tags include '6+girls', we run into fun internet land + + bad_chars = [ self._search_terms_separator, '&', '=', '/', '?', '#', ';' ] + + if True in ( bad_char in search_term for bad_char in bad_chars ): + + search_term = urllib.parse.quote( search_term, safe = '' ) + + + encoded_search_terms.append( search_term ) + try: search_phrase = self._search_terms_separator.join( encoded_search_terms ) - # we do not encode the 
whole thing here since we may want to keep tag-connector-+ for the '6+girls+skirt' = '6%2Bgirls+skirt' scenario - # some characters are optional or something when it comes to encoding. '+' is one of these - gallery_url = self._url_template.replace( self._replacement_phrase, search_phrase ) except Exception as e: diff --git a/hydrus/client/networking/ClientNetworkingURLClass.py b/hydrus/client/networking/ClientNetworkingURLClass.py index 57dfcb84..988d66f4 100644 --- a/hydrus/client/networking/ClientNetworkingURLClass.py +++ b/hydrus/client/networking/ClientNetworkingURLClass.py @@ -69,101 +69,11 @@ def SortURLClassesListDescendingComplexity( url_classes: typing.List[ "URLClass" # ( num_path_components, num_required_parameters, num_total_parameters, len_example_url ) url_classes.sort( key = lambda u_c: u_c.GetSortingComplexityKey(), reverse = True ) - -class URLClassParameterFixedName( HydrusSerialisable.SerialisableBase ): - - SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_URL_CLASS_PARAMETER_FIXED_NAME - SERIALISABLE_NAME = 'URL Class Parameter - Fixed Name' - SERIALISABLE_VERSION = 1 - - def __init__( self, name = None, value_string_match = None, default_value = None ): - - if name is None: - - name = 'name' - - - if value_string_match is None: - - value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'value', example_string = 'value' ) - - - HydrusSerialisable.SerialisableBase.__init__( self ) - - self._name = name - self._value_string_match = value_string_match - self._default_value = default_value - - - def __repr__( self ): - - text = f'URL Class Parameter - Fixed Name: {self._name}: {self._value_string_match.ToString()}' - - return text - - - def _GetSerialisableInfo( self ): - - serialisable_value_string_match = self._value_string_match.GetSerialisableTuple() - - return ( self._name, serialisable_value_string_match, self._default_value ) - - - def _InitialiseFromSerialisableInfo( self, 
serialisable_info ): - - ( self._name, serialisable_value_string_match, self._default_value ) = serialisable_info - - self._value_string_match = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_value_string_match ) - - - def GetDefaultValue( self ): - - return self._default_value - - - def GetName( self ): - - return self._name - - - def GetValueStringMatch( self ): - - return self._value_string_match - - - def IsEphemeralToken( self ): - - return False - - - def MustBeInOriginalURL( self ): - - return self._default_value is None - - - def MatchesName( self, name ): - - return self._name == name - - - def MatchesValue( self, value ): - - return self._value_string_match.Matches( value ) - - - def TestValue( self, value ): - - self._value_string_match.Test( value ) - - - -HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_URL_CLASS_PARAMETER_FIXED_NAME ] = URLClassParameterFixedName - class URLClass( HydrusSerialisable.SerialisableBaseNamed ): SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_URL_CLASS SERIALISABLE_NAME = 'URL Class' - SERIALISABLE_VERSION = 13 + SERIALISABLE_VERSION = 12 def __init__( self, @@ -206,19 +116,10 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): if parameters is None: - parameters = [] + parameters = {} - p = URLClassParameterFixedName( - name = 's', - value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ) - ) - - parameters.append( p ) - - p = URLClassParameterFixedName( - name = 'id', - value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '123456' ) - ) + parameters[ 's' ] = ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ), None ) + parameters[ 'id' ] = ( ClientStrings.StringMatch( match_type = 
ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '123456' ), None ) if single_value_parameters_string_match is None: @@ -244,7 +145,7 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): # if the args are not serialisable stuff, lets overwrite here path_components = HydrusSerialisable.SerialisableList( path_components ) - parameters = HydrusSerialisable.SerialisableList( parameters ) + parameters = HydrusSerialisable.SerialisableDictionary( parameters ) HydrusSerialisable.SerialisableBaseNamed.__init__( self, name ) @@ -347,98 +248,31 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): return path - def _ClipAndFleshOutQuery( self, query: str, ephemeral_ok: bool, allow_clip: bool = True ): + def _ClipAndFleshOutQuery( self, query, allow_clip = True ): ( query_dict, single_value_parameters, param_order ) = ClientNetworkingFunctions.ConvertQueryTextToDict( query ) - query_dict_keys_to_parameters = {} + if allow_clip: + + query_dict = { key : value for ( key, value ) in query_dict.items() if key in self._parameters } + - remaining_query_dict_names = set( query_dict.keys() ) - - # if we were feeling clever, we could sort these guys from most specific name to least, but w/e - for parameter in self._parameters: + for ( key, ( string_match, default ) ) in self._parameters.items(): - match_found = False - - for name in remaining_query_dict_names: + if key not in query_dict: - if parameter.MatchesName( name ): + if default is None: - query_dict_keys_to_parameters[ name ] = parameter - - remaining_query_dict_names.discard( name ) - - match_found = True - - break - - - - if not match_found: - - default_value = parameter.GetDefaultValue() - - if default_value is None: - - if not parameter.IsEphemeralToken(): - - raise HydrusExceptions.URLClassException( f'Could not flesh out query--no default for {name} defined!' 
) - + raise HydrusExceptions.URLClassException( 'Could not flesh out query--no default for ' + key + ' defined!' ) else: - if isinstance( parameter, URLClassParameterFixedName ): - - name = parameter.GetName() - - query_dict_keys_to_parameters[ name ] = parameter - - query_dict[ name ] = default_value - - param_order.append( name ) - - else: - - raise HydrusExceptions.URLClassException( f'Could not flesh out query--cannot figure out a fixed name for {parameter}!' ) - + query_dict[ key ] = default - - - - - for name in remaining_query_dict_names: - - query_dict_keys_to_parameters[ name ] = None - - - # ok, we now have our fully fleshed out query_dict. let's filter it - - filtered_query_dict = {} - - for ( name, possible_parameter ) in query_dict_keys_to_parameters.items(): - - if possible_parameter is None: - - if allow_clip: - - # no matching param, discard it - continue - - - else: - - if possible_parameter.IsEphemeralToken() and not ephemeral_ok: - - continue + param_order.append( key ) - filtered_query_dict[ name ] = query_dict[ name ] - - - query_dict = filtered_query_dict - - # if self._alphabetise_get_parameters: @@ -459,7 +293,7 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): serialisable_url_class_key = self._url_class_key.hex() serialisable_path_components = [ ( string_match.GetSerialisableTuple(), default ) for ( string_match, default ) in self._path_components ] - serialisable_parameters = self._parameters.GetSerialisableTuple() + serialisable_parameters = [ ( key, ( string_match.GetSerialisableTuple(), default ) ) for ( key, ( string_match, default ) ) in self._parameters.items() ] serialisable_single_value_parameters_string_match = self._single_value_parameters_string_match.GetSerialisableTuple() serialisable_header_overrides = list( self._header_overrides.items() ) serialisable_api_lookup_converter = self._api_lookup_converter.GetSerialisableTuple() @@ -514,7 +348,7 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): 
self._url_class_key = bytes.fromhex( serialisable_url_class_key ) self._path_components = [ ( HydrusSerialisable.CreateFromSerialisableTuple( serialisable_string_match ), default ) for ( serialisable_string_match, default ) in serialisable_path_components ] - self._parameters = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_parameters ) + self._parameters = { key : ( HydrusSerialisable.CreateFromSerialisableTuple( serialisable_string_match ), default ) for ( key, ( serialisable_string_match, default ) ) in serialisable_parameters } self._single_value_parameters_string_match = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_single_value_parameters_string_match ) self._header_overrides = dict( serialisable_header_overrides ) self._api_lookup_converter = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_api_lookup_converter ) @@ -735,68 +569,6 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): return ( 12, new_serialisable_info ) - if version == 12: - - ( - serialisable_url_class_key, - url_type, - preferred_scheme, - netloc, - booleans, - serialisable_path_components, - serialisable_parameters, - has_single_value_parameters, - serialisable_single_value_parameters_match, - serialisable_header_overrides, - serialisable_api_lookup_converter, - send_referral_url, - serialisable_referrel_url_converter, - gallery_index_type, - gallery_index_identifier, - gallery_index_delta, - example_url - ) = old_serialisable_info - - new_parameters = HydrusSerialisable.SerialisableList() - - for ( name, ( serialisable_value_string_match, default_value ) ) in serialisable_parameters: - - value_string_match = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_value_string_match ) - - parameter = URLClassParameterFixedName( - name = name, - value_string_match = value_string_match, - default_value = default_value - ) - - new_parameters.append( parameter ) - - - serialisable_parameters = new_parameters.GetSerialisableTuple() - - 
new_serialisable_info = ( - serialisable_url_class_key, - url_type, - preferred_scheme, - netloc, - booleans, - serialisable_path_components, - serialisable_parameters, - has_single_value_parameters, - serialisable_single_value_parameters_match, - serialisable_header_overrides, - serialisable_api_lookup_converter, - send_referral_url, - serialisable_referrel_url_converter, - gallery_index_type, - gallery_index_identifier, - gallery_index_delta, - example_url - ) - - return ( 13, new_serialisable_info ) - - def AlphabetiseGetParameters( self ): @@ -830,11 +602,6 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): return self._should_be_associated_with_files or self.UsesAPIURL() - def GetAPILookupConverter( self ): - - return self._api_lookup_converter - - def GetAPIURL( self, url = None ): if url is None: @@ -842,7 +609,7 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): url = self._example_url - url = self.Normalise( url, ephemeral_ok = True ) + url = self.Normalise( url ) return self._api_lookup_converter.Convert( url ) @@ -872,14 +639,9 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): return self._header_overrides - def GetNetloc( self ): - - return self._netloc - - def GetNextGalleryPage( self, url ): - url = self.Normalise( url, ephemeral_ok = True ) + url = self.Normalise( url ) p = ClientNetworkingFunctions.ParseURL( url ) @@ -969,21 +731,6 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): return r.geturl() - def GetParameters( self ) -> typing.List[ URLClassParameterFixedName ]: - - return self._parameters - - - def GetPathComponents( self ): - - return self._path_components - - - def GetPreferredScheme( self ): - - return self._preferred_scheme - - def GetReferralURL( self, url, referral_url ): if self._send_referral_url == SEND_REFERRAL_URL_ONLY_IF_PROVIDED: @@ -1021,11 +768,6 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): return referral_url - def GetReferralURLInfo( self ): - - return ( 
self._send_referral_url, self._referral_url_converter ) - - def GetSafeSummary( self ): return 'URL Class "' + self._name + '" - ' + ClientNetworkingFunctions.ConvertURLIntoDomain( self.GetExampleURL() ) @@ -1047,9 +789,9 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): num_required_path_components = len( [ 1 for ( string_match, default ) in self._path_components if default is None ] ) num_total_path_components = len( self._path_components ) - num_required_parameters = len( [ 1 for parameter in self._parameters if parameter.GetDefaultValue() is None ] ) + num_required_parameters = len( [ 1 for ( key, ( string_match, default ) ) in self._parameters.items() if default is None ] ) num_total_parameters = len( self._parameters ) - len_example_url = len( self.Normalise( self._example_url, ephemeral_ok = True ) ) + len_example_url = len( self.Normalise( self._example_url ) ) return ( num_required_path_components, num_total_path_components, num_required_parameters, num_total_parameters, len_example_url ) @@ -1103,7 +845,7 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): return self._match_subdomains - def Normalise( self, url, ephemeral_ok = False ): + def Normalise( self, url ): p = ClientNetworkingFunctions.ParseURL( url ) @@ -1123,13 +865,13 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): netloc = self._ClipNetLoc( p.netloc ) path = self._ClipAndFleshOutPath( p.path ) - query = self._ClipAndFleshOutQuery( p.query, ephemeral_ok ) + query = self._ClipAndFleshOutQuery( p.query ) else: netloc = p.netloc path = self._ClipAndFleshOutPath( p.path, allow_clip = False ) - query = self._ClipAndFleshOutQuery( p.query, ephemeral_ok, allow_clip = False ) + query = self._ClipAndFleshOutQuery( p.query, allow_clip = False ) r = urllib.parse.ParseResult( scheme, netloc, path, params, query, fragment ) @@ -1243,12 +985,9 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): url_path_components = url_path.split( '/' ) - if 
self._no_more_path_components_than_this: + if len( url_path_components ) > len( self._path_components ) and self._no_more_path_components_than_this: - if len( url_path_components ) > len( self._path_components ): - - raise HydrusExceptions.URLClassException( '"{}" has {} path components, but I will not allow more than my defined {}!'.format( url_path, len( url_path_components ), len( self._path_components ) ) ) - + raise HydrusExceptions.URLClassException( '"{}" has {} path components, but I will not allow more than my defined {}!'.format( url_path, len( url_path_components ), len( self._path_components ) ) ) for ( index, ( string_match, default ) ) in enumerate( self._path_components ): @@ -1281,55 +1020,42 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): - ( url_query_dict, single_value_parameters, param_order ) = ClientNetworkingFunctions.ConvertQueryTextToDict( p.query ) + ( url_parameters, single_value_parameters, param_order ) = ClientNetworkingFunctions.ConvertQueryTextToDict( p.query ) - if self._no_more_parameters_than_this: + if len( url_parameters ) > len( self._parameters ) and self._no_more_parameters_than_this: - good_fixed_names = { parameter.GetName() for parameter in self._parameters if isinstance( parameter, URLClassParameterFixedName ) } - - for ( name, value ) in url_query_dict.items(): - - if name not in good_fixed_names: - - raise HydrusExceptions.URLClassException( f'"This has a "{name}" parameter, but I am set to not allow any unexpected parameters!' 
) - - + raise HydrusExceptions.URLClassException( '"{}" has {} parameters, but I will not allow more than my defined {}!'.format( url_path, len( url_parameters ), len( self._parameters ) ) ) - for parameter in self._parameters: + for ( key, ( string_match, default ) ) in self._parameters.items(): - if isinstance( parameter, URLClassParameterFixedName ): + if key not in url_parameters: - name = parameter.GetName() - - if name not in url_query_dict: + if default is None: - if parameter.MustBeInOriginalURL(): - - raise HydrusExceptions.URLClassException( f'{name} not found in {p.query}' ) - - else: - - continue - + raise HydrusExceptions.URLClassException( key + ' not found in ' + p.query ) + + else: + + continue - value = url_query_dict[ name ] + + value = url_parameters[ key ] + + try: - try: - - parameter.TestValue( value ) - - except HydrusExceptions.StringMatchException as e: - - raise HydrusExceptions.URLClassException( f'Problem with {name}: ' + str( e ) ) - + string_match.Test( value ) + + except HydrusExceptions.StringMatchException as e: + + raise HydrusExceptions.URLClassException( str( e ) ) if len( single_value_parameters ) > 0 and not self._has_single_value_parameters and self._no_more_parameters_than_this: - raise HydrusExceptions.URLClassException( '"{}" has unexpected single-value parameters, but I am set to not allow any unexpected parameters!'.format( url_path ) ) + raise HydrusExceptions.URLClassException( '"{}" has unexpected single-value parameters, but I am set not to allow any unexpected parameters!'.format( url_path ) ) if self._has_single_value_parameters: @@ -1353,6 +1079,11 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): + def ToTuple( self ): + + return ( self._url_type, self._preferred_scheme, self._netloc, self._path_components, self._parameters, self._api_lookup_converter, self._send_referral_url, self._referral_url_converter, self._example_url ) + + def UsesAPIURL( self ): return self._api_lookup_converter.MakesChanges() 
diff --git a/hydrus/client/search/ClientSearchParseSystemPredicates.py b/hydrus/client/search/ClientSearchParseSystemPredicates.py index a4ae5931..936334f8 100644 --- a/hydrus/client/search/ClientSearchParseSystemPredicates.py +++ b/hydrus/client/search/ClientSearchParseSystemPredicates.py @@ -186,7 +186,7 @@ def strip_quotes( s: str ) -> str: def url_class_pred_generator( include, url_class_name ): - description = f'has url with class {url_class_name}' if include else f'does not have url with class {url_class_name}' + description = ( 'has {} url' if include else 'does not have {} url' ).format( url_class_name ) try: @@ -251,12 +251,12 @@ pred_generators = { SystemPredicateParser.Predicate.MEDIA_VIEWTIME : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_FILE_VIEWING_STATS, ( 'viewtime', ( 'media', ), o, convert_timetuple_to_seconds( v ) ) ), SystemPredicateParser.Predicate.PREVIEW_VIEWTIME : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_FILE_VIEWING_STATS, ( 'viewtime', ( 'preview', ), o, convert_timetuple_to_seconds( v ) ) ), SystemPredicateParser.Predicate.ALL_VIEWTIME : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_FILE_VIEWING_STATS, ( 'viewtime', ( 'media', 'preview' ), o, convert_timetuple_to_seconds( v ) ) ), - SystemPredicateParser.Predicate.URL_REGEX : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( True, 'regex', v, 'has url matching regex {}'.format( v ) ) ), - SystemPredicateParser.Predicate.NO_URL_REGEX : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( False, 'regex', v, 'does not have url matching regex {}'.format( v ) ) ), - SystemPredicateParser.Predicate.URL : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( True, 'exact_match', v, 'has url {}'.format( v ) ) ), - SystemPredicateParser.Predicate.NO_URL : lambda o, v, u: ClientSearch.Predicate( 
ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( False, 'exact_match', v, 'does not have url {}'.format( v ) ) ), - SystemPredicateParser.Predicate.DOMAIN : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( True, 'domain', v, 'has url with domain {}'.format( v ) ) ), - SystemPredicateParser.Predicate.NO_DOMAIN : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( False, 'domain', v, 'does not have url with domain {}'.format( v ) ) ), + SystemPredicateParser.Predicate.URL_REGEX : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( True, 'regex', v, 'has a url matching regex: {}'.format( v ) ) ), + SystemPredicateParser.Predicate.NO_URL_REGEX : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( False, 'regex', v, 'does not have a url matching regex: {}'.format( v ) ) ), + SystemPredicateParser.Predicate.URL : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( True, 'exact_match', v, 'has url: {}'.format( v ) ) ), + SystemPredicateParser.Predicate.NO_URL : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( False, 'exact_match', v, 'does not have url: {}'.format( v ) ) ), + SystemPredicateParser.Predicate.DOMAIN : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( True, 'domain', v, 'has a url with domain: {}'.format( v ) ) ), + SystemPredicateParser.Predicate.NO_DOMAIN : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( False, 'domain', v, 'does not have a url with domain: {}'.format( v ) ) ), SystemPredicateParser.Predicate.URL_CLASS : lambda o, v, u: url_class_pred_generator( True, v ), SystemPredicateParser.Predicate.NO_URL_CLASS : lambda o, v, u: url_class_pred_generator( False, v ), SystemPredicateParser.Predicate.MOD_DATE : lambda o, v, u: date_pred_generator( 
ClientSearch.PREDICATE_TYPE_SYSTEM_MODIFIED_TIME, o, v ), diff --git a/hydrus/core/HydrusConstants.py b/hydrus/core/HydrusConstants.py index 6f00f5b2..63a419b9 100644 --- a/hydrus/core/HydrusConstants.py +++ b/hydrus/core/HydrusConstants.py @@ -105,7 +105,7 @@ options = {} # Misc NETWORK_VERSION = 20 -SOFTWARE_VERSION = 567 +SOFTWARE_VERSION = 566 CLIENT_API_VERSION = 62 SERVER_THUMBNAIL_DIMENSIONS = ( 200, 200 ) diff --git a/hydrus/core/HydrusSerialisable.py b/hydrus/core/HydrusSerialisable.py index b581862b..41ef0a02 100644 --- a/hydrus/core/HydrusSerialisable.py +++ b/hydrus/core/HydrusSerialisable.py @@ -141,7 +141,6 @@ SERIALISABLE_TYPE_METADATA_SINGLE_FILE_IMPORTER_MEDIA_TIMESTAMPS = 123 SERIALISABLE_TYPE_PETITION_HEADER = 124 SERIALISABLE_TYPE_STRING_JOINER = 125 SERIALISABLE_TYPE_FILE_FILTER = 126 -SERIALISABLE_TYPE_URL_CLASS_PARAMETER_FIXED_NAME = 127 SERIALISABLE_TYPES_TO_OBJECT_TYPES = {} diff --git a/hydrus/core/files/HydrusFileHandling.py b/hydrus/core/files/HydrusFileHandling.py index 889e4ada..bdcedc26 100644 --- a/hydrus/core/files/HydrusFileHandling.py +++ b/hydrus/core/files/HydrusFileHandling.py @@ -263,16 +263,11 @@ def GenerateThumbnailNumPy( path, target_resolution, mime, duration, num_frames, thumbnail_numpy = HydrusOfficeOpenXMLHandling.GenerateThumbnailNumPyFromOfficePath( path, target_resolution ) - except HydrusExceptions.NoThumbnailFileException: - - thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution) - except Exception as e: PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description ) thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution) - elif mime == HC.APPLICATION_FLASH: @@ -552,11 +547,12 @@ def GetFileInfo( path, mime = None, ok_to_look_for_hydrus_updates = False ): pass + elif mime == HC.APPLICATION_DOCX: try: - num_words = HydrusOfficeOpenXMLHandling.GetDOCXInfo( path ) + ( num_words ) = HydrusOfficeOpenXMLHandling.GetDOCXInfo( path ) except 
HydrusExceptions.LimitedSupportFileException: diff --git a/hydrus/core/files/HydrusOfficeOpenXMLHandling.py b/hydrus/core/files/HydrusOfficeOpenXMLHandling.py index a0275653..88c997ea 100644 --- a/hydrus/core/files/HydrusOfficeOpenXMLHandling.py +++ b/hydrus/core/files/HydrusOfficeOpenXMLHandling.py @@ -1,7 +1,6 @@ import typing from hydrus.core import HydrusConstants as HC -from hydrus.core import HydrusExceptions from hydrus.core.files.HydrusArchiveHandling import GetZipAsPath from hydrus.core.files.images import HydrusImageHandling @@ -9,6 +8,7 @@ import xml.etree.ElementTree as ET from PIL import Image as PILImage + DOCX_XPATH = ".//{*}Override[@PartName='/word/document.xml'][@ContentType='application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml']" XLSX_XPATH = ".//{*}Override[@PartName='/xl/workbook.xml'][@ContentType='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml']" PPTX_XPATH = ".//{*}Override[@PartName='/ppt/presentation.xml'][@ContentType='application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml']" @@ -35,26 +35,17 @@ def MimeFromMicrosoftOpenXMLDocument(path: str): else: - return None - + return None except: return None - def GenerateThumbnailNumPyFromOfficePath( path: str, target_resolution: typing.Tuple[ int, int ] ) -> bytes: - try: + zip_path_file_obj = GetZipAsPath( path, 'docProps/thumbnail.jpeg' ).open( 'rb' ) - zip_path_file_obj = GetZipAsPath( path, 'docProps/thumbnail.jpeg' ).open( 'rb' ) - - except FileNotFoundError: - - raise HydrusExceptions.NoThumbnailFileException( 'No thumbnail.jpeg file!' 
) - - pil_image = HydrusImageHandling.GeneratePILImage( zip_path_file_obj ) thumbnail_pil_image = pil_image.resize( target_resolution, PILImage.LANCZOS ) @@ -73,7 +64,7 @@ PPTX_ASSUMED_DPI = 300 PPTX_PIXEL_PER_EMU = PPTX_ASSUMED_DPI / 914400 def PowerPointResolution( path: str ): - + file = GetZipAsPath( path, 'ppt/presentation.xml' ).open( 'rb' ) root = ET.parse( file ) @@ -135,6 +126,5 @@ def GetDOCXInfo( path:str ): num_words = None - - return num_words + return ( num_words ) diff --git a/hydrus/core/networking/HydrusNetworkVariableHandling.py b/hydrus/core/networking/HydrusNetworkVariableHandling.py index 723acdde..d3081f64 100644 --- a/hydrus/core/networking/HydrusNetworkVariableHandling.py +++ b/hydrus/core/networking/HydrusNetworkVariableHandling.py @@ -381,7 +381,6 @@ def ParseTwistedRequestGETArgs( requests_args: dict, int_params, byte_params, st else: args[ name ] = json.loads( urllib.parse.unquote( value ) ) - except Exception as e: @@ -399,7 +398,6 @@ def ParseTwistedRequestGETArgs( requests_args: dict, int_params, byte_params, st else: list_of_hex_strings = json.loads( urllib.parse.unquote( value ) ) - args[ name ] = [ bytes.fromhex( hex_string ) for hex_string in list_of_hex_strings ] diff --git a/hydrus/external/SystemPredicateParser.py b/hydrus/external/SystemPredicateParser.py index 75c52b1c..fc4f238a 100644 --- a/hydrus/external/SystemPredicateParser.py +++ b/hydrus/external/SystemPredicateParser.py @@ -265,12 +265,12 @@ SYSTEM_PREDICATES = { 'all viewtime': (Predicate.ALL_VIEWTIME, Operators.RELATIONAL, Value.TIME_INTERVAL, None), 'has (a )?url matching regex': (Predicate.URL_REGEX, None, Value.ANY_STRING, None), '(does not|doesn\'t) have (a )?url matching regex': (Predicate.NO_URL_REGEX, None, Value.ANY_STRING, None), - 'has url:? (?=http)': (Predicate.URL, None, Value.ANY_STRING, None), - '(does not|doesn\'t) have url:? (?=http)': (Predicate.NO_URL, None, Value.ANY_STRING, None), - 'has (an? 
)?(url with )?domain': (Predicate.DOMAIN, None, Value.ANY_STRING, None), - '(does not|doesn\'t) have (an? )?(url with )?domain': (Predicate.NO_DOMAIN, None, Value.ANY_STRING, None), - 'has (an? )?url with (url )?class': (Predicate.URL_CLASS, None, Value.ANY_STRING, None), - '(does not|doesn\'t) have (an? )?url with (url )?class': (Predicate.NO_URL_CLASS, None, Value.ANY_STRING, None), + 'has url': (Predicate.URL, None, Value.ANY_STRING, None), + '(does not|doesn\'t) have url': (Predicate.NO_URL, None, Value.ANY_STRING, None), + 'has (a )?(url with )?domain': (Predicate.DOMAIN, None, Value.ANY_STRING, None), + '(does not|doesn\'t) have (a )?(url with )?domain': (Predicate.NO_DOMAIN, None, Value.ANY_STRING, None), + 'has (a )?url with (url )?class': (Predicate.URL_CLASS, None, Value.ANY_STRING, None), + '(does not|doesn\'t) have (a )?url with (url )?class': (Predicate.NO_URL_CLASS, None, Value.ANY_STRING, None), 'tag as number': (Predicate.TAG_AS_NUMBER, Operators.TAG_RELATIONAL, Value.INTEGER, None), 'has notes?$': (Predicate.HAS_NOTES, None, None, None), '((has )?no|does not have( a)?|doesn\'t have) notes?$': (Predicate.NO_NOTES, None, None, None), @@ -300,13 +300,7 @@ def parse_system_predicate( string: str ): # TODO: (hydev): rework this thing into passing around a 'parse result object' that the operator parser can set a value for and say 'yeah value is sorted' for things like 'has words' = '> 0' in one swoop - string = string.strip() - - if 'url' not in string: # hack for system:url has regex (blah) and matching url in general - - string = string.lower() - - + string = string.lower().strip() string = string.replace( '_', ' ' ) if string.startswith( "-" ): raise ValueError( "System predicate can't start with negation" ) diff --git a/hydrus/test/TestClientNetworking.py b/hydrus/test/TestClientNetworking.py index 9361eda9..ec64fb35 100644 --- a/hydrus/test/TestClientNetworking.py +++ b/hydrus/test/TestClientNetworking.py @@ -228,8 +228,6 @@ class 
TestNetworkingDomain( unittest.TestCase ): def test_url_classes( self ): - # TODO: Yo, these all suck and should be broken into separate spammy tests with more appropriate example urls and all that! - name = 'test' url_type = HC.URL_TYPE_POST preferred_scheme = 'https' @@ -247,10 +245,10 @@ class TestNetworkingDomain( unittest.TestCase ): path_components.append( ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'post', example_string = 'post' ), None ) ) path_components.append( ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'page.php', example_string = 'page.php' ), None ) ) - parameters = [] + parameters = {} - parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 's', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ) ) ) - parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 'id', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '123456' ) ) ) + parameters[ 's' ] = ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ), None ) + parameters[ 'id' ] = ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '123456' ), None ) send_referral_url = ClientNetworkingURLClass.SEND_REFERRAL_URL_ONLY_IF_PROVIDED referral_url_converter = None @@ -280,57 +278,6 @@ class TestNetworkingDomain( unittest.TestCase ): self.assertEqual( url_class.GetReferralURL( good_url, referral_url ), referral_url ) self.assertEqual( url_class.GetReferralURL( good_url, None ), None ) - # encoding test - - parameters = [] - - parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 's', value_string_match = 
ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ) ) ) - parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 'id', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_ANY, example_string = 'hello' ) ) ) - - url_class = ClientNetworkingURLClass.URLClass( name, url_type = url_type, preferred_scheme = preferred_scheme, netloc = netloc, path_components = path_components, parameters = parameters, send_referral_url = send_referral_url, referral_url_converter = referral_url_converter, gallery_index_type = gallery_index_type, gallery_index_identifier = gallery_index_identifier, gallery_index_delta = gallery_index_delta, example_url = example_url ) - - url_class.SetURLBooleans( match_subdomains, keep_matched_subdomains, alphabetise_get_parameters, can_produce_multiple_files, should_be_associated_with_files, keep_fragment ) - - unnormalised_human_url = 'https://testbooru.cx/post/page.php?id=1234 56&s=view' - normalised_encoded_url = 'https://testbooru.cx/post/page.php?id=1234%2056&s=view' - - self.assertEqual( url_class.Normalise( unnormalised_human_url ), normalised_encoded_url ) - self.assertEqual( url_class.Normalise( normalised_encoded_url ), normalised_encoded_url ) - - parameters = [] - - parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 's', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ) ) ) - parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 'id', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '123456' ) ) ) - - # default test - - parameters = [] - - parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 's', value_string_match = ClientStrings.StringMatch( 
match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ) ) ) - parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 'id', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '123456' ) ) ) - parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 'pid', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '0' ), default_value = '0' ) ) - - url_class = ClientNetworkingURLClass.URLClass( name, url_type = url_type, preferred_scheme = preferred_scheme, netloc = netloc, path_components = path_components, parameters = parameters, send_referral_url = send_referral_url, referral_url_converter = referral_url_converter, gallery_index_type = gallery_index_type, gallery_index_identifier = gallery_index_identifier, gallery_index_delta = gallery_index_delta, example_url = example_url ) - - url_class.SetURLBooleans( match_subdomains, keep_matched_subdomains, alphabetise_get_parameters, can_produce_multiple_files, should_be_associated_with_files, keep_fragment ) - - unnormalised_without_pid = 'https://testbooru.cx/post/page.php?id=123456&s=view' - unnormalised_with_pid = 'https://testbooru.cx/post/page.php?id=123456&pid=3&s=view' - normalised_with_pid = 'https://testbooru.cx/post/page.php?id=123456&pid=0&s=view' - - self.assertEqual( url_class.Normalise( unnormalised_without_pid ), normalised_with_pid ) - self.assertEqual( url_class.Normalise( normalised_with_pid ), normalised_with_pid ) - self.assertEqual( url_class.Normalise( unnormalised_with_pid ), unnormalised_with_pid ) - - self.assertTrue( url_class.Matches( unnormalised_without_pid ) ) - self.assertTrue( url_class.Matches( unnormalised_with_pid ) ) - self.assertTrue( url_class.Matches( good_url ) ) - - parameters = [] - - parameters.append( 
ClientNetworkingURLClass.URLClassParameterFixedName( name = 's', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ) ) ) - parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 'id', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '123456' ) ) ) - # alphabetise_get_parameters = False @@ -402,7 +349,7 @@ class TestNetworkingDomain( unittest.TestCase ): path_components.append( ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'file', example_string = 'file' ), None ) ) path_components.append( ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_ANY ), None ) ) - parameters = [] + parameters = {} send_referral_url = ClientNetworkingURLClass.SEND_REFERRAL_URL_ONLY_IF_PROVIDED referral_url_converter = None @@ -454,10 +401,10 @@ class TestNetworkingDomain( unittest.TestCase ): path_components.append( ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'post', example_string = 'post' ), None ) ) path_components.append( ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'page.php', example_string = 'page.php' ), None ) ) - parameters = [] + parameters = {} - parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 's', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ) ) ) - parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 'id', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '123456' ) ) ) + parameters[ 's' ] = ( ClientStrings.StringMatch( match_type = 
ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ), None ) + parameters[ 'id' ] = ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '123456' ), None ) has_single_value_parameters = True single_value_parameters_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_REGEX, match_value = '^token.*', example_string = 'token1' ) diff --git a/hydrus/test/TestClientTags.py b/hydrus/test/TestClientTags.py index 71151ad4..1d2d545c 100644 --- a/hydrus/test/TestClientTags.py +++ b/hydrus/test/TestClientTags.py @@ -2167,14 +2167,14 @@ class TestTagObjects( unittest.TestCase ): ( 'system:media viewtime < 1 day 1 hour', "system:media viewtime < 1 days 1 hour 0 minutes" ), ( 'system:all viewtime > 1 hour 1 minute', "system:all viewtime > 1 hours 100 seconds" ), ( f'system:preview viewtime {HC.UNICODE_APPROX_EQUAL} 2 days 7 hours', "system:preview viewtime ~= 1 day 30 hours 100 minutes 90s" ), - ( 'system:has url matching regex index\\.php', " system:has url matching regex index\\.php" ), - ( 'system:does not have url matching regex index\\.php', "system:does not have a url matching regex index\\.php" ), - ( 'system:has url https://safebooru.donmai.us/posts/4695284', "system:has_url https://safebooru.donmai.us/posts/4695284" ), - ( 'system:does not have url https://safebooru.donmai.us/posts/4695284', " system:doesn't have url https://safebooru.donmai.us/posts/4695284 " ), - ( 'system:has url with domain safebooru.com', "system:has domain safebooru.com" ), - ( 'system:does not have url with domain safebooru.com', "system:doesn't have domain safebooru.com" ), - ( 'system:has url with class safebooru file page', "system:has url with class safebooru file page" ), - ( 'system:does not have url with class safebooru file page', "system:doesn't have a url with url class safebooru file page " ), + ( 'system:has a url matching regex: index\\.php', " 
system:has url matching regex index\\.php" ), + ( 'system:does not have a url matching regex: index\\.php', "system:does not have a url matching regex index\\.php" ), + ( 'system:has url: https://safebooru.donmai.us/posts/4695284', "system:has_url https://safebooru.donmai.us/posts/4695284" ), + ( 'system:does not have url: https://safebooru.donmai.us/posts/4695284', " system:doesn't have url https://safebooru.donmai.us/posts/4695284 " ), + ( 'system:has a url with domain: safebooru.com', "system:has domain safebooru.com" ), + ( 'system:does not have a url with domain: safebooru.com', "system:doesn't have domain safebooru.com" ), + ( 'system:has safebooru file page url', "system:has a url with class safebooru file page" ), + ( 'system:does not have safebooru file page url', "system:doesn't have a url with url class safebooru file page " ), ( 'system:tag as number: page less than 5', "system:tag as number page < 5" ), ( 'system:tag as number: page less than 5', "system:tag as number: page less than 5" ), ( 'system:number of notes: has notes', 'system:has note' ), @@ -2266,12 +2266,6 @@ class TestTagRendering( unittest.TestCase ): HG.test_controller.new_options.SetBoolean( 'replace_tag_underscores_with_spaces', False ) - HG.test_controller.new_options.SetBoolean( 'replace_tag_emojis_with_boxes', True ) - - self.assertEqual( ClientTags.RenderTag( 'title:skeb⛓️💙', True ), 'title:skeb□□' ) - - HG.test_controller.new_options.SetBoolean( 'replace_tag_emojis_with_boxes', False ) - self.assertEqual( ClientTags.RenderTag( 'character:lara', True ), 'character:lara' ) HG.test_controller.new_options.SetBoolean( 'show_namespaces', False ) diff --git a/hydrus/test/TestController.py b/hydrus/test/TestController.py index 47dfd0ae..07807c6d 100644 --- a/hydrus/test/TestController.py +++ b/hydrus/test/TestController.py @@ -632,7 +632,7 @@ class Controller( object ): def ImportURLFromAPI( self, url, filterable_tags, additional_service_keys_to_tags, destination_page_name, 
destination_page_key, show_destination_page ): - normalised_url = self.network_engine.domain_manager.NormaliseURL( url, ephemeral_ok = True ) + normalised_url = self.network_engine.domain_manager.NormaliseURL( url ) human_result_text = '"{}" URL added successfully.'.format( normalised_url ) From 4faa2e11fdd392672ee548f7e8fc27679e829fce Mon Sep 17 00:00:00 2001 From: Hydrus Network Developer Date: Wed, 27 Mar 2024 17:22:51 -0500 Subject: [PATCH 4/7] Revert "Revert "Version 567"" This reverts commit 4ae4e66a062d1cf4398ca45fa5fceaf0456b3714. --- docs/PTR.md | 2 +- docs/changelog.md | 75 ++-- docs/old_changelog.html | 32 ++ hydrus/client/ClientOptions.py | 1 + hydrus/client/db/ClientDB.py | 25 ++ hydrus/client/gui/ClientGUI.py | 2 +- hydrus/client/gui/ClientGUIDownloaders.py | 319 ++++++++------- hydrus/client/gui/ClientGUIFileSeedCache.py | 15 +- hydrus/client/gui/ClientGUIGallerySeedLog.py | 3 +- hydrus/client/gui/ClientGUIMediaActions.py | 2 +- hydrus/client/gui/ClientGUIMediaMenus.py | 5 +- .../gui/ClientGUIScrolledPanelsManagement.py | 8 +- hydrus/client/gui/ClientGUITags.py | 31 +- .../gui/lists/ClientGUIListConstants.py | 2 +- .../networking/ClientGUINetworkJobControl.py | 3 +- .../gui/search/ClientGUIPredicatesSingle.py | 41 +- hydrus/client/gui/widgets/ClientGUICommon.py | 21 +- .../client/importing/ClientImportFileSeeds.py | 14 +- .../importing/ClientImportGallerySeeds.py | 2 +- .../client/importing/ClientImportWatchers.py | 4 +- hydrus/client/metadata/ClientTags.py | 31 +- .../networking/ClientNetworkingDomain.py | 20 +- .../networking/ClientNetworkingFunctions.py | 87 ++-- .../client/networking/ClientNetworkingGUG.py | 70 ++-- .../networking/ClientNetworkingURLClass.py | 375 +++++++++++++++--- .../ClientSearchParseSystemPredicates.py | 14 +- hydrus/core/HydrusConstants.py | 2 +- hydrus/core/HydrusSerialisable.py | 1 + hydrus/core/files/HydrusFileHandling.py | 8 +- .../core/files/HydrusOfficeOpenXMLHandling.py | 20 +- .../HydrusNetworkVariableHandling.py | 2 
+ hydrus/external/SystemPredicateParser.py | 20 +- hydrus/test/TestClientNetworking.py | 67 +++- hydrus/test/TestClientTags.py | 22 +- hydrus/test/TestController.py | 2 +- 35 files changed, 931 insertions(+), 417 deletions(-) diff --git a/docs/PTR.md b/docs/PTR.md index 06f43f77..91a0cc58 100644 --- a/docs/PTR.md +++ b/docs/PTR.md @@ -16,7 +16,7 @@ The easiest method is to use the built in function, found under `help -> add the Once you are connected, Hydrus will proceed to download and then process the update files. The progress of this can be seen under `services -> review services -> remote -> tag repositories -> public tag repository`. Here you can view its status, your account (the default account is a shared public account. Currently only janitors and the administrator have personal accounts), tag status, and how synced you are. Being behind on the sync by a certain amount makes you unable to push tags and petitions until you are caught up again. !!! note "QuickSync 2" - If you are starting out with a completely fresh client, you can instead download a fully pre-synced client [here](https://breadthread.gay/) Though a little out of date, it will nonetheless save time. Some settings may differ from the defaults of an official installation. + If you are starting out with a completely fresh client, you can instead download a fully pre-synced client [here](https://breadthread.duckdns.org/) Though a little out of date, it will nonetheless save time. Some settings may differ from the defaults of an official installation. ## How does it work? For something to end up on the PTR it has to be pushed there. Tags can either be entered into the tag service manually by the user through the `manage tags` window, or be routed there by a parser when downloading files. See [parsing tags](getting_started_downloading.md). Once tags have been entered into the PTR tag service they are pending until pushed. 
This is indicated by the `pending ()` that will appear between `tags` and `help` in the menu bar. Here you can chose to either push your changes to the PTR or discard them. diff --git a/docs/changelog.md b/docs/changelog.md index 9b4b9b1c..2faafde9 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -7,6 +7,45 @@ title: Changelog !!! note This is the new changelog, only the most recent builds. For all versions, see the [old changelog](old_changelog.html). +## [Version 567](https://github.com/hydrusnetwork/hydrus/releases/tag/v567) + +### user contributions + +* thanks to a user, the new docx, pptx, and xlsx support is improved, with better thumbnails (better ratio, better icon itself, and sometimes an actual preview thumbnail for pptx), better file detection (fewer false positives with stuff like ppt templates), and word count for docx and pptx. I am queueing everyone's existing docx and pptx files for a metadata rescan and thumbnail regen on update +* thanks to a user, the cbz scanner now ignores the `__MACOSX` folder +* thanks to a user, setting the Qt style in *options->style* should be more reliable (fixing some name case sensitivity issues) +* thanks to a user, there's a new 'default' dark mode QSS stylesheet that has nicer valid/invalid colours. we'll build on this and try to detect dark mode better in future and auto-switch to this as the base when the application is in dark mode. + +### misc + +* added a 'tag in reverse' checkbox to the new incremental tagger panel. this simply applies the given iterator to the last file first and then works backwards, e.g. 5, 4, 3, 2, 1 for start=1, step=1 on five files +* all _new_ system:url predicates will have slightly different (standardised) labels, and all these labels should parse correctly in the system predicate parser if you copy/paste +* you should now be able to enter 'system:has url matching regex (regex with upper case)' and 'system:has url (url with upper case)' and it'll propagate through parsing. 
this definitely has not™ broken any other predicate parsing. you can enter url class names with upper case if you want, but url class names should now match regardless of letter case +* you can now open the 'extra info' button (up top of a media viewer) on a jpeg if that jpeg has no exif or other human-readable metadata (to see just the progressive and subsampling info) +* added a new EXPERIMENTAL checkbox to _options->tag presentation_ that will replace emojis and other unicode symbol garbage with □. if you have crazy rendering for emoji stuff, try it out +* the tag summary generators that make thumbnail banners now wash their tags through the 'render tag for user' system, which will apply this new emoji rule and 'replace underscores with spaces' +* updated the QuickSync link to its new home at https://breadthread.duckdns.org/ + +### URL storage/display changes + +* today I correct a foolish decision I made when I first implemented the hydrus downloader engine--handling and storing URLs internally as 'pretty' decoded text, rather than with the proper ugly '%20' stuff you sometimes see. this improves support for weird URLs and makes some behind the scenes things simpler. you do not need to make any changes, but there is a chance some particularly funky URLs will redownload once more if your subscription runs into them again (this change breaks some 'known url' checking logic, since what is stored is now slightly different, but this 99% doesn't affect Post URLs, so no big worries) +* so, URLs are no longer decoded in the normalisation step. they are now saved in the file log as their proper actual 'what is sent to the server' encoded text. it will display in UI as the pretty version, but if you copy to clipboard, you get the data version--pretty much how your web browser address bar works. 
I have made it show 'pretty' in the file log and search log lists, 'copy url' menu labels, and hyperlink tooltips, but in the more technical 'manage GUGs' and so on, it shows the data version. let me know if I have forgotten to display them pretty anywhere! +* when you paste a URL, some new normalisation tech tries to figure out if it is pre-encoded or not +* there's also some GUG work. when you enter a query text like `male/female` or `blonde_hair%20blue_eyes`, some new logic tries to infer whether what you entered is encoded or not. it should handle pretty much everything well unless you have a single-tag query with a legit percent character in the middle (in which case you'll have to enter `%25` instead, but we'll see if it ever happens) +* these changes simplify the url parsing routine, eliminating plenty of nonsense hackery I've inserted over the years to make things like `6+girls blonde_hair`/`6%2Bgirls+blonde_hair` work with a merged system. this has mostly been a delicate cleanup job; long planned, finally triggered + +### ephemeral URL parameters + +* I was going to roll out 'ephemeral token' parameters, and I basically had it done, but I realised late that I was being stupid in a brand new way, basically expanding the whitelist when turning off the blacklist was a nicer solution. I will work on this more next week, I think ultimately making it so Post URLs are not clipped of undefined parameters before they are sent to the server, just like for Gallery URLs. 
I will separately introduce 'I just need to add some random hex in this parameter to tell this cache I want the original' under different tech +* so, I did some behind the scenes URL filtering tech, and file import objects handle full and stripped down versions of Post URLs, but it doesn't do much yet + +### boring cleanup + +* I cleaned up some URL Class code +* the URL Class has a new buddy 'Parameter' class to handle param testing +* rewrote how the query string of a URL is deconstructed and scanned against your parameters. less chance of edge-case errors/merges and easier to expand in future +* brushed up the URL Class unit tests to account for the above changes and added new tests for encoding, ephemeral, and default parameter values (which must have been missed a long time ago) + ## [Version 566](https://github.com/hydrusnetwork/hydrus/releases/tag/v566) ### incremental tagging @@ -406,39 +445,3 @@ title: Changelog * some 'number of tags' queries should be a little faster * the 'tag suggestions' options page has a bit of brushed up UI and some new explanation labels * unified the various thumbnail generation error reporting for all the different filetypes. it should also print the file's hash, too, since most of these error contexts only have a temporary path to talk about at this stage, which isn't useful after the fact - -## [Version 557](https://github.com/hydrusnetwork/hydrus/releases/tag/v557) - -### misc - -* optimised large tag filter edit UI. you can now paste 5,000 items into an empty tag filter blacklist in less than a second, and if you have a big tag filter, removing or adding one thing is now instant (previously, this stuff would lag 4 seconds or more, sometimes multiple minutes!!) -* the ugoira 'num frames' counting method now discludes files ending in .js/.json, to catch future bundling of frame timings -* the cbz scanning tech should now recognise cbzs with four or fewer pages -* a legacy 'is this image all good?' 
check that happens on PIL-loading is now gone. this improves rendering for a variety of truncated files and clarifies some error messages (previously, this thing was just failing silently) -* fixed the delete file pre-flight logic so users on the non-advanced delete dialog can now delete repository updates. previously, they saw the menu entry, but hitting it was a no-op - -### better hash predicate parsing - -* `system:hash` labels are a little different now. they'll say `system:hash (md5) is abcd...`, with the algorithm after the "hash". hash is omitted for sha256 (the hydrus default). this eases parsing -* `system:similar to data` labels are a little different. they'll say 'distance' instead of 'max hamming', and the number and type of hashes they hold, and if they hold only pixel hashes, the distance is not stated -* `system:hash` predicate parsing is now more flexible. you can put the hash type pretty much anywhere now. -* `system:similar to` and `system:similar to data` predicate parsing is now more flexible. more combinations are allowed, and you can not include distance and it'll be fine -* these three hash predicates now copy to clipboard with all their hashes explicitly enumerated, making strings that are fully parsable! this is a big step forward in a completely sealed import-export predicate parsing loop; now I have the tech set up to export a different phrase to clipboard than what you see in the label, I just need the examples of where it goes wrong. if there is a system predicate that copies to clipboard in a way that won't parse back, let me know and I'll see if I can fix it. -* added more unit tests for this parsing - -### documentation and cleanup - -* wrote a guide on how to install 'Git for Windows' for the 'running from source' help. although most of the settings in its marathon 12-page install wizard can be left as default, the technical questions can be intimidating, so I've written them all out for a nice simple install. 
also brushed up some of the surrounding help here -* added a warning to the regular 'installing and updating' help regarding the danger of test-running extract releases before updating (you can overwrite your database by accident) -* thanks to a user, the filetypes help document is updated with Ugoira and CBZ info -* all the 'HydrusFiletypeHandling' files are refactored to a new 'files' module. there's a bunch of them these days! -* the hydrus.core.images module is moved beneath this 'files' module too -* the file log list panel right-click menu now says 'open URLs'/'open files' locations' depending on whether you are looking at a URL import log or local HDD import log - -### client api - -* the `file_metadata` call now returns `filetype_forced` and, if so, also `original_mime` to talk about the new forced filetype system -* the client api help and unit tests are updated to test this is working ok -* fixed a typo that was causing too much work in the updated file info manager call (and was often returning 'null' results for half-cached `file_metadata` requests with `only_return_basic_information=true`) -* thanks to a user, the `/add_urls/get_url_info` Client API call now has a cache timeout of ten minutes, and the `/add_urls/get_url_files` call now has a timeout of 30 seconds if all the files are 'already in db'. this should automatically reduce some overhead for several programs that talk to the Client API a lot about URLs -* the client api version is now 58 diff --git a/docs/old_changelog.html b/docs/old_changelog.html index 2d474a2c..ff8dd63f 100644 --- a/docs/old_changelog.html +++ b/docs/old_changelog.html @@ -34,6 +34,38 @@

      changelog

        +
      • +

        version 567

        +
          +
        • user contributions

        • +
        • thanks to a user, the new docx, pptx, and xlsx support is improved, with better thumbnails (better ratio, better icon itself, and sometimes an actual preview thumbnail for pptx), better file detection (fewer false positives with stuff like ppt templates), and word count for docx and pptx. I am queueing everyone's existing docx and pptx files for a metadata rescan and thumbnail regen on update
        • +
        • thanks to a user, the cbz scanner now ignores the `__MACOSX` folder
        • +
        • thanks to a user, setting the Qt style in *options->style* should be more reliable (fixing some name case sensitivity issues)
        • +
        • thanks to a user, there's a new 'default' dark mode QSS stylesheet that has nicer valid/invalid colours. we'll build on this and try to detect dark mode better in future and auto-switch to this as the base when the application is in dark mode.
        • +
        • misc

        • +
        • added a 'tag in reverse' checkbox to the new incremental tagger panel. this simply applies the given iterator to the last file first and then works backwards, e.g. 5, 4, 3, 2, 1 for start=1, step=1 on five files
        • +
        • all _new_ system:url predicates will have slightly different (standardised) labels, and all these labels should parse correctly in the system predicate parser if you copy/paste
        • +
        • you should now be able to enter 'system:has url matching regex (regex with upper case)' and 'system:has url (url with upper case)' and it'll propagate through parsing. this definitely has not™ broken any other predicate parsing. you can enter url class names with upper case if you want, but url class names should now match regardless of letter case
        • +
        • you can now open the 'extra info' button (up top of a media viewer) on a jpeg if that jpeg has no exif or other human-readable metadata (to see just the progressive and subsampling info)
        • +
        • added a new EXPERIMENTAL checkbox to _options->tag presentation_ that will replace emojis and other unicode symbol garbage with □. if you have crazy rendering for emoji stuff, try it out
        • +
        • the tag summary generators that make thumbnail banners now wash their tags through the 'render tag for user' system, which will apply this new emoji rule and 'replace underscores with spaces'
        • +
        • updated the QuickSync link to its new home at https://breadthread.duckdns.org/
        • +
        • URL storage/display changes

        • +
        • today I correct a foolish decision I made when I first implemented the hydrus downloader engine--handling and storing URLs internally as 'pretty' decoded text, rather than with the proper ugly '%20' stuff you sometimes see. this improves support for weird URLs and makes some behind the scenes things simpler. you do not need to make any changes, but there is a chance some particularly funky URLs will redownload once more if your subscription runs into them again (this change breaks some 'known url' checking logic, since what is stored is now slightly different, but this 99% doesn't affect Post URLs, so no big worries)
        • +
        • so, URLs are no longer decoded in the normalisation step. they are now saved in the file log as their proper actual 'what is sent to the server' encoded text. it will display in UI as the pretty version, but if you copy to clipboard, you get the data version--pretty much how your web browser address bar works. I have made it show 'pretty' in the file log and search log lists, 'copy url' menu labels, and hyperlink tooltips, but in the more technical 'manage GUGs' and so on, it shows the data version. let me know if I have forgotten to display them pretty anywhere!
        • +
        • when you paste a URL, some new normalisation tech tries to figure out if it is pre-encoded or not
        • +
        • there's also some GUG work. when you enter a query text like `male/female` or `blonde_hair%20blue_eyes`, some new logic tries to infer whether what you entered is encoded or not. it should handle pretty much everything well unless you have a single-tag query with a legit percent character in the middle (in which case you'll have to enter `%25` instead, but we'll see if it ever happens)
        • +
        • these changes simplify the url parsing routine, eliminating plenty of nonsense hackery I've inserted over the years to make things like `6+girls blonde_hair`/`6%2Bgirls+blonde_hair` work with a merged system. this has mostly been a delicate cleanup job; long planned, finally triggered
        • +
        • ephemeral URL parameters

        • +
        • I was going to roll out 'ephemeral token' parameters, and I basically had it done, but I realised late that I was being stupid in a brand new way, basically expanding the whitelist when turning off the blacklist was a nicer solution. I will work on this more next week, I think ultimately making it so Post URLs are not clipped of undefined parameters before they are sent to the server, just like for Gallery URLs. I will separately introduce 'I just need to add some random hex in this parameter to tell this cache I want the original' under different tech
        • +
        • so, I did some behind the scenes URL filtering tech, and file import objects handle full and stripped down versions of Post URLs, but it doesn't do much yet
        • +
        • boring cleanup

        • +
        • I cleaned up some URL Class code
        • +
        • the URL Class has a new buddy 'Parameter' class to handle param testing
        • +
        • rewrote how the query string of a URL is deconstructed and scanned against your parameters. less chance of edge-case errors/merges and easier to expand in future
        • +
        • brushed up the URL Class unit tests to account for the above changes and added new tests for encoding, ephemeral, and default parameter values (which must have been missed a long time ago)
        • +
        +
      • version 566

          diff --git a/hydrus/client/ClientOptions.py b/hydrus/client/ClientOptions.py index f9e18c07..c2a8a614 100644 --- a/hydrus/client/ClientOptions.py +++ b/hydrus/client/ClientOptions.py @@ -164,6 +164,7 @@ class ClientOptions( HydrusSerialisable.SerialisableBase ): self._dictionary[ 'booleans' ][ 'show_number_namespaces' ] = True self._dictionary[ 'booleans' ][ 'show_subtag_number_namespaces' ] = True self._dictionary[ 'booleans' ][ 'replace_tag_underscores_with_spaces' ] = False + self._dictionary[ 'booleans' ][ 'replace_tag_emojis_with_boxes' ] = False self._dictionary[ 'booleans' ][ 'verify_regular_https' ] = True diff --git a/hydrus/client/db/ClientDB.py b/hydrus/client/db/ClientDB.py index ee583575..a526ca92 100644 --- a/hydrus/client/db/ClientDB.py +++ b/hydrus/client/db/ClientDB.py @@ -10413,6 +10413,31 @@ class DB( HydrusDB.HydrusDB ): + if version == 566: + + try: + + table_join = self.modules_files_storage.GetTableJoinLimitedByFileDomain( self.modules_services.combined_local_file_service_id, 'files_info', HC.CONTENT_STATUS_CURRENT ) + + hash_ids = self._STL( self._Execute( 'SELECT hash_id FROM {} WHERE mime IN {};'.format( table_join, HydrusData.SplayListForDB( [ HC.APPLICATION_DOCX ] ) ) ) ) + + self.modules_files_maintenance_queue.AddJobs( hash_ids, ClientFiles.REGENERATE_FILE_DATA_JOB_FILE_METADATA ) + + hash_ids = self._STL( self._Execute( 'SELECT hash_id FROM {} WHERE mime IN {};'.format( table_join, HydrusData.SplayListForDB( [ HC.APPLICATION_PPTX ] ) ) ) ) + + self.modules_files_maintenance_queue.AddJobs( hash_ids, ClientFiles.REGENERATE_FILE_DATA_JOB_FILE_METADATA ) + self.modules_files_maintenance_queue.AddJobs( hash_ids, ClientFiles.REGENERATE_FILE_DATA_JOB_FORCE_THUMBNAIL ) + + except Exception as e: + + HydrusData.PrintException( e ) + + message = 'Trying to schedule a document metadata scan failed! Please let hydrus dev know!' 
+ + self.pub_initial_message( message ) + + + self._controller.frame_splash_status.SetTitleText( 'updated db to v{}'.format( HydrusData.ToHumanInt( version + 1 ) ) ) self._Execute( 'UPDATE version SET version = ?;', ( version + 1, ) ) diff --git a/hydrus/client/gui/ClientGUI.py b/hydrus/client/gui/ClientGUI.py index 3f5e4bff..d5501bdf 100644 --- a/hydrus/client/gui/ClientGUI.py +++ b/hydrus/client/gui/ClientGUI.py @@ -2213,7 +2213,7 @@ class FrameGUI( CAC.ApplicationCommandProcessorMixin, ClientGUITopLevelWindows.M additional_service_keys_to_tags = ClientTags.ServiceKeysToTags() - url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url ) + url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url, ephemeral_ok = True ) ( url_type, match_name, can_parse, cannot_parse_reason ) = self._controller.network_engine.domain_manager.GetURLParseCapability( url ) diff --git a/hydrus/client/gui/ClientGUIDownloaders.py b/hydrus/client/gui/ClientGUIDownloaders.py index fb2f3f75..39259ecc 100644 --- a/hydrus/client/gui/ClientGUIDownloaders.py +++ b/hydrus/client/gui/ClientGUIDownloaders.py @@ -366,7 +366,7 @@ class EditGUGPanel( ClientGUIScrolledPanels.EditPanel ): example_url = gug.GetExampleURL() - example_url = CG.client_controller.network_engine.domain_manager.NormaliseURL( example_url ) + example_url = CG.client_controller.network_engine.domain_manager.NormaliseURL( example_url, ephemeral_ok = True ) self._example_url.setText( example_url ) @@ -707,7 +707,7 @@ class EditGUGsPanel( ClientGUIScrolledPanels.EditPanel ): try: - example_url = CG.client_controller.network_engine.domain_manager.NormaliseURL( example_url ) + example_url = CG.client_controller.network_engine.domain_manager.NormaliseURL( example_url, ephemeral_ok = True ) url_class = CG.client_controller.network_engine.domain_manager.GetURLClass( example_url ) @@ -913,6 +913,96 @@ class EditGUGsPanel( ClientGUIScrolledPanels.EditPanel ): return gugs + +class 
EditURLClassParameterFixedNamePanel( ClientGUIScrolledPanels.EditPanel ): + + def __init__( self, parent: QW.QWidget, parameter: ClientNetworkingURLClass.URLClassParameterFixedName, dupe_names ): + + # maybe graduate this guy to a 'any type of parameter' panel and have a dropdown and show/hide fixed name etc.. + + ClientGUIScrolledPanels.EditPanel.__init__( self, parent ) + + self._dupe_names = dupe_names + + self._fixed_name = QW.QLineEdit( self ) + self._fixed_name.setToolTip( 'The "key" of the key=value pair.' ) + + value_string_match_panel = ClientGUICommon.StaticBox( self, 'value' ) + + from hydrus.client.gui import ClientGUIStringPanels + + self._value_string_match = ClientGUIStringPanels.EditStringMatchPanel( value_string_match_panel, parameter.GetValueStringMatch() ) + self._value_string_match.setToolTip( 'If the value of the key=value pair matches this, the URL Class matches!' ) + + self._default_value = ClientGUICommon.NoneableTextCtrl( self ) + self._default_value.setToolTip( 'If the URL is missing this key=value pair, you can add it here, and the URL Class will still match and will normalise with this default value. This can be useful for gallery URLs that have an implicit page=1 or index=0 for their first result--sometimes it is better to make that stuff explicit.' 
) + + # + + self.SetValue( parameter ) + + # + + value_string_match_panel.Add( self._value_string_match, CC.FLAGS_EXPAND_BOTH_WAYS ) + + rows = [] + + rows.append( ( 'name: ', self._fixed_name ) ) + rows.append( value_string_match_panel ) + rows.append( ( 'default value: ', self._default_value ) ) + + gridbox = ClientGUICommon.WrapInGrid( self, rows, add_stretch_at_end = False, expand_single_widgets = True ) + + vbox = QP.VBoxLayout() + + QP.AddToLayout( vbox, gridbox, CC.FLAGS_EXPAND_SIZER_BOTH_WAYS ) + + self.widget().setLayout( vbox ) + + + def _GetValue( self ): + + name = self._fixed_name.text() + + value_string_match = self._value_string_match.GetValue() + default_value = self._default_value.GetValue() + + parameter = ClientNetworkingURLClass.URLClassParameterFixedName( + name = name, + value_string_match = value_string_match, + default_value = default_value + ) + + return parameter + + + def GetValue( self ): + + parameter = self._GetValue() + + name = parameter.GetName() + + if name == '': + + raise HydrusExceptions.VetoException( 'Sorry, you have to set a key/name!' ) + + + if name in self._dupe_names: + + raise HydrusExceptions.VetoException( 'Sorry, your key/name already exists, pick something else!' 
) + + + return parameter + + + def SetValue( self, parameter: ClientNetworkingURLClass.URLClassParameterFixedName ): + + self._fixed_name.setText( parameter.GetName() ) + self._value_string_match.SetValue( parameter.GetValueStringMatch() ) + self._default_value.SetValue( parameter.GetDefaultValue() ) + + + class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): def __init__( self, parent: QW.QWidget, url_class: ClientNetworkingURLClass.URLClass ): @@ -932,7 +1022,14 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): self._url_type.addItem( HC.url_type_string_lookup[ u_t ], u_t ) - ( url_type, preferred_scheme, netloc, path_components, parameters, api_lookup_converter, send_referral_url, referral_url_converter, example_url ) = url_class.ToTuple() + url_type = url_class.GetURLType() + preferred_scheme = url_class.GetPreferredScheme() + netloc = url_class.GetNetloc() + path_components = url_class.GetPathComponents() + parameters = url_class.GetParameters() + api_lookup_converter = url_class.GetAPILookupConverter() + ( send_referral_url, referral_url_converter ) = url_class.GetReferralURLInfo() + example_url = url_class.GetExampleURL() self._notebook = ClientGUICommon.BetterNotebook( self ) @@ -1131,6 +1228,12 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): self._example_url_classes = ClientGUICommon.BetterStaticText( self ) + self._ephemeral_normalised_url = QW.QLineEdit( self ) + self._ephemeral_normalised_url.setReadOnly( True ) + self._ephemeral_normalised_url.setToolTip( 'This is what will be sent to the server.' 
) + + self._ephemeral_normalised_url.setVisible( False ) + self._normalised_url = QW.QLineEdit( self ) self._normalised_url.setReadOnly( True ) @@ -1166,7 +1269,7 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): self._path_components.AddDatas( path_components ) - self._parameters.AddDatas( list( parameters.items() ) ) + self._parameters.AddDatas( parameters ) self._parameters.Sort() @@ -1261,8 +1364,8 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): rows.append( ( 'if matching by subdomain, keep it when normalising?: ', self._keep_matched_subdomains ) ) rows.append( ( 'alphabetise GET parameters when normalising?: ', self._alphabetise_get_parameters ) ) - rows.append( ( 'do not allow any extra path components?: ', self._no_more_path_components_than_this ) ) - rows.append( ( 'do not allow any extra parameters?: ', self._no_more_parameters_than_this ) ) + rows.append( ( 'do not match on any extra path components?: ', self._no_more_path_components_than_this ) ) + rows.append( ( 'do not match on any extra parameters?: ', self._no_more_parameters_than_this ) ) rows.append( ( 'keep fragment when normalising?: ', self._keep_fragment ) ) rows.append( ( 'post page can produce multiple files?: ', self._can_produce_multiple_files ) ) rows.append( ( 'associate a \'known url\' with resulting files?: ', self._should_be_associated_with_files ) ) @@ -1287,6 +1390,7 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): rows = [] rows.append( ( 'example url: ', self._example_url ) ) + #rows.append( ( 'url sent to the server: ', self._ephemeral_normalised_url ) ) rows.append( ( 'normalised url: ', self._normalised_url ) ) gridbox_2 = ClientGUICommon.WrapInGrid( self, rows ) @@ -1327,63 +1431,25 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): def _AddParameters( self ): - with ClientGUIDialogs.DialogTextEntry( self, 'edit the key', placeholder = 'key', allow_blank = False ) as dlg: - - if dlg.exec() == QW.QDialog.Accepted: - 
- key = dlg.GetValue() - - else: - - return - - + existing_names = self._GetExistingParameterNames() - existing_keys = self._GetExistingKeys() + parameter = ClientNetworkingURLClass.URLClassParameterFixedName() - if key in existing_keys: + with ClientGUITopLevelWindowsPanels.DialogEdit( self, 'edit parameter' ) as dlg: - ClientGUIDialogsMessage.ShowWarning( self, 'That key already exists!' ) - - return - - - string_match = ClientStrings.StringMatch() - - with ClientGUITopLevelWindowsPanels.DialogEdit( self, 'edit value' ) as dlg: - - from hydrus.client.gui import ClientGUIStringPanels - - panel = ClientGUIStringPanels.EditStringMatchPanel( dlg, string_match ) + panel = EditURLClassParameterFixedNamePanel( dlg, parameter, existing_names ) dlg.SetPanel( panel ) if dlg.exec() == QW.QDialog.Accepted: - string_match = panel.GetValue() + parameter = panel.GetValue() - with ClientGUIDialogs.DialogTextEntry( self, 'Enter optional \'default\' value for this parameter, which will be filled in if missing. Leave blank for none (recommended).', allow_blank = True ) as dlg_default: - - if dlg_default.exec() == QW.QDialog.Accepted: - - default = dlg_default.GetValue() - - if default == '': - - default = None - - elif not string_match.Matches( default ): - - ClientGUIDialogsMessage.ShowWarning( self, 'That default does not match the given rule! Clearing it to none!' 
) - - default = None - - - else: - - return - - + self._parameters.AddDatas( ( parameter, ) ) + + self._parameters.Sort() + + self._UpdateControls() else: @@ -1391,14 +1457,6 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): - data = ( key, ( string_match, default ) ) - - self._parameters.AddDatas( ( data, ) ) - - self._parameters.Sort() - - self._UpdateControls() - def _AddPathComponent( self ): @@ -1408,23 +1466,31 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): return self._EditPathComponent( ( string_match, default ) ) - def _ConvertParameterToListCtrlTuples( self, data ): + def _ConvertParameterToListCtrlTuples( self, parameter: ClientNetworkingURLClass.URLClassParameterFixedName ): - ( key, ( string_match, default ) ) = data + name = parameter.GetName() + value_string_match = parameter.GetValueStringMatch() - pretty_key = key - pretty_string_match = string_match.ToString() + pretty_name = name + pretty_value_string_match = value_string_match.ToString() - if default is not None: + default_value = parameter.GetDefaultValue() + + if default_value is not None: - pretty_string_match += ' (default "' + default + '")' + pretty_value_string_match += f' (default "{default_value}")' - sort_key = pretty_key - sort_string_match = pretty_string_match + if parameter.IsEphemeralToken(): + + pretty_value_string_match += ' (is ephemeral)' + - display_tuple = ( pretty_key, pretty_string_match ) - sort_tuple = ( sort_key, sort_string_match ) + sort_name = pretty_name + sort_string_match = pretty_value_string_match + + display_tuple = ( pretty_name, pretty_value_string_match ) + sort_tuple = ( sort_name, sort_string_match ) return ( display_tuple, sort_tuple ) @@ -1458,86 +1524,28 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): for parameter in selected_params: - ( original_key, ( original_string_match, original_default ) ) = parameter + existing_names = set( self._GetExistingParameterNames() ) - with 
ClientGUIDialogs.DialogTextEntry( self, 'edit the key', default = original_key, allow_blank = False ) as dlg: - - if dlg.exec() == QW.QDialog.Accepted: - - key = dlg.GetValue() - - else: - - return - - - - if key != original_key: - - existing_keys = self._GetExistingKeys() - - if key in existing_keys: - - ClientGUIDialogsMessage.ShowWarning( self, 'That key already exists!' ) - - return - - + existing_names.discard( parameter.GetName() ) with ClientGUITopLevelWindowsPanels.DialogEdit( self, 'edit value' ) as dlg: - from hydrus.client.gui import ClientGUIStringPanels - - panel = ClientGUIStringPanels.EditStringMatchPanel( dlg, original_string_match ) + panel = EditURLClassParameterFixedNamePanel( self, parameter, existing_names ) dlg.SetPanel( panel ) if dlg.exec() == QW.QDialog.Accepted: - string_match = panel.GetValue() + edited_parameter = panel.GetValue() - if original_default is None: - - original_default = '' - + self._parameters.DeleteDatas( ( parameter, ) ) - with ClientGUIDialogs.DialogTextEntry( self, 'Enter optional \'default\' value for this parameter, which will be filled in if missing. Leave blank for none (recommended).', default = original_default, allow_blank = True ) as dlg_default: - - if dlg_default.exec() == QW.QDialog.Accepted: - - default = dlg_default.GetValue() - - if default == '': - - default = None - - elif not string_match.Matches( default ): - - ClientGUIDialogsMessage.ShowWarning( self, 'That default does not match the given rule! Clearing it to none!' 
) - - default = None - - - else: - - return - - + self._parameters.AddDatas( ( edited_parameter, ) ) - else: - - return + edited_datas.append( edited_parameter ) - self._parameters.DeleteDatas( ( parameter, ) ) - - new_parameter = ( key, ( string_match, default ) ) - - self._parameters.AddDatas( ( new_parameter, ) ) - - edited_datas.append( new_parameter ) - self._parameters.SelectDatas( edited_datas ) @@ -1597,13 +1605,13 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): - def _GetExistingKeys( self ): + def _GetExistingParameterNames( self ) -> typing.Set[ str ]: - params = self._parameters.GetData() + parameters = self._parameters.GetData() - keys = { key for ( key, string_match ) in params } + fixed_names = { parameter.GetName() for parameter in parameters if isinstance( parameter, ClientNetworkingURLClass.URLClassParameterFixedName ) } - return keys + return fixed_names def _GetValue( self ): @@ -1614,7 +1622,7 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): preferred_scheme = self._preferred_scheme.GetValue() netloc = self._netloc.text() path_components = self._path_components.GetData() - parameters = dict( self._parameters.GetData() ) + parameters = self._parameters.GetData() has_single_value_parameters = self._has_single_value_parameters.isChecked() single_value_parameters_string_match = self._single_value_parameters_string_match.GetValue() header_overrides = self._header_overrides.GetValue() @@ -1696,11 +1704,16 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): - for ( index, ( key, ( string_match, default ) ) ) in enumerate( self._parameters.GetData() ): + for parameter in self._parameters.GetData(): - if True in ( string_match.Matches( n ) for n in ( '0', '1', '10', '100', '42' ) ): + if isinstance( parameter, ClientNetworkingURLClass.URLClassParameterFixedName ): - choices.append( ( key + ' parameter', ( ClientNetworkingURLClass.GALLERY_INDEX_TYPE_PARAMETER, key ) ) ) + if True in ( parameter.MatchesValue( 
n ) for n in ( '0', '1', '10', '100', '42' ) ): + + name = parameter.GetName() + + choices.append( ( f'{name} parameter', ( ClientNetworkingURLClass.GALLERY_INDEX_TYPE_PARAMETER, name ) ) ) + @@ -1778,6 +1791,21 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): self._normalised_url.setText( normalised ) + ephemeral_normalised = url_class.Normalise( example_url, ephemeral_ok = True ) + + if ephemeral_normalised != normalised: + + self._ephemeral_normalised_url.setText( ephemeral_normalised ) + + self._ephemeral_normalised_url.setEnabled( True ) + + else: + + self._ephemeral_normalised_url.setText( '' ) + + self._ephemeral_normalised_url.setEnabled( False ) + + self._referral_url_converter.SetExampleString( normalised ) self._api_lookup_converter.SetExampleString( normalised ) @@ -1881,6 +1909,7 @@ class EditURLClassPanel( ClientGUIScrolledPanels.EditPanel ): self._example_url_classes.setText( 'Example does not match - '+reason ) self._example_url_classes.setObjectName( 'HydrusInvalid' ) + self._ephemeral_normalised_url.clear() self._normalised_url.clear() self._api_url.clear() diff --git a/hydrus/client/gui/ClientGUIFileSeedCache.py b/hydrus/client/gui/ClientGUIFileSeedCache.py index d9a5cac5..d21c2e34 100644 --- a/hydrus/client/gui/ClientGUIFileSeedCache.py +++ b/hydrus/client/gui/ClientGUIFileSeedCache.py @@ -32,6 +32,7 @@ from hydrus.client.importing import ClientImportFileSeeds from hydrus.client.importing.options import PresentationImportOptions from hydrus.client.metadata import ClientContentUpdates from hydrus.client.metadata import ClientTagSorting +from hydrus.client.networking import ClientNetworkingFunctions def ClearFileSeeds( win: QW.QWidget, file_seed_cache: ClientImportFileSeeds.FileSeedCache, statuses_to_remove ): @@ -358,7 +359,7 @@ class EditFileSeedCachePanel( ClientGUIScrolledPanels.EditPanel ): QP.CallAfter( self._UpdateText ) - def _ConvertFileSeedToListCtrlTuples( self, file_seed ): + def _ConvertFileSeedToListCtrlTuples( self, 
file_seed: ClientImportFileSeeds.FileSeed ): try: @@ -373,14 +374,22 @@ class EditFileSeedCachePanel( ClientGUIScrolledPanels.EditPanel ): pretty_file_seed_index = '--' - file_seed_data = file_seed.file_seed_data + file_seed_data = file_seed.file_seed_data_for_comparison status = file_seed.status added = file_seed.created modified = file_seed.modified source_time = file_seed.source_time note = file_seed.note - pretty_file_seed_data = str( file_seed_data ) + if file_seed.file_seed_type == ClientImportFileSeeds.FILE_SEED_TYPE_URL: + + pretty_file_seed_data = ClientNetworkingFunctions.ConvertURLToHumanString( file_seed_data ) + + else: + + pretty_file_seed_data = file_seed_data + + pretty_status = CC.status_string_lookup[ status ] if status != CC.STATUS_UNKNOWN else '' pretty_added = ClientTime.TimestampToPrettyTimeDelta( added ) pretty_modified = ClientTime.TimestampToPrettyTimeDelta( modified ) diff --git a/hydrus/client/gui/ClientGUIGallerySeedLog.py b/hydrus/client/gui/ClientGUIGallerySeedLog.py index fbfe8e00..bfe4b9ef 100644 --- a/hydrus/client/gui/ClientGUIGallerySeedLog.py +++ b/hydrus/client/gui/ClientGUIGallerySeedLog.py @@ -24,6 +24,7 @@ from hydrus.client.gui.lists import ClientGUIListConstants as CGLC from hydrus.client.gui.lists import ClientGUIListCtrl from hydrus.client.gui.widgets import ClientGUICommon from hydrus.client.importing import ClientImportGallerySeeds +from hydrus.client.networking import ClientNetworkingFunctions def ClearGallerySeeds( win: QW.QWidget, gallery_seed_log: ClientImportGallerySeeds.GallerySeedLog, statuses_to_remove, gallery_type_string ): @@ -297,7 +298,7 @@ class EditGallerySeedLogPanel( ClientGUIScrolledPanels.EditPanel ): note = gallery_seed.note pretty_gallery_seed_index = HydrusData.ToHumanInt( gallery_seed_index ) - pretty_url = url + pretty_url = ClientNetworkingFunctions.ConvertURLToHumanString( url ) pretty_status = CC.status_string_lookup[ status ] if status != CC.STATUS_UNKNOWN else '' pretty_added = 
ClientTime.TimestampToPrettyTimeDelta( added ) pretty_modified = ClientTime.TimestampToPrettyTimeDelta( modified ) diff --git a/hydrus/client/gui/ClientGUIMediaActions.py b/hydrus/client/gui/ClientGUIMediaActions.py index 430cc7cb..25da4b39 100644 --- a/hydrus/client/gui/ClientGUIMediaActions.py +++ b/hydrus/client/gui/ClientGUIMediaActions.py @@ -857,7 +857,7 @@ def ShowFileEmbeddedMetadata( win: QW.QWidget, media: ClientMedia.MediaSingleton - if exif_dict is None and file_text is None: + if exif_dict is None and file_text is None and len( extra_rows ) == 0: ClientGUIDialogsMessage.ShowWarning( win, 'Sorry, could not see any human-readable information in this file! Hydrus should have known this, so if this keeps happening, you may need to schedule a rescan of this info in file maintenance.' ) diff --git a/hydrus/client/gui/ClientGUIMediaMenus.py b/hydrus/client/gui/ClientGUIMediaMenus.py index 528ac207..c089798c 100644 --- a/hydrus/client/gui/ClientGUIMediaMenus.py +++ b/hydrus/client/gui/ClientGUIMediaMenus.py @@ -18,6 +18,7 @@ from hydrus.client.gui import ClientGUIMedia from hydrus.client.gui import ClientGUIMenus from hydrus.client.media import ClientMedia from hydrus.client.media import ClientMediaManagers +from hydrus.client.networking import ClientNetworkingFunctions def AddDuplicatesMenu( win: QW.QWidget, menu: QW.QMenu, location_context: ClientLocation.LocationContext, focus_singleton: ClientMedia.Media, num_selected: int, collections_selected: bool ): @@ -379,7 +380,7 @@ def AddKnownURLsViewCopyMenu( win, menu, focus_media, selected_media = None ): else: - label = url_class.GetName() + ': ' + url + label = url_class.GetName() + ': ' + ClientNetworkingFunctions.ConvertURLToHumanString( url ) focus_matched_labels_and_urls.append( ( label, url ) ) @@ -390,7 +391,7 @@ def AddKnownURLsViewCopyMenu( win, menu, focus_media, selected_media = None ): focus_labels_and_urls = list( focus_matched_labels_and_urls ) - focus_labels_and_urls.extend( ( ( url, url ) for 
url in focus_unmatched_urls ) ) + focus_labels_and_urls.extend( ( ( ClientNetworkingFunctions.ConvertURLToHumanString( url ), url ) for url in focus_unmatched_urls ) ) # figure out which urls these selected files have diff --git a/hydrus/client/gui/ClientGUIScrolledPanelsManagement.py b/hydrus/client/gui/ClientGUIScrolledPanelsManagement.py index 0ccc447b..f89bcdf7 100644 --- a/hydrus/client/gui/ClientGUIScrolledPanelsManagement.py +++ b/hydrus/client/gui/ClientGUIScrolledPanelsManagement.py @@ -4222,6 +4222,9 @@ class ManageOptionsPanel( ClientGUIScrolledPanels.ManagePanel ): self._replace_tag_underscores_with_spaces = QW.QCheckBox( render_panel ) + self._replace_tag_emojis_with_boxes = QW.QCheckBox( render_panel ) + self._replace_tag_emojis_with_boxes.setToolTip( 'This will replace emojis and weird symbols with □ in front-facing user views, in case you are getting crazy rendering. It may break some CJK punctuation.' ) + # namespace_colours_panel = ClientGUICommon.StaticBox( self, 'namespace colours' ) @@ -4239,6 +4242,7 @@ class ManageOptionsPanel( ClientGUIScrolledPanels.ManagePanel ): self._show_subtag_number_namespaces.setChecked( new_options.GetBoolean( 'show_subtag_number_namespaces' ) ) self._namespace_connector.setText( new_options.GetString( 'namespace_connector' ) ) self._replace_tag_underscores_with_spaces.setChecked( new_options.GetBoolean( 'replace_tag_underscores_with_spaces' ) ) + self._replace_tag_emojis_with_boxes.setChecked( new_options.GetBoolean( 'replace_tag_emojis_with_boxes' ) ) self._sibling_connector.setText( new_options.GetString( 'sibling_connector' ) ) self._fade_sibling_connector.setChecked( new_options.GetBoolean( 'fade_sibling_connector' ) ) self._sibling_connector_custom_namespace_colour.SetValue( new_options.GetNoneableString( 'sibling_connector_custom_namespace_colour' ) ) @@ -4287,6 +4291,7 @@ class ManageOptionsPanel( ClientGUIScrolledPanels.ManagePanel ): rows.append( ( 'OR connecting string: ', self._or_connector ) ) 
rows.append( ( 'Namespace for the colour of the OR connecting string: ', self._or_connector_custom_namespace_colour ) ) rows.append( ( 'EXPERIMENTAL: Replace all underscores with spaces: ', self._replace_tag_underscores_with_spaces ) ) + rows.append( ( 'EXPERIMENTAL: Replace all emojis with □: ', self._replace_tag_emojis_with_boxes ) ) gridbox = ClientGUICommon.WrapInGrid( render_panel, rows ) @@ -4394,6 +4399,7 @@ class ManageOptionsPanel( ClientGUIScrolledPanels.ManagePanel ): self._new_options.SetBoolean( 'show_subtag_number_namespaces', self._show_subtag_number_namespaces.isChecked() ) self._new_options.SetString( 'namespace_connector', self._namespace_connector.text() ) self._new_options.SetBoolean( 'replace_tag_underscores_with_spaces', self._replace_tag_underscores_with_spaces.isChecked() ) + self._new_options.SetBoolean( 'replace_tag_emojis_with_boxes', self._replace_tag_emojis_with_boxes.isChecked() ) self._new_options.SetString( 'sibling_connector', self._sibling_connector.text() ) self._new_options.SetBoolean( 'fade_sibling_connector', self._fade_sibling_connector.isChecked() ) @@ -5199,7 +5205,7 @@ class ManageURLsPanel( CAC.ApplicationCommandProcessorMixin, ClientGUIScrolledPa try: - normalised_url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url ) + normalised_url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url, ephemeral_ok = True ) normalised_urls.append( normalised_url ) diff --git a/hydrus/client/gui/ClientGUITags.py b/hydrus/client/gui/ClientGUITags.py index bd971570..27874909 100644 --- a/hydrus/client/gui/ClientGUITags.py +++ b/hydrus/client/gui/ClientGUITags.py @@ -1976,6 +1976,10 @@ class IncrementalTaggingPanel( ClientGUIScrolledPanels.EditPanel ): initial_suffix = CG.client_controller.new_options.GetString( 'last_incremental_tagging_suffix' ) self._suffix.setText( initial_suffix ) + self._tag_in_reverse = QW.QCheckBox( self ) + tt = 'Tag the last file first and work backwards, e.g. 
for start=1, step=1 on five files, set 5, 4, 3, 2, 1.' + self._tag_in_reverse.setToolTip( tt ) + initial_start = self._GetInitialStart() self._start = ClientGUICommon.BetterSpinBox( self, initial = initial_start, min = -10000000, max = 10000000 ) @@ -1999,6 +2003,7 @@ class IncrementalTaggingPanel( ClientGUIScrolledPanels.EditPanel ): rows.append( ( 'step: ', self._step ) ) rows.append( ( 'prefix: ', self._prefix ) ) rows.append( ( 'suffix: ', self._suffix ) ) + rows.append( ( 'tag in reverse: ', self._tag_in_reverse ) ) gridbox = ClientGUICommon.WrapInGrid( self, rows ) @@ -2017,6 +2022,7 @@ class IncrementalTaggingPanel( ClientGUIScrolledPanels.EditPanel ): self._suffix.textChanged.connect( self._UpdateSuffix ) self._start.valueChanged.connect( self._UpdateSummary ) self._step.valueChanged.connect( self._UpdateSummary ) + self._tag_in_reverse.clicked.connect( self._UpdateSummary ) self._UpdateSummary() @@ -2052,7 +2058,14 @@ class IncrementalTaggingPanel( ClientGUIScrolledPanels.EditPanel ): result = [] - for ( i, media ) in enumerate( self._medias ): + medias = list( self._medias ) + + if self._tag_in_reverse.isChecked(): + + medias.reverse() + + + for ( i, media ) in enumerate( medias ): number = start + i * step @@ -2063,6 +2076,11 @@ class IncrementalTaggingPanel( ClientGUIScrolledPanels.EditPanel ): result.append( ( media, tag ) ) + if self._tag_in_reverse.isChecked(): + + result.reverse() + + return result @@ -2150,7 +2168,14 @@ class IncrementalTaggingPanel( ClientGUIScrolledPanels.EditPanel ): else: - tag_summary = ', '.join( ( tag for ( media, tag ) in medias_and_tags[:3] ) ) + f' {HC.UNICODE_ELLIPSIS} ' + medias_and_tags[-1][1] + if self._tag_in_reverse.isChecked(): + + tag_summary = medias_and_tags[0][1] + f' {HC.UNICODE_ELLIPSIS} ' + ', '.join( ( tag for ( media, tag ) in medias_and_tags[-3:] ) ) + + else: + + tag_summary = ', '.join( ( tag for ( media, tag ) in medias_and_tags[:3] ) ) + f' {HC.UNICODE_ELLIPSIS} ' + medias_and_tags[-1][1] + # @@ 
-6382,6 +6407,8 @@ class TagSummaryGenerator( HydrusSerialisable.SerialisableBase ): if namespace in self._interesting_namespaces: + subtag = ClientTags.RenderTag( subtag, render_for_user = True ) + namespaces_to_subtags[ namespace ].append( subtag ) diff --git a/hydrus/client/gui/lists/ClientGUIListConstants.py b/hydrus/client/gui/lists/ClientGUIListConstants.py index f1aa58b9..0011f2c0 100644 --- a/hydrus/client/gui/lists/ClientGUIListConstants.py +++ b/hydrus/client/gui/lists/ClientGUIListConstants.py @@ -835,7 +835,7 @@ class COLUMN_LIST_URL_CLASS_PATH_COMPONENTS( COLUMN_LIST_DEFINITION ): column_list_type_name_lookup[ COLUMN_LIST_URL_CLASS_PATH_COMPONENTS.ID ] = 'url class path components' -register_column_type( COLUMN_LIST_URL_CLASS_PATH_COMPONENTS.ID, COLUMN_LIST_URL_CLASS_PATH_COMPONENTS.KEY, 'key', False, 14, True ) +register_column_type( COLUMN_LIST_URL_CLASS_PATH_COMPONENTS.ID, COLUMN_LIST_URL_CLASS_PATH_COMPONENTS.KEY, 'name', False, 14, True ) register_column_type( COLUMN_LIST_URL_CLASS_PATH_COMPONENTS.ID, COLUMN_LIST_URL_CLASS_PATH_COMPONENTS.VALUE, 'value', False, 45, True ) default_column_list_sort_lookup[ COLUMN_LIST_URL_CLASS_PATH_COMPONENTS.ID ] = ( COLUMN_LIST_URL_CLASS_PATH_COMPONENTS.KEY, True ) diff --git a/hydrus/client/gui/networking/ClientGUINetworkJobControl.py b/hydrus/client/gui/networking/ClientGUINetworkJobControl.py index b0dbde3f..7b5bc4cc 100644 --- a/hydrus/client/gui/networking/ClientGUINetworkJobControl.py +++ b/hydrus/client/gui/networking/ClientGUINetworkJobControl.py @@ -18,6 +18,7 @@ from hydrus.client.gui import QtPorting as QP from hydrus.client.gui.networking import ClientGUINetwork from hydrus.client.gui.widgets import ClientGUICommon from hydrus.client.networking import ClientNetworkingContexts +from hydrus.client.networking import ClientNetworkingFunctions from hydrus.client.networking import ClientNetworkingJobs class NetworkJobControl( QW.QFrame ): @@ -107,7 +108,7 @@ class NetworkJobControl( QW.QFrame ): url = 
self._network_job.GetURL() - ClientGUIMenus.AppendMenuLabel( menu, url, description = 'copy URL to the clipboard' ) + ClientGUIMenus.AppendMenuLabel( menu, ClientNetworkingFunctions.ConvertURLToHumanString( url ), copy_text = url, description = 'copy URL to the clipboard' ) ClientGUIMenus.AppendSeparator( menu ) diff --git a/hydrus/client/gui/search/ClientGUIPredicatesSingle.py b/hydrus/client/gui/search/ClientGUIPredicatesSingle.py index 5ff14f46..4eee2221 100644 --- a/hydrus/client/gui/search/ClientGUIPredicatesSingle.py +++ b/hydrus/client/gui/search/ClientGUIPredicatesSingle.py @@ -1324,9 +1324,9 @@ class PanelPredicateSystemKnownURLsExactURL( PanelPredicateSystemSingle ): hbox = QP.HBoxLayout() - QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'system:known url'), CC.FLAGS_CENTER_PERPENDICULAR ) + QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'system:'), CC.FLAGS_CENTER_PERPENDICULAR ) QP.AddToLayout( hbox, self._operator, CC.FLAGS_CENTER_PERPENDICULAR ) - QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'exact url:'), CC.FLAGS_CENTER_PERPENDICULAR ) + QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,' url '), CC.FLAGS_CENTER_PERPENDICULAR ) QP.AddToLayout( hbox, self._exact_url, CC.FLAGS_EXPAND_BOTH_WAYS ) self.setLayout( hbox ) @@ -1348,11 +1348,11 @@ class PanelPredicateSystemKnownURLsExactURL( PanelPredicateSystemSingle ): if operator: - operator_description = 'has url: ' + operator_description = 'has url ' else: - operator_description = 'does not have url: ' + operator_description = 'does not have url ' rule_type = 'exact_match' @@ -1396,9 +1396,9 @@ class PanelPredicateSystemKnownURLsDomain( PanelPredicateSystemSingle ): hbox = QP.HBoxLayout() - QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'system:known url'), CC.FLAGS_CENTER_PERPENDICULAR ) + QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'system:'), CC.FLAGS_CENTER_PERPENDICULAR ) QP.AddToLayout( hbox, self._operator, 
CC.FLAGS_CENTER_PERPENDICULAR ) - QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'a url with domain:'), CC.FLAGS_CENTER_PERPENDICULAR ) + QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,' url with domain '), CC.FLAGS_CENTER_PERPENDICULAR ) QP.AddToLayout( hbox, self._domain, CC.FLAGS_EXPAND_BOTH_WAYS ) self.setLayout( hbox ) @@ -1420,11 +1420,11 @@ class PanelPredicateSystemKnownURLsDomain( PanelPredicateSystemSingle ): if operator: - operator_description = 'has a url with domain: ' + operator_description = 'has url with domain ' else: - operator_description = 'does not have a url with domain: ' + operator_description = 'does not have url with domain ' rule_type = 'domain' @@ -1466,9 +1466,9 @@ class PanelPredicateSystemKnownURLsRegex( PanelPredicateSystemSingle ): hbox = QP.HBoxLayout() - QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'system:known url'), CC.FLAGS_CENTER_PERPENDICULAR ) + QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'system:'), CC.FLAGS_CENTER_PERPENDICULAR ) QP.AddToLayout( hbox, self._operator, CC.FLAGS_CENTER_PERPENDICULAR ) - QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'a url that matches this regex:'), CC.FLAGS_CENTER_PERPENDICULAR ) + QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,' url that matches regex '), CC.FLAGS_CENTER_PERPENDICULAR ) QP.AddToLayout( hbox, self._regex, CC.FLAGS_EXPAND_BOTH_WAYS ) self.setLayout( hbox ) @@ -1504,11 +1504,11 @@ class PanelPredicateSystemKnownURLsRegex( PanelPredicateSystemSingle ): if operator: - operator_description = 'has a url matching regex: ' + operator_description = 'has url matching regex ' else: - operator_description = 'does not have a url matching regex: ' + operator_description = 'does not have url matching regex ' rule_type = 'regex' @@ -1558,9 +1558,9 @@ class PanelPredicateSystemKnownURLsURLClass( PanelPredicateSystemSingle ): hbox = QP.HBoxLayout() - QP.AddToLayout( hbox, 
ClientGUICommon.BetterStaticText(self,'system:known url'), CC.FLAGS_CENTER_PERPENDICULAR ) + QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'system:'), CC.FLAGS_CENTER_PERPENDICULAR ) QP.AddToLayout( hbox, self._operator, CC.FLAGS_CENTER_PERPENDICULAR ) - QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,'url matching this class:'), CC.FLAGS_CENTER_PERPENDICULAR ) + QP.AddToLayout( hbox, ClientGUICommon.BetterStaticText(self,' url matching class '), CC.FLAGS_CENTER_PERPENDICULAR ) QP.AddToLayout( hbox, self._url_classes, CC.FLAGS_EXPAND_BOTH_WAYS ) self.setLayout( hbox ) @@ -1580,22 +1580,15 @@ class PanelPredicateSystemKnownURLsURLClass( PanelPredicateSystemSingle ): operator = self._operator.GetValue() - if operator: - - operator_description = 'has ' - - else: - - operator_description = 'does not have ' - - rule_type = 'url_class' url_class = self._url_classes.GetValue() rule = url_class - description = operator_description + url_class.GetName() + ' url' + url_class_name = url_class.GetName() + + description = f'has url with class {url_class_name}' if operator else f'does not have url with class {url_class_name}' predicates = ( ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( operator, rule_type, rule, description ) ), ) diff --git a/hydrus/client/gui/widgets/ClientGUICommon.py b/hydrus/client/gui/widgets/ClientGUICommon.py index d8e7248a..94d8ca6e 100644 --- a/hydrus/client/gui/widgets/ClientGUICommon.py +++ b/hydrus/client/gui/widgets/ClientGUICommon.py @@ -22,6 +22,7 @@ from hydrus.client.gui import ClientGUIMenus from hydrus.client.gui import ClientGUIShortcuts from hydrus.client.gui import QtPorting as QP from hydrus.client.gui.widgets import ClientGUIColourPicker +from hydrus.client.networking import ClientNetworkingFunctions def AddGridboxStretchSpacer( win: QW.QWidget, layout: QW.QGridLayout ): @@ -30,7 +31,7 @@ def AddGridboxStretchSpacer( win: QW.QWidget, layout: QW.QGridLayout ): QP.AddToLayout( layout, 
widget, CC.FLAGS_CENTER_PERPENDICULAR_EXPAND_DEPTH ) -def WrapInGrid( parent, rows, expand_text = False, add_stretch_at_end = True ): +def WrapInGrid( parent, rows, expand_text = False, add_stretch_at_end = True, expand_single_widgets = False ): gridbox = QP.GridLayout( cols = 2 ) @@ -118,10 +119,23 @@ def WrapInGrid( parent, rows, expand_text = False, add_stretch_at_end = True ): gridbox.next_col = 0 h_policy = QW.QSizePolicy.Expanding - v_policy = QW.QSizePolicy.Fixed + + if expand_single_widgets: + + v_policy = QW.QSizePolicy.Expanding + + else: + + v_policy = QW.QSizePolicy.Fixed + control.setSizePolicy( h_policy, v_policy ) + if expand_single_widgets: + + gridbox.setRowStretch( gridbox.rowCount() - 1, 1 ) + + if add_stretch_at_end: @@ -131,6 +145,7 @@ def WrapInGrid( parent, rows, expand_text = False, add_stretch_at_end = True ): return gridbox + def WrapInText( control, parent, text, object_name = None ): hbox = QP.HBoxLayout() @@ -699,7 +714,7 @@ class BetterHyperLink( BetterStaticText ): self._url = url - self.setToolTip( self._url ) + self.setToolTip( ClientNetworkingFunctions.ConvertURLToHumanString( self._url ) ) self.setTextFormat( QC.Qt.RichText ) self.setTextInteractionFlags( QC.Qt.LinksAccessibleByMouse | QC.Qt.LinksAccessibleByKeyboard ) diff --git a/hydrus/client/importing/ClientImportFileSeeds.py b/hydrus/client/importing/ClientImportFileSeeds.py index 6ac60445..6f274b5e 100644 --- a/hydrus/client/importing/ClientImportFileSeeds.py +++ b/hydrus/client/importing/ClientImportFileSeeds.py @@ -131,6 +131,7 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): self.file_seed_type = file_seed_type self.file_seed_data = file_seed_data + self.file_seed_data_for_comparison = file_seed_data self.created = HydrusTime.GetNow() self.modified = self.created @@ -165,7 +166,7 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): def __hash__( self ): - return ( self.file_seed_type, self.file_seed_data ).__hash__() + return ( self.file_seed_type, 
self.file_seed_data_for_comparison ).__hash__() def __ne__( self, other ): @@ -185,6 +186,7 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): if self.file_seed_type == FILE_SEED_TYPE_URL: urls.discard( self.file_seed_data ) + urls.discard( self.file_seed_data_for_comparison ) if self._referral_url is not None: @@ -210,6 +212,7 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): if self.file_seed_type == FILE_SEED_TYPE_URL: all_primary_urls.add( self.file_seed_data ) + all_primary_urls.add( self.file_seed_data_for_comparison ) if self._referral_url is not None: @@ -884,7 +887,7 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): if self.file_seed_type == FILE_SEED_TYPE_URL: - urls.append( self.file_seed_data ) + urls.append( self.file_seed_data_for_comparison ) if file_url is not None: @@ -953,7 +956,7 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): if self.file_seed_type == FILE_SEED_TYPE_URL: - search_urls = ClientNetworkingFunctions.GetSearchURLs( self.file_seed_data ) + search_urls = ClientNetworkingFunctions.GetSearchURLs( self.file_seed_data_for_comparison ) search_file_seeds = [ FileSeed( FILE_SEED_TYPE_URL, search_url ) for search_url in search_urls ] @@ -1137,7 +1140,8 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): try: - self.file_seed_data = CG.client_controller.network_engine.domain_manager.NormaliseURL( self.file_seed_data ) + self.file_seed_data = CG.client_controller.network_engine.domain_manager.NormaliseURL( self.file_seed_data, ephemeral_ok = True ) + self.file_seed_data_for_comparison = CG.client_controller.network_engine.domain_manager.NormaliseURL( self.file_seed_data ) except HydrusExceptions.URLClassException: @@ -1707,7 +1711,7 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): if self.file_seed_type == FILE_SEED_TYPE_URL: - potentially_associable_urls.add( self.file_seed_data ) + potentially_associable_urls.add( self.file_seed_data_for_comparison ) domain = 
ClientNetworkingFunctions.ConvertURLIntoDomain( self.file_seed_data ) diff --git a/hydrus/client/importing/ClientImportGallerySeeds.py b/hydrus/client/importing/ClientImportGallerySeeds.py index 284769ae..283365ed 100644 --- a/hydrus/client/importing/ClientImportGallerySeeds.py +++ b/hydrus/client/importing/ClientImportGallerySeeds.py @@ -118,7 +118,7 @@ class GallerySeed( HydrusSerialisable.SerialisableBase ): try: - url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url ) + url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url, ephemeral_ok = True ) except HydrusExceptions.URLClassException: diff --git a/hydrus/client/importing/ClientImportWatchers.py b/hydrus/client/importing/ClientImportWatchers.py index 042ffc4a..4c17603a 100644 --- a/hydrus/client/importing/ClientImportWatchers.py +++ b/hydrus/client/importing/ClientImportWatchers.py @@ -243,7 +243,7 @@ class MultipleWatcherImport( HydrusSerialisable.SerialisableBase ): return None - url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url ) + url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url, ephemeral_ok = True ) with self._lock: @@ -1762,7 +1762,7 @@ class WatcherImport( HydrusSerialisable.SerialisableBase ): try: - url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url ) + url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url, ephemeral_ok = True ) except HydrusExceptions.URLClassException: diff --git a/hydrus/client/metadata/ClientTags.py b/hydrus/client/metadata/ClientTags.py index c614a4c5..c50eaf55 100644 --- a/hydrus/client/metadata/ClientTags.py +++ b/hydrus/client/metadata/ClientTags.py @@ -1,4 +1,5 @@ import collections +import re import typing from hydrus.core import HydrusSerialisable @@ -20,6 +21,22 @@ tag_display_str_lookup = { TAG_DISPLAY_DISPLAY_IDEAL : 'ideal display tags' } +emoji_pattern = re.compile("[" + u"\U0001F600-\U0001F64F" # emoticons + 
u"\U0001F300-\U0001F5FF" # symbols & pictographs + u"\U0001F680-\U0001F6FF" # transport & map symbols + u"\U0001F700-\U0001F77F" # alchemical symbols + u"\U0001F780-\U0001F7FF" # Geometric Shapes Extended + u"\U0001F800-\U0001F8FF" # Supplemental Arrows-C + u"\U0001F900-\U0001F9FF" # Supplemental Symbols and Pictographs + u"\U0001FA00-\U0001FA6F" # Chess Symbols + u"\U0001FA70-\U0001FAFF" # Symbols and Pictographs Extended-A + u"\U00002600-\U000026FF" # Miscellaneous Symbols + u"\U00002702-\U000027B0" # Dingbats + u"\U00003000-\U0000303F" # CJK Symbols and Punctuation + "]+(?:\U0000FE0F)?", # make the preding character a colourful emoji, decode this for an example: b'\xe2\x9b\x93\xef\xb8\x8f' +flags=re.UNICODE) + have_shown_invalid_tag_warning = False def RenderNamespaceForUser( namespace ): @@ -50,7 +67,7 @@ def RenderTag( tag, render_for_user: bool ): if namespace == '': - return subtag + result = subtag else: @@ -73,9 +90,19 @@ def RenderTag( tag, render_for_user: bool ): connector = ':' - return namespace + connector + subtag + result = namespace + connector + subtag + if render_for_user: + + if new_options.GetBoolean( 'replace_tag_emojis_with_boxes' ): + + result = emoji_pattern.sub( '□', result ) + + + + return result + class ServiceKeysToTags( HydrusSerialisable.SerialisableBase, collections.defaultdict ): diff --git a/hydrus/client/networking/ClientNetworkingDomain.py b/hydrus/client/networking/ClientNetworkingDomain.py index 7a9fe004..9545bcea 100644 --- a/hydrus/client/networking/ClientNetworkingDomain.py +++ b/hydrus/client/networking/ClientNetworkingDomain.py @@ -266,7 +266,7 @@ class NetworkDomainManager( HydrusSerialisable.SerialisableBase ): seen_url_classes.add( api_url_class ) - api_url = api_url_class.Normalise( api_url ) + api_url = api_url_class.Normalise( api_url, ephemeral_ok = True ) return ( api_url_class, api_url ) @@ -1336,13 +1336,15 @@ class NetworkDomainManager( HydrusSerialisable.SerialisableBase ): - def GetURLClassFromName( self, 
name ): + def GetURLClassFromName( self, name: str ): with self._lock: + name_search = name.casefold() + for url_class in self._url_classes: - if url_class.GetName() == name: + if url_class.GetName().casefold() == name_search: return url_class @@ -1508,7 +1510,7 @@ class NetworkDomainManager( HydrusSerialisable.SerialisableBase ): return True - def NormaliseURL( self, url ): + def NormaliseURL( self, url, ephemeral_ok = False ): with self._lock: @@ -1523,6 +1525,8 @@ class NetworkDomainManager( HydrusSerialisable.SerialisableBase ): path = p.path params = p.params + # this puts them all in alphabetical order + ( query_dict, single_value_parameters, param_order ) = ClientNetworkingFunctions.ConvertQueryTextToDict( p.query ) query = ClientNetworkingFunctions.ConvertQueryDictToText( query_dict, single_value_parameters ) @@ -1535,14 +1539,14 @@ class NetworkDomainManager( HydrusSerialisable.SerialisableBase ): else: - normalised_url = url_class.Normalise( url ) + normalised_url = url_class.Normalise( url, ephemeral_ok = ephemeral_ok ) return normalised_url - def NormaliseURLs( self, urls: typing.Collection[ str ] ) -> typing.List[ str ]: + def NormaliseURLs( self, urls: typing.Collection[ str ], ephemeral_ok = False ) -> typing.List[ str ]: normalised_urls = [] @@ -1550,14 +1554,14 @@ class NetworkDomainManager( HydrusSerialisable.SerialisableBase ): try: - normalised_url = self.NormaliseURL( url ) + normalised_url = self.NormaliseURL( url, ephemeral_ok = ephemeral_ok ) except HydrusExceptions.URLClassException: continue - normalised_urls.append( url ) + normalised_urls.append( normalised_url ) normalised_urls = HydrusData.DedupeList( normalised_urls ) diff --git a/hydrus/client/networking/ClientNetworkingFunctions.py b/hydrus/client/networking/ClientNetworkingFunctions.py index 5440b75d..e056ec69 100644 --- a/hydrus/client/networking/ClientNetworkingFunctions.py +++ b/hydrus/client/networking/ClientNetworkingFunctions.py @@ -101,6 +101,7 @@ def ConvertHTTPToHTTPS( url 
): raise Exception( 'Given a url that did not have a scheme!' ) + def ConvertQueryDictToText( query_dict, single_value_parameters, param_order = None ): # we now do everything with requests, which does all the unicode -> %20 business naturally, phew @@ -142,7 +143,7 @@ def ConvertQueryDictToText( query_dict, single_value_parameters, param_order = N if key in query_dict: - params.append( '{}={}'.format( key, query_dict[ key ] ) ) + params.append( f'{key}={query_dict[ key ]}' ) @@ -153,16 +154,11 @@ def ConvertQueryDictToText( query_dict, single_value_parameters, param_order = N def ConvertQueryTextToDict( query_text ): - # we generally do not want quote characters, %20 stuff, in our urls. we would prefer properly formatted unicode + # in the old version of this func, we played silly games with character encoding. I made the foolish decision to try to handle/save URLs with %20 stuff decoded + # this lead to complexity with odd situations like '6+girls+skirt', which would come here encoded as '6%2Bgirls+skirt' + # I flipped back and forth and tried to preserve the encoding if it did stepped on x or did not change y, what a mess! - # so, let's replace all keys and values with unquoted versions - # -but- - # we only replace if it is a completely reversable operation! - # odd situations like '6+girls+skirt', which comes here encoded as '6%2Bgirls+skirt', shouldn't turn into '6+girls+skirt' - # so if there are a mix of encoded and non-encoded, we won't touch it here m8 - - # except these chars, which screw with GET arg syntax when unquoted - bad_chars = [ '&', '=', '/', '?', '#', ';', '+', ',' ] + # I no longer do this. 
I will encode if there is no '%' in there already, which catches cases of humans pasting/typing an URL with something human, but only if it is non-destructive param_order = [] @@ -186,23 +182,9 @@ def ConvertQueryTextToDict( query_text ): continue - try: + if '%' not in value: - unquoted_value = urllib.parse.unquote( value ) - - if True not in ( bad_char in unquoted_value for bad_char in bad_chars ): - - requoted_value = urllib.parse.quote( unquoted_value ) - - if requoted_value == value: - - value = unquoted_value - - - - except: - - pass + value = urllib.parse.quote( value, safe = '' ) single_value_parameters.append( value ) @@ -212,42 +194,14 @@ def ConvertQueryTextToDict( query_text ): ( key, value ) = result - try: + if '%' not in key: - unquoted_key = urllib.parse.unquote( key ) - - if True not in ( bad_char in unquoted_key for bad_char in bad_chars ): - - requoted_key = urllib.parse.quote( unquoted_key ) - - if requoted_key == key: - - key = unquoted_key - - - - except: - - pass + key = urllib.parse.quote( key, safe = '' ) - try: + if '%' not in value: - unquoted_value = urllib.parse.unquote( value ) - - if True not in ( bad_char in unquoted_value for bad_char in bad_chars ): - - requoted_value = urllib.parse.quote( unquoted_value ) - - if requoted_value == value: - - value = unquoted_value - - - - except: - - pass + value = urllib.parse.quote( value, safe = '' ) param_order.append( key ) @@ -258,6 +212,7 @@ def ConvertQueryTextToDict( query_text ): return ( query_dict, single_value_parameters, param_order ) + def ConvertURLIntoDomain( url ): parser_result = ParseURL( url ) @@ -282,6 +237,18 @@ def ConvertURLIntoSecondLevelDomain( url ): return ConvertDomainIntoSecondLevelDomain( domain ) + +def ConvertURLToHumanString( url: str ) -> str: + + # ok so the idea here is that we want to store 'ugly' urls behind the scenes, with quoted %20 gubbins, but any time we present to the user, we want to convert all that to real (URL-invalid) characters + # although there 
are some caveats, we can pretty much just do a dequote on the whole string and it'll be fine most of the time mate + # if we have a unicode domain, we'll need to figure out 'punycode' decoding, but w/e for now + + pretty_url = urllib.parse.unquote( url ) + + return pretty_url + + def CookieDomainMatches( cookie, search_domain ): cookie_domain = cookie.domain @@ -324,6 +291,10 @@ def GetSearchURLs( url ): try: + ephemeral_normalised_url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url, ephemeral_ok = True ) + + search_urls.add( ephemeral_normalised_url ) + normalised_url = CG.client_controller.network_engine.domain_manager.NormaliseURL( url ) search_urls.add( normalised_url ) diff --git a/hydrus/client/networking/ClientNetworkingGUG.py b/hydrus/client/networking/ClientNetworkingGUG.py index d0b72cd1..8d7febf6 100644 --- a/hydrus/client/networking/ClientNetworkingGUG.py +++ b/hydrus/client/networking/ClientNetworkingGUG.py @@ -108,46 +108,60 @@ class GalleryURLGenerator( HydrusSerialisable.SerialisableBaseNamed ): raise HydrusExceptions.GUGException( 'Replacement phrase not in URL template!' ) - ( first_part, second_part ) = self._url_template.split( self._replacement_phrase, 1 ) - - search_phrase_seems_to_go_in_path = '?' not in first_part - - search_terms = query_text.split( ' ' ) - - # if a user enters "%20" in a query, or any other percent-encoded char, we turn it into human here, lest it be re-quoted in a moment - # if a user enters "%25", i.e. 
"%", followed by some characters, then all bets are off - search_terms = [ urllib.parse.unquote( search_term ) for search_term in search_terms ] - - if search_phrase_seems_to_go_in_path: + if '%' in query_text: - # encode all this gubbins since requests won't be able to do it - # this basically fixes e621 searches for 'male/female', which through some httpconf trickery are embedded in path but end up in a query, so need to be encoded right beforehand + # redundant test but leave it in for now + if ' ' in query_text or '% ' in query_text or query_text.endswith( '%' ): + + # there is probably a legit % character here that should be encoded + + search_terms = query_text.split( ' ' ) + + we_think_query_text_is_pre_encoded = False + + elif '%20' in query_text: + + # we are generally confident the user pasted a multi-tag query they copied from a notepad or something + + search_terms = query_text.split( '%20' ) + + # any % character entered here should be encoded as '%25' + we_think_query_text_is_pre_encoded = True + + else: + + # we simply do not know in this case. 
this is a single tag with a % not at the end, but it could be male%2Ffemale or it could be "120%120%hello", the hit new anime series + # assuming it is the former more often than the latter, we will not intrude on what the user sent here and cross our fingers + + search_terms = [ query_text ] + + we_think_query_text_is_pre_encoded = True + + + else: + + search_terms = query_text.split( ' ' ) + + # normal, not pre-encoded text + we_think_query_text_is_pre_encoded = False + + + if not we_think_query_text_is_pre_encoded: encoded_search_terms = [ urllib.parse.quote( search_term, safe = '' ) for search_term in search_terms ] else: - encoded_search_terms = [] - - for search_term in search_terms: - - # when the tags separator is '+' but the tags include '6+girls', we run into fun internet land - - bad_chars = [ self._search_terms_separator, '&', '=', '/', '?', '#', ';' ] - - if True in ( bad_char in search_term for bad_char in bad_chars ): - - search_term = urllib.parse.quote( search_term, safe = '' ) - - - encoded_search_terms.append( search_term ) - + encoded_search_terms = search_terms try: search_phrase = self._search_terms_separator.join( encoded_search_terms ) + # we do not encode the whole thing here since we may want to keep tag-connector-+ for the '6+girls+skirt' = '6%2Bgirls+skirt' scenario + # some characters are optional or something when it comes to encoding. 
'+' is one of these + gallery_url = self._url_template.replace( self._replacement_phrase, search_phrase ) except Exception as e: diff --git a/hydrus/client/networking/ClientNetworkingURLClass.py b/hydrus/client/networking/ClientNetworkingURLClass.py index 988d66f4..57dfcb84 100644 --- a/hydrus/client/networking/ClientNetworkingURLClass.py +++ b/hydrus/client/networking/ClientNetworkingURLClass.py @@ -69,11 +69,101 @@ def SortURLClassesListDescendingComplexity( url_classes: typing.List[ "URLClass" # ( num_path_components, num_required_parameters, num_total_parameters, len_example_url ) url_classes.sort( key = lambda u_c: u_c.GetSortingComplexityKey(), reverse = True ) + +class URLClassParameterFixedName( HydrusSerialisable.SerialisableBase ): + + SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_URL_CLASS_PARAMETER_FIXED_NAME + SERIALISABLE_NAME = 'URL Class Parameter - Fixed Name' + SERIALISABLE_VERSION = 1 + + def __init__( self, name = None, value_string_match = None, default_value = None ): + + if name is None: + + name = 'name' + + + if value_string_match is None: + + value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'value', example_string = 'value' ) + + + HydrusSerialisable.SerialisableBase.__init__( self ) + + self._name = name + self._value_string_match = value_string_match + self._default_value = default_value + + + def __repr__( self ): + + text = f'URL Class Parameter - Fixed Name: {self._name}: {self._value_string_match.ToString()}' + + return text + + + def _GetSerialisableInfo( self ): + + serialisable_value_string_match = self._value_string_match.GetSerialisableTuple() + + return ( self._name, serialisable_value_string_match, self._default_value ) + + + def _InitialiseFromSerialisableInfo( self, serialisable_info ): + + ( self._name, serialisable_value_string_match, self._default_value ) = serialisable_info + + self._value_string_match = HydrusSerialisable.CreateFromSerialisableTuple( 
serialisable_value_string_match ) + + + def GetDefaultValue( self ): + + return self._default_value + + + def GetName( self ): + + return self._name + + + def GetValueStringMatch( self ): + + return self._value_string_match + + + def IsEphemeralToken( self ): + + return False + + + def MustBeInOriginalURL( self ): + + return self._default_value is None + + + def MatchesName( self, name ): + + return self._name == name + + + def MatchesValue( self, value ): + + return self._value_string_match.Matches( value ) + + + def TestValue( self, value ): + + self._value_string_match.Test( value ) + + + +HydrusSerialisable.SERIALISABLE_TYPES_TO_OBJECT_TYPES[ HydrusSerialisable.SERIALISABLE_TYPE_URL_CLASS_PARAMETER_FIXED_NAME ] = URLClassParameterFixedName + class URLClass( HydrusSerialisable.SerialisableBaseNamed ): SERIALISABLE_TYPE = HydrusSerialisable.SERIALISABLE_TYPE_URL_CLASS SERIALISABLE_NAME = 'URL Class' - SERIALISABLE_VERSION = 12 + SERIALISABLE_VERSION = 13 def __init__( self, @@ -116,10 +206,19 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): if parameters is None: - parameters = {} + parameters = [] - parameters[ 's' ] = ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ), None ) - parameters[ 'id' ] = ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '123456' ), None ) + p = URLClassParameterFixedName( + name = 's', + value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ) + ) + + parameters.append( p ) + + p = URLClassParameterFixedName( + name = 'id', + value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '123456' ) + ) if single_value_parameters_string_match is None: @@ -145,7 +244,7 @@ class URLClass( 
HydrusSerialisable.SerialisableBaseNamed ): # if the args are not serialisable stuff, lets overwrite here path_components = HydrusSerialisable.SerialisableList( path_components ) - parameters = HydrusSerialisable.SerialisableDictionary( parameters ) + parameters = HydrusSerialisable.SerialisableList( parameters ) HydrusSerialisable.SerialisableBaseNamed.__init__( self, name ) @@ -248,31 +347,98 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): return path - def _ClipAndFleshOutQuery( self, query, allow_clip = True ): + def _ClipAndFleshOutQuery( self, query: str, ephemeral_ok: bool, allow_clip: bool = True ): ( query_dict, single_value_parameters, param_order ) = ClientNetworkingFunctions.ConvertQueryTextToDict( query ) - if allow_clip: - - query_dict = { key : value for ( key, value ) in query_dict.items() if key in self._parameters } - + query_dict_keys_to_parameters = {} - for ( key, ( string_match, default ) ) in self._parameters.items(): + remaining_query_dict_names = set( query_dict.keys() ) + + # if we were feeling clever, we could sort these guys from most specific name to least, but w/e + for parameter in self._parameters: - if key not in query_dict: + match_found = False + + for name in remaining_query_dict_names: - if default is None: + if parameter.MatchesName( name ): - raise HydrusExceptions.URLClassException( 'Could not flesh out query--no default for ' + key + ' defined!' ) + query_dict_keys_to_parameters[ name ] = parameter + + remaining_query_dict_names.discard( name ) + + match_found = True + + break + + + + if not match_found: + + default_value = parameter.GetDefaultValue() + + if default_value is None: + + if not parameter.IsEphemeralToken(): + + raise HydrusExceptions.URLClassException( f'Could not flesh out query--no default for {name} defined!' 
) + else: - query_dict[ key ] = default + if isinstance( parameter, URLClassParameterFixedName ): + + name = parameter.GetName() + + query_dict_keys_to_parameters[ name ] = parameter + + query_dict[ name ] = default_value + + param_order.append( name ) + + else: + + raise HydrusExceptions.URLClassException( f'Could not flesh out query--cannot figure out a fixed name for {parameter}!' ) + - param_order.append( key ) + + + + + for name in remaining_query_dict_names: + + query_dict_keys_to_parameters[ name ] = None + + + # ok, we now have our fully fleshed out query_dict. let's filter it + + filtered_query_dict = {} + + for ( name, possible_parameter ) in query_dict_keys_to_parameters.items(): + + if possible_parameter is None: + + if allow_clip: + + # no matching param, discard it + continue + + + else: + + if possible_parameter.IsEphemeralToken() and not ephemeral_ok: + + continue + filtered_query_dict[ name ] = query_dict[ name ] + + + query_dict = filtered_query_dict + + # if self._alphabetise_get_parameters: @@ -293,7 +459,7 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): serialisable_url_class_key = self._url_class_key.hex() serialisable_path_components = [ ( string_match.GetSerialisableTuple(), default ) for ( string_match, default ) in self._path_components ] - serialisable_parameters = [ ( key, ( string_match.GetSerialisableTuple(), default ) ) for ( key, ( string_match, default ) ) in self._parameters.items() ] + serialisable_parameters = self._parameters.GetSerialisableTuple() serialisable_single_value_parameters_string_match = self._single_value_parameters_string_match.GetSerialisableTuple() serialisable_header_overrides = list( self._header_overrides.items() ) serialisable_api_lookup_converter = self._api_lookup_converter.GetSerialisableTuple() @@ -348,7 +514,7 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): self._url_class_key = bytes.fromhex( serialisable_url_class_key ) self._path_components = [ ( 
HydrusSerialisable.CreateFromSerialisableTuple( serialisable_string_match ), default ) for ( serialisable_string_match, default ) in serialisable_path_components ] - self._parameters = { key : ( HydrusSerialisable.CreateFromSerialisableTuple( serialisable_string_match ), default ) for ( key, ( serialisable_string_match, default ) ) in serialisable_parameters } + self._parameters = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_parameters ) self._single_value_parameters_string_match = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_single_value_parameters_string_match ) self._header_overrides = dict( serialisable_header_overrides ) self._api_lookup_converter = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_api_lookup_converter ) @@ -569,6 +735,68 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): return ( 12, new_serialisable_info ) + if version == 12: + + ( + serialisable_url_class_key, + url_type, + preferred_scheme, + netloc, + booleans, + serialisable_path_components, + serialisable_parameters, + has_single_value_parameters, + serialisable_single_value_parameters_match, + serialisable_header_overrides, + serialisable_api_lookup_converter, + send_referral_url, + serialisable_referrel_url_converter, + gallery_index_type, + gallery_index_identifier, + gallery_index_delta, + example_url + ) = old_serialisable_info + + new_parameters = HydrusSerialisable.SerialisableList() + + for ( name, ( serialisable_value_string_match, default_value ) ) in serialisable_parameters: + + value_string_match = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_value_string_match ) + + parameter = URLClassParameterFixedName( + name = name, + value_string_match = value_string_match, + default_value = default_value + ) + + new_parameters.append( parameter ) + + + serialisable_parameters = new_parameters.GetSerialisableTuple() + + new_serialisable_info = ( + serialisable_url_class_key, + url_type, + preferred_scheme, + netloc, 
+ booleans, + serialisable_path_components, + serialisable_parameters, + has_single_value_parameters, + serialisable_single_value_parameters_match, + serialisable_header_overrides, + serialisable_api_lookup_converter, + send_referral_url, + serialisable_referrel_url_converter, + gallery_index_type, + gallery_index_identifier, + gallery_index_delta, + example_url + ) + + return ( 13, new_serialisable_info ) + + def AlphabetiseGetParameters( self ): @@ -602,6 +830,11 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): return self._should_be_associated_with_files or self.UsesAPIURL() + def GetAPILookupConverter( self ): + + return self._api_lookup_converter + + def GetAPIURL( self, url = None ): if url is None: @@ -609,7 +842,7 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): url = self._example_url - url = self.Normalise( url ) + url = self.Normalise( url, ephemeral_ok = True ) return self._api_lookup_converter.Convert( url ) @@ -639,9 +872,14 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): return self._header_overrides + def GetNetloc( self ): + + return self._netloc + + def GetNextGalleryPage( self, url ): - url = self.Normalise( url ) + url = self.Normalise( url, ephemeral_ok = True ) p = ClientNetworkingFunctions.ParseURL( url ) @@ -731,6 +969,21 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): return r.geturl() + def GetParameters( self ) -> typing.List[ URLClassParameterFixedName ]: + + return self._parameters + + + def GetPathComponents( self ): + + return self._path_components + + + def GetPreferredScheme( self ): + + return self._preferred_scheme + + def GetReferralURL( self, url, referral_url ): if self._send_referral_url == SEND_REFERRAL_URL_ONLY_IF_PROVIDED: @@ -768,6 +1021,11 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): return referral_url + def GetReferralURLInfo( self ): + + return ( self._send_referral_url, self._referral_url_converter ) + + def GetSafeSummary( self ): return 'URL 
Class "' + self._name + '" - ' + ClientNetworkingFunctions.ConvertURLIntoDomain( self.GetExampleURL() ) @@ -789,9 +1047,9 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): num_required_path_components = len( [ 1 for ( string_match, default ) in self._path_components if default is None ] ) num_total_path_components = len( self._path_components ) - num_required_parameters = len( [ 1 for ( key, ( string_match, default ) ) in self._parameters.items() if default is None ] ) + num_required_parameters = len( [ 1 for parameter in self._parameters if parameter.GetDefaultValue() is None ] ) num_total_parameters = len( self._parameters ) - len_example_url = len( self.Normalise( self._example_url ) ) + len_example_url = len( self.Normalise( self._example_url, ephemeral_ok = True ) ) return ( num_required_path_components, num_total_path_components, num_required_parameters, num_total_parameters, len_example_url ) @@ -845,7 +1103,7 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): return self._match_subdomains - def Normalise( self, url ): + def Normalise( self, url, ephemeral_ok = False ): p = ClientNetworkingFunctions.ParseURL( url ) @@ -865,13 +1123,13 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): netloc = self._ClipNetLoc( p.netloc ) path = self._ClipAndFleshOutPath( p.path ) - query = self._ClipAndFleshOutQuery( p.query ) + query = self._ClipAndFleshOutQuery( p.query, ephemeral_ok ) else: netloc = p.netloc path = self._ClipAndFleshOutPath( p.path, allow_clip = False ) - query = self._ClipAndFleshOutQuery( p.query, allow_clip = False ) + query = self._ClipAndFleshOutQuery( p.query, ephemeral_ok, allow_clip = False ) r = urllib.parse.ParseResult( scheme, netloc, path, params, query, fragment ) @@ -985,9 +1243,12 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): url_path_components = url_path.split( '/' ) - if len( url_path_components ) > len( self._path_components ) and self._no_more_path_components_than_this: + if 
self._no_more_path_components_than_this: - raise HydrusExceptions.URLClassException( '"{}" has {} path components, but I will not allow more than my defined {}!'.format( url_path, len( url_path_components ), len( self._path_components ) ) ) + if len( url_path_components ) > len( self._path_components ): + + raise HydrusExceptions.URLClassException( '"{}" has {} path components, but I will not allow more than my defined {}!'.format( url_path, len( url_path_components ), len( self._path_components ) ) ) + for ( index, ( string_match, default ) ) in enumerate( self._path_components ): @@ -1020,42 +1281,55 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): - ( url_parameters, single_value_parameters, param_order ) = ClientNetworkingFunctions.ConvertQueryTextToDict( p.query ) + ( url_query_dict, single_value_parameters, param_order ) = ClientNetworkingFunctions.ConvertQueryTextToDict( p.query ) - if len( url_parameters ) > len( self._parameters ) and self._no_more_parameters_than_this: + if self._no_more_parameters_than_this: - raise HydrusExceptions.URLClassException( '"{}" has {} parameters, but I will not allow more than my defined {}!'.format( url_path, len( url_parameters ), len( self._parameters ) ) ) + good_fixed_names = { parameter.GetName() for parameter in self._parameters if isinstance( parameter, URLClassParameterFixedName ) } + + for ( name, value ) in url_query_dict.items(): + + if name not in good_fixed_names: + + raise HydrusExceptions.URLClassException( f'"This has a "{name}" parameter, but I am set to not allow any unexpected parameters!' 
) + + - for ( key, ( string_match, default ) ) in self._parameters.items(): + for parameter in self._parameters: - if key not in url_parameters: + if isinstance( parameter, URLClassParameterFixedName ): - if default is None: + name = parameter.GetName() + + if name not in url_query_dict: - raise HydrusExceptions.URLClassException( key + ' not found in ' + p.query ) - - else: - - continue + if parameter.MustBeInOriginalURL(): + + raise HydrusExceptions.URLClassException( f'{name} not found in {p.query}' ) + + else: + + continue + - - value = url_parameters[ key ] - - try: + value = url_query_dict[ name ] - string_match.Test( value ) - - except HydrusExceptions.StringMatchException as e: - - raise HydrusExceptions.URLClassException( str( e ) ) + try: + + parameter.TestValue( value ) + + except HydrusExceptions.StringMatchException as e: + + raise HydrusExceptions.URLClassException( f'Problem with {name}: ' + str( e ) ) + if len( single_value_parameters ) > 0 and not self._has_single_value_parameters and self._no_more_parameters_than_this: - raise HydrusExceptions.URLClassException( '"{}" has unexpected single-value parameters, but I am set not to allow any unexpected parameters!'.format( url_path ) ) + raise HydrusExceptions.URLClassException( '"{}" has unexpected single-value parameters, but I am set to not allow any unexpected parameters!'.format( url_path ) ) if self._has_single_value_parameters: @@ -1079,11 +1353,6 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): - def ToTuple( self ): - - return ( self._url_type, self._preferred_scheme, self._netloc, self._path_components, self._parameters, self._api_lookup_converter, self._send_referral_url, self._referral_url_converter, self._example_url ) - - def UsesAPIURL( self ): return self._api_lookup_converter.MakesChanges() diff --git a/hydrus/client/search/ClientSearchParseSystemPredicates.py b/hydrus/client/search/ClientSearchParseSystemPredicates.py index 936334f8..a4ae5931 100644 --- 
a/hydrus/client/search/ClientSearchParseSystemPredicates.py +++ b/hydrus/client/search/ClientSearchParseSystemPredicates.py @@ -186,7 +186,7 @@ def strip_quotes( s: str ) -> str: def url_class_pred_generator( include, url_class_name ): - description = ( 'has {} url' if include else 'does not have {} url' ).format( url_class_name ) + description = f'has url with class {url_class_name}' if include else f'does not have url with class {url_class_name}' try: @@ -251,12 +251,12 @@ pred_generators = { SystemPredicateParser.Predicate.MEDIA_VIEWTIME : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_FILE_VIEWING_STATS, ( 'viewtime', ( 'media', ), o, convert_timetuple_to_seconds( v ) ) ), SystemPredicateParser.Predicate.PREVIEW_VIEWTIME : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_FILE_VIEWING_STATS, ( 'viewtime', ( 'preview', ), o, convert_timetuple_to_seconds( v ) ) ), SystemPredicateParser.Predicate.ALL_VIEWTIME : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_FILE_VIEWING_STATS, ( 'viewtime', ( 'media', 'preview' ), o, convert_timetuple_to_seconds( v ) ) ), - SystemPredicateParser.Predicate.URL_REGEX : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( True, 'regex', v, 'has a url matching regex: {}'.format( v ) ) ), - SystemPredicateParser.Predicate.NO_URL_REGEX : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( False, 'regex', v, 'does not have a url matching regex: {}'.format( v ) ) ), - SystemPredicateParser.Predicate.URL : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( True, 'exact_match', v, 'has url: {}'.format( v ) ) ), - SystemPredicateParser.Predicate.NO_URL : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( False, 'exact_match', v, 'does not have url: {}'.format( v ) ) ), - SystemPredicateParser.Predicate.DOMAIN : 
lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( True, 'domain', v, 'has a url with domain: {}'.format( v ) ) ), - SystemPredicateParser.Predicate.NO_DOMAIN : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( False, 'domain', v, 'does not have a url with domain: {}'.format( v ) ) ), + SystemPredicateParser.Predicate.URL_REGEX : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( True, 'regex', v, 'has url matching regex {}'.format( v ) ) ), + SystemPredicateParser.Predicate.NO_URL_REGEX : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( False, 'regex', v, 'does not have url matching regex {}'.format( v ) ) ), + SystemPredicateParser.Predicate.URL : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( True, 'exact_match', v, 'has url {}'.format( v ) ) ), + SystemPredicateParser.Predicate.NO_URL : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( False, 'exact_match', v, 'does not have url {}'.format( v ) ) ), + SystemPredicateParser.Predicate.DOMAIN : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( True, 'domain', v, 'has url with domain {}'.format( v ) ) ), + SystemPredicateParser.Predicate.NO_DOMAIN : lambda o, v, u: ClientSearch.Predicate( ClientSearch.PREDICATE_TYPE_SYSTEM_KNOWN_URLS, ( False, 'domain', v, 'does not have url with domain {}'.format( v ) ) ), SystemPredicateParser.Predicate.URL_CLASS : lambda o, v, u: url_class_pred_generator( True, v ), SystemPredicateParser.Predicate.NO_URL_CLASS : lambda o, v, u: url_class_pred_generator( False, v ), SystemPredicateParser.Predicate.MOD_DATE : lambda o, v, u: date_pred_generator( ClientSearch.PREDICATE_TYPE_SYSTEM_MODIFIED_TIME, o, v ), diff --git a/hydrus/core/HydrusConstants.py b/hydrus/core/HydrusConstants.py index 63a419b9..6f00f5b2 100644 
--- a/hydrus/core/HydrusConstants.py +++ b/hydrus/core/HydrusConstants.py @@ -105,7 +105,7 @@ options = {} # Misc NETWORK_VERSION = 20 -SOFTWARE_VERSION = 566 +SOFTWARE_VERSION = 567 CLIENT_API_VERSION = 62 SERVER_THUMBNAIL_DIMENSIONS = ( 200, 200 ) diff --git a/hydrus/core/HydrusSerialisable.py b/hydrus/core/HydrusSerialisable.py index 41ef0a02..b581862b 100644 --- a/hydrus/core/HydrusSerialisable.py +++ b/hydrus/core/HydrusSerialisable.py @@ -141,6 +141,7 @@ SERIALISABLE_TYPE_METADATA_SINGLE_FILE_IMPORTER_MEDIA_TIMESTAMPS = 123 SERIALISABLE_TYPE_PETITION_HEADER = 124 SERIALISABLE_TYPE_STRING_JOINER = 125 SERIALISABLE_TYPE_FILE_FILTER = 126 +SERIALISABLE_TYPE_URL_CLASS_PARAMETER_FIXED_NAME = 127 SERIALISABLE_TYPES_TO_OBJECT_TYPES = {} diff --git a/hydrus/core/files/HydrusFileHandling.py b/hydrus/core/files/HydrusFileHandling.py index bdcedc26..889e4ada 100644 --- a/hydrus/core/files/HydrusFileHandling.py +++ b/hydrus/core/files/HydrusFileHandling.py @@ -263,11 +263,16 @@ def GenerateThumbnailNumPy( path, target_resolution, mime, duration, num_frames, thumbnail_numpy = HydrusOfficeOpenXMLHandling.GenerateThumbnailNumPyFromOfficePath( path, target_resolution ) + except HydrusExceptions.NoThumbnailFileException: + + thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution) + except Exception as e: PrintMoreThumbErrorInfo( e, f'Problem generating thumbnail for "{path}".', extra_description = extra_description ) thumbnail_numpy = GenerateDefaultThumbnail(mime, target_resolution) + elif mime == HC.APPLICATION_FLASH: @@ -547,12 +552,11 @@ def GetFileInfo( path, mime = None, ok_to_look_for_hydrus_updates = False ): pass - elif mime == HC.APPLICATION_DOCX: try: - ( num_words ) = HydrusOfficeOpenXMLHandling.GetDOCXInfo( path ) + num_words = HydrusOfficeOpenXMLHandling.GetDOCXInfo( path ) except HydrusExceptions.LimitedSupportFileException: diff --git a/hydrus/core/files/HydrusOfficeOpenXMLHandling.py b/hydrus/core/files/HydrusOfficeOpenXMLHandling.py index 
88c997ea..a0275653 100644 --- a/hydrus/core/files/HydrusOfficeOpenXMLHandling.py +++ b/hydrus/core/files/HydrusOfficeOpenXMLHandling.py @@ -1,6 +1,7 @@ import typing from hydrus.core import HydrusConstants as HC +from hydrus.core import HydrusExceptions from hydrus.core.files.HydrusArchiveHandling import GetZipAsPath from hydrus.core.files.images import HydrusImageHandling @@ -8,7 +9,6 @@ import xml.etree.ElementTree as ET from PIL import Image as PILImage - DOCX_XPATH = ".//{*}Override[@PartName='/word/document.xml'][@ContentType='application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml']" XLSX_XPATH = ".//{*}Override[@PartName='/xl/workbook.xml'][@ContentType='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml']" PPTX_XPATH = ".//{*}Override[@PartName='/ppt/presentation.xml'][@ContentType='application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml']" @@ -35,17 +35,26 @@ def MimeFromMicrosoftOpenXMLDocument(path: str): else: - return None + return None + except: return None + def GenerateThumbnailNumPyFromOfficePath( path: str, target_resolution: typing.Tuple[ int, int ] ) -> bytes: - zip_path_file_obj = GetZipAsPath( path, 'docProps/thumbnail.jpeg' ).open( 'rb' ) + try: + zip_path_file_obj = GetZipAsPath( path, 'docProps/thumbnail.jpeg' ).open( 'rb' ) + + except FileNotFoundError: + + raise HydrusExceptions.NoThumbnailFileException( 'No thumbnail.jpeg file!' 
) + + pil_image = HydrusImageHandling.GeneratePILImage( zip_path_file_obj ) thumbnail_pil_image = pil_image.resize( target_resolution, PILImage.LANCZOS ) @@ -64,7 +73,7 @@ PPTX_ASSUMED_DPI = 300 PPTX_PIXEL_PER_EMU = PPTX_ASSUMED_DPI / 914400 def PowerPointResolution( path: str ): - + file = GetZipAsPath( path, 'ppt/presentation.xml' ).open( 'rb' ) root = ET.parse( file ) @@ -126,5 +135,6 @@ def GetDOCXInfo( path:str ): num_words = None - return ( num_words ) + + return num_words diff --git a/hydrus/core/networking/HydrusNetworkVariableHandling.py b/hydrus/core/networking/HydrusNetworkVariableHandling.py index d3081f64..723acdde 100644 --- a/hydrus/core/networking/HydrusNetworkVariableHandling.py +++ b/hydrus/core/networking/HydrusNetworkVariableHandling.py @@ -381,6 +381,7 @@ def ParseTwistedRequestGETArgs( requests_args: dict, int_params, byte_params, st else: args[ name ] = json.loads( urllib.parse.unquote( value ) ) + except Exception as e: @@ -398,6 +399,7 @@ def ParseTwistedRequestGETArgs( requests_args: dict, int_params, byte_params, st else: list_of_hex_strings = json.loads( urllib.parse.unquote( value ) ) + args[ name ] = [ bytes.fromhex( hex_string ) for hex_string in list_of_hex_strings ] diff --git a/hydrus/external/SystemPredicateParser.py b/hydrus/external/SystemPredicateParser.py index fc4f238a..75c52b1c 100644 --- a/hydrus/external/SystemPredicateParser.py +++ b/hydrus/external/SystemPredicateParser.py @@ -265,12 +265,12 @@ SYSTEM_PREDICATES = { 'all viewtime': (Predicate.ALL_VIEWTIME, Operators.RELATIONAL, Value.TIME_INTERVAL, None), 'has (a )?url matching regex': (Predicate.URL_REGEX, None, Value.ANY_STRING, None), '(does not|doesn\'t) have (a )?url matching regex': (Predicate.NO_URL_REGEX, None, Value.ANY_STRING, None), - 'has url': (Predicate.URL, None, Value.ANY_STRING, None), - '(does not|doesn\'t) have url': (Predicate.NO_URL, None, Value.ANY_STRING, None), - 'has (a )?(url with )?domain': (Predicate.DOMAIN, None, Value.ANY_STRING, None), - 
'(does not|doesn\'t) have (a )?(url with )?domain': (Predicate.NO_DOMAIN, None, Value.ANY_STRING, None), - 'has (a )?url with (url )?class': (Predicate.URL_CLASS, None, Value.ANY_STRING, None), - '(does not|doesn\'t) have (a )?url with (url )?class': (Predicate.NO_URL_CLASS, None, Value.ANY_STRING, None), + 'has url:? (?=http)': (Predicate.URL, None, Value.ANY_STRING, None), + '(does not|doesn\'t) have url:? (?=http)': (Predicate.NO_URL, None, Value.ANY_STRING, None), + 'has (an? )?(url with )?domain': (Predicate.DOMAIN, None, Value.ANY_STRING, None), + '(does not|doesn\'t) have (an? )?(url with )?domain': (Predicate.NO_DOMAIN, None, Value.ANY_STRING, None), + 'has (an? )?url with (url )?class': (Predicate.URL_CLASS, None, Value.ANY_STRING, None), + '(does not|doesn\'t) have (an? )?url with (url )?class': (Predicate.NO_URL_CLASS, None, Value.ANY_STRING, None), 'tag as number': (Predicate.TAG_AS_NUMBER, Operators.TAG_RELATIONAL, Value.INTEGER, None), 'has notes?$': (Predicate.HAS_NOTES, None, None, None), '((has )?no|does not have( a)?|doesn\'t have) notes?$': (Predicate.NO_NOTES, None, None, None), @@ -300,7 +300,13 @@ def parse_system_predicate( string: str ): # TODO: (hydev): rework this thing into passing around a 'parse result object' that the operator parser can set a value for and say 'yeah value is sorted' for things like 'has words' = '> 0' in one swoop - string = string.lower().strip() + string = string.strip() + + if 'url' not in string: # hack for system:url has regex (blah) and matching url in general + + string = string.lower() + + string = string.replace( '_', ' ' ) if string.startswith( "-" ): raise ValueError( "System predicate can't start with negation" ) diff --git a/hydrus/test/TestClientNetworking.py b/hydrus/test/TestClientNetworking.py index ec64fb35..9361eda9 100644 --- a/hydrus/test/TestClientNetworking.py +++ b/hydrus/test/TestClientNetworking.py @@ -228,6 +228,8 @@ class TestNetworkingDomain( unittest.TestCase ): def test_url_classes( self 
): + # TODO: Yo, these all suck and should be broken into separate spammy tests with more appropriate example urls and all that! + name = 'test' url_type = HC.URL_TYPE_POST preferred_scheme = 'https' @@ -245,10 +247,10 @@ class TestNetworkingDomain( unittest.TestCase ): path_components.append( ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'post', example_string = 'post' ), None ) ) path_components.append( ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'page.php', example_string = 'page.php' ), None ) ) - parameters = {} + parameters = [] - parameters[ 's' ] = ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ), None ) - parameters[ 'id' ] = ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '123456' ), None ) + parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 's', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ) ) ) + parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 'id', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '123456' ) ) ) send_referral_url = ClientNetworkingURLClass.SEND_REFERRAL_URL_ONLY_IF_PROVIDED referral_url_converter = None @@ -278,6 +280,57 @@ class TestNetworkingDomain( unittest.TestCase ): self.assertEqual( url_class.GetReferralURL( good_url, referral_url ), referral_url ) self.assertEqual( url_class.GetReferralURL( good_url, None ), None ) + # encoding test + + parameters = [] + + parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 's', value_string_match = ClientStrings.StringMatch( match_type = 
ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ) ) ) + parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 'id', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_ANY, example_string = 'hello' ) ) ) + + url_class = ClientNetworkingURLClass.URLClass( name, url_type = url_type, preferred_scheme = preferred_scheme, netloc = netloc, path_components = path_components, parameters = parameters, send_referral_url = send_referral_url, referral_url_converter = referral_url_converter, gallery_index_type = gallery_index_type, gallery_index_identifier = gallery_index_identifier, gallery_index_delta = gallery_index_delta, example_url = example_url ) + + url_class.SetURLBooleans( match_subdomains, keep_matched_subdomains, alphabetise_get_parameters, can_produce_multiple_files, should_be_associated_with_files, keep_fragment ) + + unnormalised_human_url = 'https://testbooru.cx/post/page.php?id=1234 56&s=view' + normalised_encoded_url = 'https://testbooru.cx/post/page.php?id=1234%2056&s=view' + + self.assertEqual( url_class.Normalise( unnormalised_human_url ), normalised_encoded_url ) + self.assertEqual( url_class.Normalise( normalised_encoded_url ), normalised_encoded_url ) + + parameters = [] + + parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 's', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ) ) ) + parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 'id', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '123456' ) ) ) + + # default test + + parameters = [] + + parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 's', value_string_match = ClientStrings.StringMatch( match_type = 
ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ) ) ) + parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 'id', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '123456' ) ) ) + parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 'pid', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '0' ), default_value = '0' ) ) + + url_class = ClientNetworkingURLClass.URLClass( name, url_type = url_type, preferred_scheme = preferred_scheme, netloc = netloc, path_components = path_components, parameters = parameters, send_referral_url = send_referral_url, referral_url_converter = referral_url_converter, gallery_index_type = gallery_index_type, gallery_index_identifier = gallery_index_identifier, gallery_index_delta = gallery_index_delta, example_url = example_url ) + + url_class.SetURLBooleans( match_subdomains, keep_matched_subdomains, alphabetise_get_parameters, can_produce_multiple_files, should_be_associated_with_files, keep_fragment ) + + unnormalised_without_pid = 'https://testbooru.cx/post/page.php?id=123456&s=view' + unnormalised_with_pid = 'https://testbooru.cx/post/page.php?id=123456&pid=3&s=view' + normalised_with_pid = 'https://testbooru.cx/post/page.php?id=123456&pid=0&s=view' + + self.assertEqual( url_class.Normalise( unnormalised_without_pid ), normalised_with_pid ) + self.assertEqual( url_class.Normalise( normalised_with_pid ), normalised_with_pid ) + self.assertEqual( url_class.Normalise( unnormalised_with_pid ), unnormalised_with_pid ) + + self.assertTrue( url_class.Matches( unnormalised_without_pid ) ) + self.assertTrue( url_class.Matches( unnormalised_with_pid ) ) + self.assertTrue( url_class.Matches( good_url ) ) + + parameters = [] + + parameters.append( 
ClientNetworkingURLClass.URLClassParameterFixedName( name = 's', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ) ) ) + parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 'id', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '123456' ) ) ) + # alphabetise_get_parameters = False @@ -349,7 +402,7 @@ class TestNetworkingDomain( unittest.TestCase ): path_components.append( ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'file', example_string = 'file' ), None ) ) path_components.append( ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_ANY ), None ) ) - parameters = {} + parameters = [] send_referral_url = ClientNetworkingURLClass.SEND_REFERRAL_URL_ONLY_IF_PROVIDED referral_url_converter = None @@ -401,10 +454,10 @@ class TestNetworkingDomain( unittest.TestCase ): path_components.append( ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'post', example_string = 'post' ), None ) ) path_components.append( ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'page.php', example_string = 'page.php' ), None ) ) - parameters = {} + parameters = [] - parameters[ 's' ] = ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ), None ) - parameters[ 'id' ] = ( ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '123456' ), None ) + parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 's', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FIXED, match_value = 'view', example_string = 'view' ) ) ) + 
parameters.append( ClientNetworkingURLClass.URLClassParameterFixedName( name = 'id', value_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_FLEXIBLE, match_value = ClientStrings.NUMERIC, example_string = '123456' ) ) ) has_single_value_parameters = True single_value_parameters_string_match = ClientStrings.StringMatch( match_type = ClientStrings.STRING_MATCH_REGEX, match_value = '^token.*', example_string = 'token1' ) diff --git a/hydrus/test/TestClientTags.py b/hydrus/test/TestClientTags.py index 1d2d545c..71151ad4 100644 --- a/hydrus/test/TestClientTags.py +++ b/hydrus/test/TestClientTags.py @@ -2167,14 +2167,14 @@ class TestTagObjects( unittest.TestCase ): ( 'system:media viewtime < 1 day 1 hour', "system:media viewtime < 1 days 1 hour 0 minutes" ), ( 'system:all viewtime > 1 hour 1 minute', "system:all viewtime > 1 hours 100 seconds" ), ( f'system:preview viewtime {HC.UNICODE_APPROX_EQUAL} 2 days 7 hours', "system:preview viewtime ~= 1 day 30 hours 100 minutes 90s" ), - ( 'system:has a url matching regex: index\\.php', " system:has url matching regex index\\.php" ), - ( 'system:does not have a url matching regex: index\\.php', "system:does not have a url matching regex index\\.php" ), - ( 'system:has url: https://safebooru.donmai.us/posts/4695284', "system:has_url https://safebooru.donmai.us/posts/4695284" ), - ( 'system:does not have url: https://safebooru.donmai.us/posts/4695284', " system:doesn't have url https://safebooru.donmai.us/posts/4695284 " ), - ( 'system:has a url with domain: safebooru.com', "system:has domain safebooru.com" ), - ( 'system:does not have a url with domain: safebooru.com', "system:doesn't have domain safebooru.com" ), - ( 'system:has safebooru file page url', "system:has a url with class safebooru file page" ), - ( 'system:does not have safebooru file page url', "system:doesn't have a url with url class safebooru file page " ), + ( 'system:has url matching regex index\\.php', " system:has url matching 
regex index\\.php" ), + ( 'system:does not have url matching regex index\\.php', "system:does not have a url matching regex index\\.php" ), + ( 'system:has url https://safebooru.donmai.us/posts/4695284', "system:has_url https://safebooru.donmai.us/posts/4695284" ), + ( 'system:does not have url https://safebooru.donmai.us/posts/4695284', " system:doesn't have url https://safebooru.donmai.us/posts/4695284 " ), + ( 'system:has url with domain safebooru.com', "system:has domain safebooru.com" ), + ( 'system:does not have url with domain safebooru.com', "system:doesn't have domain safebooru.com" ), + ( 'system:has url with class safebooru file page', "system:has url with class safebooru file page" ), + ( 'system:does not have url with class safebooru file page', "system:doesn't have a url with url class safebooru file page " ), ( 'system:tag as number: page less than 5', "system:tag as number page < 5" ), ( 'system:tag as number: page less than 5', "system:tag as number: page less than 5" ), ( 'system:number of notes: has notes', 'system:has note' ), @@ -2266,6 +2266,12 @@ class TestTagRendering( unittest.TestCase ): HG.test_controller.new_options.SetBoolean( 'replace_tag_underscores_with_spaces', False ) + HG.test_controller.new_options.SetBoolean( 'replace_tag_emojis_with_boxes', True ) + + self.assertEqual( ClientTags.RenderTag( 'title:skeb⛓️💙', True ), 'title:skeb□□' ) + + HG.test_controller.new_options.SetBoolean( 'replace_tag_emojis_with_boxes', False ) + self.assertEqual( ClientTags.RenderTag( 'character:lara', True ), 'character:lara' ) HG.test_controller.new_options.SetBoolean( 'show_namespaces', False ) diff --git a/hydrus/test/TestController.py b/hydrus/test/TestController.py index 07807c6d..47dfd0ae 100644 --- a/hydrus/test/TestController.py +++ b/hydrus/test/TestController.py @@ -632,7 +632,7 @@ class Controller( object ): def ImportURLFromAPI( self, url, filterable_tags, additional_service_keys_to_tags, destination_page_name, destination_page_key, 
show_destination_page ): - normalised_url = self.network_engine.domain_manager.NormaliseURL( url ) + normalised_url = self.network_engine.domain_manager.NormaliseURL( url, ephemeral_ok = True ) human_result_text = '"{}" URL added successfully.'.format( normalised_url ) From 3c9793e788f912725b65bb3220e3d0f60f451758 Mon Sep 17 00:00:00 2001 From: Hydrus Network Developer Date: Wed, 27 Mar 2024 17:23:03 -0500 Subject: [PATCH 5/7] Revert "Revert "Fixing URL comparison issue"" This reverts commit 9ab079a80f4aab656b11f8a62a3f5f17f5c13469. --- .../client/importing/ClientImportFileSeeds.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/hydrus/client/importing/ClientImportFileSeeds.py b/hydrus/client/importing/ClientImportFileSeeds.py index 6f274b5e..37970441 100644 --- a/hydrus/client/importing/ClientImportFileSeeds.py +++ b/hydrus/client/importing/ClientImportFileSeeds.py @@ -133,6 +133,18 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): self.file_seed_data = file_seed_data self.file_seed_data_for_comparison = file_seed_data + if self.file_seed_type == FILE_SEED_TYPE_URL: + + try: + + self.file_seed_data_for_comparison = CG.client_controller.network_engine.domain_manager.NormaliseURL( self.file_seed_data ) + + except: + + pass + + + self.created = HydrusTime.GetNow() self.modified = self.created self.source_time = None @@ -298,6 +310,20 @@ class FileSeed( HydrusSerialisable.SerialisableBase ): serialisable_hashes ) = serialisable_info + self.file_seed_data_for_comparison = self.file_seed_data + + if self.file_seed_type == FILE_SEED_TYPE_URL: + + try: + + self.file_seed_data_for_comparison = CG.client_controller.network_engine.domain_manager.NormaliseURL( self.file_seed_data ) + + except: + + pass + + + self._external_filterable_tags = set( serialisable_external_filterable_tags ) self._external_additional_service_keys_to_tags = HydrusSerialisable.CreateFromSerialisableTuple( serialisable_external_additional_service_keys_to_tags ) From 
ffd2ba42b823581599ce6f8941d42bf446c9c640 Mon Sep 17 00:00:00 2001 From: Hydrus Network Developer Date: Wed, 27 Mar 2024 17:23:05 -0500 Subject: [PATCH 6/7] Revert "Revert "Fix for damaged URL definitions"" This reverts commit 608787e4555fad23498badceba420206c0546b79. --- hydrus/client/networking/ClientNetworkingURLClass.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/hydrus/client/networking/ClientNetworkingURLClass.py b/hydrus/client/networking/ClientNetworkingURLClass.py index 57dfcb84..31508427 100644 --- a/hydrus/client/networking/ClientNetworkingURLClass.py +++ b/hydrus/client/networking/ClientNetworkingURLClass.py @@ -1049,7 +1049,15 @@ class URLClass( HydrusSerialisable.SerialisableBaseNamed ): num_total_path_components = len( self._path_components ) num_required_parameters = len( [ 1 for parameter in self._parameters if parameter.GetDefaultValue() is None ] ) num_total_parameters = len( self._parameters ) - len_example_url = len( self.Normalise( self._example_url, ephemeral_ok = True ) ) + + try: + + len_example_url = len( self.Normalise( self._example_url, ephemeral_ok = True ) ) + + except: + + len_example_url = len( self._example_url ) + return ( num_required_path_components, num_total_path_components, num_required_parameters, num_total_parameters, len_example_url ) From 19a1552c5e6abe44c7d62321aef6901240b3d75f Mon Sep 17 00:00:00 2001 From: bbappserver Date: Sat, 4 May 2024 11:12:29 -0700 Subject: [PATCH 7/7] Update Understanding_Database_Synchronization.md (#1545) Fix typos, clarify terminology and punctuation, link SQLite reading. 
--- .../Understanding_Database_Synchronization.md | 32 +++++++++++-------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/docs/Understanding_Database_Synchronization.md b/docs/Understanding_Database_Synchronization.md index e66de1b7..46c36a68 100644 --- a/docs/Understanding_Database_Synchronization.md +++ b/docs/Understanding_Database_Synchronization.md @@ -7,11 +7,11 @@ Tuning your database synchronization using the `--db_synchronous_override=0` lau - This is a tutorial for advanced users who have read and understood this document and the risk/recovery procedure. - It is nearly always safe to use `--db_synchronous_override=1` on any modern filesystem and this is the default. - It is always more expensive to access the disk than doing things in memory. SSDs are 10-100x as slow as memory, and HDDs are 1000-10000x as slow as memory. -- If you turn synchronization to `0` you are gambling, but it is a safe gamble if you have a backup and **know exactly** what you are doing +- If you turn synchronization to `0` you are gambling, but it is a safe gamble if you have a backup and **know exactly** what you are doing. - After running with synchronization set to zero you must either: - Exit hydrus normally and let the OS flush disk caches (either by letting the system run/"idle" for a while, running `sync` on *NIX systems, or normal shutdown), or - Restore the sqlite database files backup if **the OS shutdown abnormally**. -- Because of the potential for a lot of outstanding writes when using `synchronous=0`, other I/O on your system will slow down as the pending writes are interleaved. Normal shutdown may also take abnormally long because the system is flushing these pending writes, but you must allow it to take its time as explained in the section below. +- Because of the potential for a lot of outstanding writes when using `synchronous=0`, other I/O on your system will slow down as the pending writes are interleaved. 
Normal shutdown may also take abnormally long because the system is syncing these pending writes, but you must allow it to take its time as explained in the section below. **Note:** In historical versions of hydrus (`synchronous=2`), performance was terrible because hydrus would agressively (it was arguably somewhat paranoid) write changes to disk. @@ -21,7 +21,7 @@ Setting the synchronous to 0 lets the database engine defer writing to disk as l When not in synchronous 0 mode, the database engine syncs at regular intervals to make sure data has been written. - Setting synchronous to 0 is generally safe **if and only if** the system also shuts down normally, allowing any of these pending writes to be flushed. -- The database can back out of partial changes if hydrus crashes **even if** `synchronous=0`, so your database will not go corrupt from hydrus shutting down abnormally, only from the system shutting down abnormally. +- The database can back out of partial changes if hydrus crashes **even if** `synchronous=0`, so **your database will not go corrupt from hydrus shutting down abnormally**, **only from the system shutting down abnormally**. ## Technical Explanation @@ -33,9 +33,9 @@ An existing file may be in 3 possible states: - **Flushed**: Pending write to permenant storage but memory has been transfered to the operating system. Data will not be lost if the calling program crashes, since the OS promises it will "eventually" arrive on disk before returning from `fflush()`. When you "safely shutdown:, you are instructing the OS among other things to sync the flushed files. If someone decides to read a file before it has been synced the OS will read the contents up until the flush from the flush buffer, and return that instead of what is actually on disk. If the OS crashes due to error or power failure, data that are flushed but not synced will be lost. - **Synced**: Written to permenant storage. 
A programmer may request that the contents of the file be synced, or it is done gradually over time to free the OS buffers -To ensure the consistency of the database and rollback when needed, the database engine keeps a **journal** of what it is doing. Each transaction ends in a `flush` followed by a `sync`. The *flush* ensures that everything written before the flush will occur before the line that indicats the transaction completed. The *sync* ensures that the entire contents of the transaction has been written to permenant storage before proceeding. The OS is not obligated to write chunks of the database file in the order it recieves them. It only guarantees that if you flush everything before the flush happens first, and everything after happens next. +To ensure the consistency of the database and rollback when needed, the database engine keeps a **journal** of what it is doing. Each transaction ends in a `flush` which may be followed by a `sync`. In `synchronous=2` there is a sync after EVERY `COMMIT`; for `synchronous=1` it depends on the journal mode, often enough to maintain consistency, but not after every commit. The **flush** ensures that everything written before the flush will occur before the line that indicates the transaction completed. The **sync** ensures that the entire contents of the transaction has been written to permanent storage before proceeding. The OS is not obligated to write chunks of the database file in the order it receives them. It only guarantees that if you flush, everything submitted before the flush happens first, and everything submitted after the flush happens next. -The sync is what is controlled by the `synchronous` switch. Allowing the database to ignore whether sync actually completes is the magic that makes `synchronous=0` so dang fast. +The **sync** is what is controlled by the `synchronous` switch. Allowing the database to ignore whether sync actually completes is the magic that makes `synchronous=0` so dang fast.
### An example journal @@ -58,7 +58,7 @@ Hydrus is structured in such a way that the database is written to to keep track ### Where synchronization comes in -Lets revisit the journal, this time with two transactions. Note that the database is syncing on step 8 and thus will have to wait for the OS to write to disk before proceeding, holding up transaction 2, and any other access to the database. +Let's revisit the journal, this time with two transactions. Note that the database is syncing on step 8 and thus will have to wait for the OS to write to disk before proceeding, holding up transaction 2, and any other access to the database. 1. Begin Transaction 1 2. Write Change 1 @@ -77,28 +77,34 @@ Lets revisit the journal, this time with two transactions. Note that the databa 15. End Transaction 2 16. SYNC -**What happens if we remove step 6 and 8 and then die at step 11?** +**What happens if we remove step 8 and then die at step 11?** 1. Begin Transaction 1 2. Write Change 1 3. Write Change 2 4. Read data 5. Write Change 3 -6. ~~FLUSH~~ +6. FLUSH 7. End Transaction 1 8. ~~SYNC~~ 9. Begin Transaction 2 10. Write Change 2 11. Write Ch -What if we crash and step, `End Transaction` has not been written to disk. Now not only do we need to repeat transaction 2, we also need to repeat transaction 1. Note that **this just increaeses the ammount of repeatable work, and actually is fully recoverable** (assuming a file you were downloading didn't cease to exist in the interim). +If we crash at this point, `End Transaction 1` may not have been written to disk. Now not only do we need to repeat transaction 2, we also need to repeat transaction 1. Note that **this just increases the amount of repeatable work, and actually is fully recoverable** (assuming a file you were downloading didn't cease to exist in the interim).
**Now what happens if we do the above and the OS crashes?** -The OS is not obligated to write chunks of the database file in the order you give them to it, in fact for harddrives it is optimal to scatter chunks of the file around the spinning disks so it might arbitrarily reorder your write calls. +As written, we are glossing over a number of steps that happen in step 8. The database must actually make a few syncs to be sure the database is reversible. The steps are, roughly speaking: -- The only way you can be certain that all of the changes in the transaction have been written before writing `END Transaction` is to `flush()` -- The only way you can be sure `END Transaction` was written before doing more changes is to `sync()`. +1. Write and sync rollback +2. Update database file with changes +3. Sync database file +4. Remove rollback/update WAL checkpoint -Thus if the OS crashes at the exact wrong moment, there is no way to be sure that the journal is correct if flushing was skipped (`synchronous=0`). **This means there is no way for you to determine whether the database file is correct after a system crash if you had synchronous 0, and you MUST restore your files from backup as this will be the ONLY WAY to know they are in a known good state.** +If SQLite crashes but the OS doesn't, that's fine: all of this in-flight data is in the OS write buffer, and the OS will behave as if it is on disk. But what if we haven't even finished creating a rollback for the changes made in step 1, and step 2 starts partially changing the database file? Then bam, power failure. We now can't revert the database because we don't have a complete rollback, but we also can't move forward in time either because we don't have a marker showing the completion of transaction 2. So we are stuck in the middle of an incomplete transaction, and have lost the data necessary to leave either end.
+ +See also: https://www.sqlite.org/atomiccommit.html#section_6_2 + +Thus if the OS crashes at the exact wrong moment, there is no way to be sure that the journal is correct if syncing was skipped (`synchronous=0`). **This means there is no way for you to determine whether the database file is correct after a system crash if you had synchronous 0, and you MUST restore your files from backup as this will be the ONLY WAY to know they are in a known good state.** So, setting `synchronous=0` gets you a pretty huge speed boost, but you are gambling that everything goes perfectly and will pay the price of a manual restore every time it doesn't.