diff --git a/changelog.d/1065.change.rst b/changelog.d/1065.change.rst new file mode 100644 index 00000000..575de5fb --- /dev/null +++ b/changelog.d/1065.change.rst @@ -0,0 +1 @@ +To conform with `PEP 681 `_, ``attr.s()` and ``attrs.define()`` now accept *unsafe_hash* in addition to *hash*. diff --git a/docs/api.rst b/docs/api.rst index 59d006f3..98d921a6 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -120,7 +120,7 @@ Classic Same as `attrs.NOTHING`. -.. autofunction:: attr.s(these=None, repr_ns=None, repr=None, cmp=None, hash=None, init=None, slots=False, frozen=False, weakref_slot=True, str=False, auto_attribs=False, kw_only=False, cache_hash=False, auto_exc=False, eq=None, order=None, auto_detect=False, collect_by_mro=False, getstate_setstate=None, on_setattr=None, field_transformer=None, match_args=True) +.. autofunction:: attr.s(these=None, repr_ns=None, repr=None, cmp=None, hash=None, init=None, slots=False, frozen=False, weakref_slot=True, str=False, auto_attribs=False, kw_only=False, cache_hash=False, auto_exc=False, eq=None, order=None, auto_detect=False, collect_by_mro=False, getstate_setstate=None, on_setattr=None, field_transformer=None, match_args=True, unsafe_hash=None) .. note:: diff --git a/docs/conf.py b/docs/conf.py index 9ce47764..87e0b586 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -45,6 +45,11 @@ extensions = [ "sphinxcontrib.towncrier", ] +myst_enable_extensions = [ + "colon_fence", + "smartquotes", + "deflist", +] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] diff --git a/docs/hashing.md b/docs/hashing.md new file mode 100644 index 00000000..231d818a --- /dev/null +++ b/docs/hashing.md @@ -0,0 +1,85 @@ +# Hashing + +## Hash Method Generation + +:::{warning} +The overarching theme is to never set the `@attrs.define(unsafe_hash=X)` parameter yourself. +Leave it at `None` which means that *attrs* will do the right thing for you, depending on the other parameters: + +- If you want to make objects hashable by value: use `@define(frozen=True)`. +- If you want hashing and equality by object identity: use `@define(eq=False)` + +Setting `unsafe_hash` yourself can have unexpected consequences so we recommend to tinker with it only if you know exactly what you're doing. +::: + +Under certain circumstances, it's necessary for objects to be *hashable*. +For example if you want to put them into a {class}`set` or if you want to use them as keys in a {class}`dict`. + +The *hash* of an object is an integer that represents the contents of an object. +It can be obtained by calling {func}`hash` on an object and is implemented by writing a `__hash__` method for your class. + +*attrs* will happily write a `__hash__` method for you [^fn1], however it will *not* do so by default. +Because according to the [definition](https://docs.python.org/3/glossary.html#term-hashable) from the official Python docs, the returned hash has to fulfill certain constraints: + +[^fn1]: The hash is computed by hashing a tuple that consists of a unique id for the class plus all attribute values. + +1. Two objects that are equal, **must** have the same hash. + This means that if `x == y`, it *must* follow that `hash(x) == hash(y)`. + + By default, Python classes are compared *and* hashed by their `id`. + That means that every instance of a class has a different hash, no matter what attributes it carries. + + It follows that the moment you (or *attrs*) change the way equality is handled by implementing `__eq__` which is based on attribute values, this constraint is broken. + For that reason Python 3 will make a class that has customized equality unhashable. + Python 2 on the other hand will happily let you shoot your foot off. + Unfortunately, *attrs* still mimics (otherwise unsupported) Python 2's behavior for backward-compatibility reasons if you set `unsafe_hash=False`. + + The *correct way* to achieve hashing by id is to set `@define(eq=False)`. + Setting `@define(unsafe_hash=False)` (which implies `eq=True`) is almost certainly a *bug*. + + :::{warning} + Be careful when subclassing! + Setting `eq=False` on a class whose base class has a non-default `__hash__` method will *not* make *attrs* remove that `__hash__` for you. + + It is part of *attrs*'s philosophy to only *add* to classes so you have the freedom to customize your classes as you wish. + So if you want to *get rid* of methods, you'll have to do it by hand. + + The easiest way to reset `__hash__` on a class is adding `__hash__ = object.__hash__` in the class body. + ::: + +2. If two objects are not equal, their hash **should** be different. + + While this isn't a requirement from a standpoint of correctness, sets and dicts become less effective if there are a lot of identical hashes. + The worst case is when all objects have the same hash which turns a set into a list. + +3. The hash of an object **must not** change. + + If you create a class with `@define(frozen=True)` this is fulfilled by definition, therefore *attrs* will write a `__hash__` function for you automatically. + You can also force it to write one with `unsafe_hash=True` but then it's *your* responsibility to make sure that the object is not mutated. + + This point is the reason why mutable structures like lists, dictionaries, or sets aren't hashable while immutable ones like tuples or `frozenset`s are: + point 1 and 2 require that the hash changes with the contents but point 3 forbids it. + +For a more thorough explanation of this topic, please refer to this blog post: [*Python Hashes and Equality*](https://hynek.me/articles/hashes-and-equality/). + +:::{note} +Please note that the `unsafe_hash` argument's original name was `hash` but was changed to conform with {pep}`681` in 22.2.0. +The old argument name is still around and will **not** be removed -- but setting `unsafe_hash` takes precedence over `hash`. +The field-level argument is still called `hash` and will remain so. +::: + + +## Hashing and Mutability + +Changing any field involved in hash code computation after the first call to `__hash__` (typically this would be after its insertion into a hash-based collection) can result in silent bugs. +Therefore, it is strongly recommended that hashable classes be `frozen`. +Beware, however, that this is not a complete guarantee of safety: +if a field points to an object and that object is mutated, the hash code may change, but `frozen` will not protect you. + + +## Hash Code Caching + +Some objects have hash codes which are expensive to compute. +If such objects are to be stored in hash-based collections, it can be useful to compute the hash codes only once and then store the result on the object to make future hash code requests fast. +To enable caching of hash codes, pass `@define(cache_hash=True)`. +This may only be done if *attrs* is already generating a hash function for the object. diff --git a/docs/hashing.rst b/docs/hashing.rst deleted file mode 100644 index 4f2b868e..00000000 --- a/docs/hashing.rst +++ /dev/null @@ -1,86 +0,0 @@ -Hashing -======= - -Hash Method Generation ----------------------- - -.. warning:: - - The overarching theme is to never set the ``@attr.s(hash=X)`` parameter yourself. - Leave it at ``None`` which means that ``attrs`` will do the right thing for you, depending on the other parameters: - - - If you want to make objects hashable by value: use ``@attr.s(frozen=True)``. - - If you want hashing and equality by object identity: use ``@attr.s(eq=False)`` - - Setting ``hash`` yourself can have unexpected consequences so we recommend to tinker with it only if you know exactly what you're doing. - -Under certain circumstances, it's necessary for objects to be *hashable*. -For example if you want to put them into a `set` or if you want to use them as keys in a `dict`. - -The *hash* of an object is an integer that represents the contents of an object. -It can be obtained by calling `hash` on an object and is implemented by writing a ``__hash__`` method for your class. - -``attrs`` will happily write a ``__hash__`` method for you [#fn1]_, however it will *not* do so by default. -Because according to the definition_ from the official Python docs, the returned hash has to fulfill certain constraints: - -#. Two objects that are equal, **must** have the same hash. - This means that if ``x == y``, it *must* follow that ``hash(x) == hash(y)``. - - By default, Python classes are compared *and* hashed by their `id`. - That means that every instance of a class has a different hash, no matter what attributes it carries. - - It follows that the moment you (or ``attrs``) change the way equality is handled by implementing ``__eq__`` which is based on attribute values, this constraint is broken. - For that reason Python 3 will make a class that has customized equality unhashable. - Python 2 on the other hand will happily let you shoot your foot off. - Unfortunately, ``attrs`` still mimics (otherwise unsupported) Python 2's behavior for backward compatibility reasons if you set ``hash=False``. - - The *correct way* to achieve hashing by id is to set ``@attr.s(eq=False)``. - Setting ``@attr.s(hash=False)`` (which implies ``eq=True``) is almost certainly a *bug*. - - .. warning:: - - Be careful when subclassing! - Setting ``eq=False`` on a class whose base class has a non-default ``__hash__`` method will *not* make ``attrs`` remove that ``__hash__`` for you. - - It is part of ``attrs``'s philosophy to only *add* to classes so you have the freedom to customize your classes as you wish. - So if you want to *get rid* of methods, you'll have to do it by hand. - - The easiest way to reset ``__hash__`` on a class is adding ``__hash__ = object.__hash__`` in the class body. - -#. If two objects are not equal, their hash **should** be different. - - While this isn't a requirement from a standpoint of correctness, sets and dicts become less effective if there are a lot of identical hashes. - The worst case is when all objects have the same hash which turns a set into a list. - -#. The hash of an object **must not** change. - - If you create a class with ``@attr.s(frozen=True)`` this is fulfilled by definition, therefore ``attrs`` will write a ``__hash__`` function for you automatically. - You can also force it to write one with ``hash=True`` but then it's *your* responsibility to make sure that the object is not mutated. - - This point is the reason why mutable structures like lists, dictionaries, or sets aren't hashable while immutable ones like tuples or frozensets are: - point 1 and 2 require that the hash changes with the contents but point 3 forbids it. - -For a more thorough explanation of this topic, please refer to this blog post: `Python Hashes and Equality`_. - - -Hashing and Mutability ----------------------- - -Changing any field involved in hash code computation after the first call to ``__hash__`` (typically this would be after its insertion into a hash-based collection) can result in silent bugs. -Therefore, it is strongly recommended that hashable classes be ``frozen``. -Beware, however, that this is not a complete guarantee of safety: -if a field points to an object and that object is mutated, the hash code may change, but ``frozen`` will not protect you. - - -Hash Code Caching ------------------ - -Some objects have hash codes which are expensive to compute. -If such objects are to be stored in hash-based collections, it can be useful to compute the hash codes only once and then store the result on the object to make future hash code requests fast. -To enable caching of hash codes, pass ``cache_hash=True`` to ``@attrs``. -This may only be done if ``attrs`` is already generating a hash function for the object. - -.. [#fn1] The hash is computed by hashing a tuple that consists of a unique id for the class plus all attribute values. - -.. _definition: https://docs.python.org/3/glossary.html#term-hashable -.. _`Python Hashes and Equality`: https://hynek.me/articles/hashes-and-equality/ diff --git a/src/attr/__init__.pyi b/src/attr/__init__.pyi index 7aff65c3..baa5b2a8 100644 --- a/src/attr/__init__.pyi +++ b/src/attr/__init__.pyi @@ -342,6 +342,7 @@ def attrs( on_setattr: Optional[_OnSetAttrArgType] = ..., field_transformer: Optional[_FieldTransformer] = ..., match_args: bool = ..., + unsafe_hash: Optional[bool] = ..., ) -> _C: ... @overload @__dataclass_transform__(order_default=True, field_descriptors=(attrib, field)) @@ -369,6 +370,7 @@ def attrs( on_setattr: Optional[_OnSetAttrArgType] = ..., field_transformer: Optional[_FieldTransformer] = ..., match_args: bool = ..., + unsafe_hash: Optional[bool] = ..., ) -> Callable[[_C], _C]: ... @overload @__dataclass_transform__(field_descriptors=(attrib, field)) @@ -377,6 +379,7 @@ def define( *, these: Optional[Dict[str, Any]] = ..., repr: bool = ..., + unsafe_hash: Optional[bool] = ..., hash: Optional[bool] = ..., init: bool = ..., slots: bool = ..., @@ -402,6 +405,7 @@ def define( *, these: Optional[Dict[str, Any]] = ..., repr: bool = ..., + unsafe_hash: Optional[bool] = ..., hash: Optional[bool] = ..., init: bool = ..., slots: bool = ..., diff --git a/src/attr/_make.py b/src/attr/_make.py index 4e6846aa..b70ebdf8 100644 --- a/src/attr/_make.py +++ b/src/attr/_make.py @@ -1217,6 +1217,7 @@ def attrs( on_setattr=None, field_transformer=None, match_args=True, + unsafe_hash=None, ): r""" A class decorator that adds :term:`dunder methods` according to the @@ -1279,8 +1280,8 @@ def attrs( *eq*. :param Optional[bool] cmp: Setting *cmp* is equivalent to setting *eq* and *order* to the same value. Must not be mixed with *eq* or *order*. - :param Optional[bool] hash: If ``None`` (default), the ``__hash__`` method - is generated according how *eq* and *frozen* are set. + :param Optional[bool] unsafe_hash: If ``None`` (default), the ``__hash__`` + method is generated according how *eq* and *frozen* are set. 1. If *both* are True, ``attrs`` will generate a ``__hash__`` for you. 2. If *eq* is True and *frozen* is False, ``__hash__`` will be set to @@ -1298,6 +1299,8 @@ def attrs( `object.__hash__`, and the `GitHub issue that led to the default \ behavior `_ for more details. + :param Optional[bool] hash: Alias for *unsafe_hash*. *unsafe_hash* takes + precedence. :param bool init: Create a ``__init__`` method that initializes the ``attrs`` attributes. Leading underscores are stripped for the argument name. If a ``__attrs_pre_init__`` method exists on the class, it will @@ -1469,9 +1472,14 @@ def attrs( .. versionchanged:: 21.1.0 Support for ``__attrs_pre_init__`` .. versionchanged:: 21.1.0 *cmp* undeprecated .. versionadded:: 21.3.0 *match_args* + .. versionadded:: 22.2.0 + *unsafe_hash* as an alias for *hash* (for :pep:`681` compliance). """ eq_, order_ = _determine_attrs_eq_order(cmp, eq, order, None) - hash_ = hash # work around the lack of nonlocal + + # unsafe_hash takes precedence due to PEP 681. + if unsafe_hash is not None: + hash = unsafe_hash if isinstance(on_setattr, (list, tuple)): on_setattr = setters.pipe(*on_setattr) @@ -1527,14 +1535,14 @@ def attrs( builder.add_setattr() + nonlocal hash if ( - hash_ is None + hash is None and auto_detect is True and _has_own_attribute(cls, "__hash__") ): hash = False - else: - hash = hash_ + if hash is not True and hash is not False and hash is not None: # Can't use `hash in` because 1 == True for example. raise TypeError( diff --git a/src/attr/_next_gen.py b/src/attr/_next_gen.py index 79e8a44d..c59d8486 100644 --- a/src/attr/_next_gen.py +++ b/src/attr/_next_gen.py @@ -26,6 +26,7 @@ def define( *, these=None, repr=None, + unsafe_hash=None, hash=None, init=None, slots=True, @@ -81,6 +82,8 @@ def define( .. versionadded:: 20.1.0 .. versionchanged:: 21.3.0 Converters are also run ``on_setattr``. + .. versionadded:: 22.2.0 + *unsafe_hash* as an alias for *hash* (for :pep:`681` compliance). """ def do_it(cls, auto_attribs): @@ -89,6 +92,7 @@ def define( these=these, repr=repr, hash=hash, + unsafe_hash=unsafe_hash, init=init, slots=slots, frozen=frozen, diff --git a/tests/dataclass_transform_example.py b/tests/dataclass_transform_example.py index af09e8a5..c65ea613 100644 --- a/tests/dataclass_transform_example.py +++ b/tests/dataclass_transform_example.py @@ -55,3 +55,9 @@ class AliasedField: af = AliasedField(42) reveal_type(af.__init__) # noqa + + +# unsafe_hash is accepted +@attrs.define(unsafe_hash=True) +class Hashable: + pass diff --git a/tests/test_functional.py b/tests/test_functional.py index 617266c1..ad60f2ee 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -739,3 +739,14 @@ class TestFunctional: assert "_setattr('x', x)" in src assert "_setattr('y', y)" in src assert object.__setattr__ != D.__setattr__ + + def test_unsafe_hash(self, slots): + """ + attr.s(unsafe_hash=True) makes a class hashable. + """ + + @attr.s(slots=slots, unsafe_hash=True) + class Hashable: + pass + + assert hash(Hashable()) diff --git a/tests/typing_example.py b/tests/typing_example.py index b4b617e0..40080865 100644 --- a/tests/typing_example.py +++ b/tests/typing_example.py @@ -452,3 +452,8 @@ def accessing_from_attrs() -> None: foo = object if attrs.has(foo) or attr.has(foo): foo.__attrs_attrs__ + + +@attrs.define(unsafe_hash=True) +class Hashable: + pass