From f802c8bf872ab882d3056675acc79c950fe5b93c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 18 Dec 2024 16:34:31 +0100 Subject: [PATCH] gh-128013: Convert unicodeobject.c macros to functions (#128061) Convert unicodeobject.c macros to static inline functions. * Add _PyUnicode_SET_UTF8() and _PyUnicode_SET_UTF8_LENGTH() macros. * Add PyUnicode_HASH() and PyUnicode_SET_HASH() macros. * Remove unused _PyUnicode_KIND() and _PyUnicode_GET_LENGTH() macros. --- Objects/unicodeobject.c | 123 +++++++++++++++++++++++++--------------- 1 file changed, 78 insertions(+), 45 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index b7aeb06d32b..53be6f5b901 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -112,20 +112,42 @@ NOTE: In the interpreter's initialization phase, some globals are currently # define _PyUnicode_CHECK(op) PyUnicode_Check(op) #endif -#define _PyUnicode_UTF8(op) \ - (_PyCompactUnicodeObject_CAST(op)->utf8) -#define PyUnicode_UTF8(op) \ - (assert(_PyUnicode_CHECK(op)), \ - PyUnicode_IS_COMPACT_ASCII(op) ? \ - ((char*)(_PyASCIIObject_CAST(op) + 1)) : \ - _PyUnicode_UTF8(op)) -#define _PyUnicode_UTF8_LENGTH(op) \ - (_PyCompactUnicodeObject_CAST(op)->utf8_length) -#define PyUnicode_UTF8_LENGTH(op) \ - (assert(_PyUnicode_CHECK(op)), \ - PyUnicode_IS_COMPACT_ASCII(op) ? \ - _PyASCIIObject_CAST(op)->length : \ - _PyUnicode_UTF8_LENGTH(op)) +static inline char* _PyUnicode_UTF8(PyObject *op) +{ + return (_PyCompactUnicodeObject_CAST(op)->utf8); +} + +static inline char* PyUnicode_UTF8(PyObject *op) +{ + assert(_PyUnicode_CHECK(op)); + if (PyUnicode_IS_COMPACT_ASCII(op)) { + return ((char*)(_PyASCIIObject_CAST(op) + 1)); + } + else { + return _PyUnicode_UTF8(op); + } +} + +static inline void PyUnicode_SET_UTF8(PyObject *op, char *utf8) +{ + _PyCompactUnicodeObject_CAST(op)->utf8 = utf8; +} + +static inline Py_ssize_t PyUnicode_UTF8_LENGTH(PyObject *op) +{ + assert(_PyUnicode_CHECK(op)); + if (PyUnicode_IS_COMPACT_ASCII(op)) { + return _PyASCIIObject_CAST(op)->length; + } + else { + return _PyCompactUnicodeObject_CAST(op)->utf8_length; + } +} + +static inline void PyUnicode_SET_UTF8_LENGTH(PyObject *op, Py_ssize_t length) +{ + _PyCompactUnicodeObject_CAST(op)->utf8_length = length; +} #define _PyUnicode_LENGTH(op) \ (_PyASCIIObject_CAST(op)->length) @@ -133,26 +155,37 @@ NOTE: In the interpreter's initialization phase, some globals are currently (_PyASCIIObject_CAST(op)->state) #define _PyUnicode_HASH(op) \ (_PyASCIIObject_CAST(op)->hash) -#define _PyUnicode_KIND(op) \ - (assert(_PyUnicode_CHECK(op)), \ - _PyASCIIObject_CAST(op)->state.kind) -#define _PyUnicode_GET_LENGTH(op) \ - (assert(_PyUnicode_CHECK(op)), \ - _PyASCIIObject_CAST(op)->length) + +static inline Py_hash_t PyUnicode_HASH(PyObject *op) +{ + assert(_PyUnicode_CHECK(op)); + return FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash); +} + +static inline void PyUnicode_SET_HASH(PyObject *op, Py_hash_t hash) +{ + FT_ATOMIC_STORE_SSIZE_RELAXED(_PyASCIIObject_CAST(op)->hash, hash); +} + #define _PyUnicode_DATA_ANY(op) \ (_PyUnicodeObject_CAST(op)->data.any) -#define _PyUnicode_SHARE_UTF8(op) \ - (assert(_PyUnicode_CHECK(op)), \ - assert(!PyUnicode_IS_COMPACT_ASCII(op)), \ - (_PyUnicode_UTF8(op) == PyUnicode_DATA(op))) +static inline int _PyUnicode_SHARE_UTF8(PyObject *op) +{ + assert(_PyUnicode_CHECK(op)); + assert(!PyUnicode_IS_COMPACT_ASCII(op)); + return (_PyUnicode_UTF8(op) == PyUnicode_DATA(op)); +} /* true if the Unicode object has an allocated UTF-8 memory block (not shared with other data) */ -#define _PyUnicode_HAS_UTF8_MEMORY(op) \ - ((!PyUnicode_IS_COMPACT_ASCII(op) \ - && _PyUnicode_UTF8(op) \ - && _PyUnicode_UTF8(op) != PyUnicode_DATA(op))) +static inline int _PyUnicode_HAS_UTF8_MEMORY(PyObject *op) +{ + return (!PyUnicode_IS_COMPACT_ASCII(op) + && _PyUnicode_UTF8(op) != NULL + && _PyUnicode_UTF8(op) != PyUnicode_DATA(op)); +} + /* Generic helper macro to convert characters of different types. from_type and to_type have to be valid type names, begin and end @@ -1123,8 +1156,8 @@ resize_compact(PyObject *unicode, Py_ssize_t length) if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { PyMem_Free(_PyUnicode_UTF8(unicode)); - _PyUnicode_UTF8(unicode) = NULL; - _PyUnicode_UTF8_LENGTH(unicode) = 0; + PyUnicode_SET_UTF8(unicode, NULL); + PyUnicode_SET_UTF8_LENGTH(unicode, 0); } #ifdef Py_TRACE_REFS _Py_ForgetReference(unicode); @@ -1177,8 +1210,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode)) { PyMem_Free(_PyUnicode_UTF8(unicode)); - _PyUnicode_UTF8(unicode) = NULL; - _PyUnicode_UTF8_LENGTH(unicode) = 0; + PyUnicode_SET_UTF8(unicode, NULL); + PyUnicode_SET_UTF8_LENGTH(unicode, 0); } data = (PyObject *)PyObject_Realloc(data, new_size); @@ -1188,8 +1221,8 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) } _PyUnicode_DATA_ANY(unicode) = data; if (share_utf8) { - _PyUnicode_UTF8(unicode) = data; - _PyUnicode_UTF8_LENGTH(unicode) = length; + PyUnicode_SET_UTF8(unicode, data); + PyUnicode_SET_UTF8_LENGTH(unicode, length); } _PyUnicode_LENGTH(unicode) = length; PyUnicode_WRITE(PyUnicode_KIND(unicode), data, length, 0); @@ -1769,7 +1802,7 @@ unicode_modifiable(PyObject *unicode) assert(_PyUnicode_CHECK(unicode)); if (Py_REFCNT(unicode) != 1) return 0; - if (FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(unicode)) != -1) + if (PyUnicode_HASH(unicode) != -1) return 0; if (PyUnicode_CHECK_INTERNED(unicode)) return 0; @@ -5862,8 +5895,8 @@ unicode_fill_utf8(PyObject *unicode) PyErr_NoMemory(); return -1; } - _PyUnicode_UTF8(unicode) = cache; - _PyUnicode_UTF8_LENGTH(unicode) = len; + PyUnicode_SET_UTF8(unicode, cache); + PyUnicode_SET_UTF8_LENGTH(unicode, len); memcpy(cache, start, len); cache[len] = '\0'; _PyBytesWriter_Dealloc(&writer); @@ -11434,9 +11467,9 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right) return 0; } - Py_hash_t right_hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(right_uni)); + Py_hash_t right_hash = PyUnicode_HASH(right_uni); assert(right_hash != -1); - Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(left)); + Py_hash_t hash = PyUnicode_HASH(left); if (hash != -1 && hash != right_hash) { return 0; } @@ -11916,14 +11949,14 @@ unicode_hash(PyObject *self) #ifdef Py_DEBUG assert(_Py_HashSecret_Initialized); #endif - Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(self)); + Py_hash_t hash = PyUnicode_HASH(self); if (hash != -1) { return hash; } x = Py_HashBuffer(PyUnicode_DATA(self), PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self)); - FT_ATOMIC_STORE_SSIZE_RELAXED(_PyUnicode_HASH(self), x); + PyUnicode_SET_HASH(self, x); return x; } @@ -15427,8 +15460,8 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode) _PyUnicode_STATE(self).compact = 0; _PyUnicode_STATE(self).ascii = _PyUnicode_STATE(unicode).ascii; _PyUnicode_STATE(self).statically_allocated = 0; - _PyUnicode_UTF8_LENGTH(self) = 0; - _PyUnicode_UTF8(self) = NULL; + PyUnicode_SET_UTF8_LENGTH(self, 0); + PyUnicode_SET_UTF8(self, NULL); _PyUnicode_DATA_ANY(self) = NULL; share_utf8 = 0; @@ -15458,8 +15491,8 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode) _PyUnicode_DATA_ANY(self) = data; if (share_utf8) { - _PyUnicode_UTF8_LENGTH(self) = length; - _PyUnicode_UTF8(self) = data; + PyUnicode_SET_UTF8_LENGTH(self, length); + PyUnicode_SET_UTF8(self, data); } memcpy(data, PyUnicode_DATA(unicode), kind * (length + 1));