gh-114203: Optimise simple recursive critical sections (#128126)

Add a fast path to (single-mutex) critical section locking _iff_ the mutex
is already held by the currently active, top-most critical section of this
thread. This can matter a lot for indirectly recursive critical sections
without intervening critical sections.
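
For illustration, the pattern that benefits looks roughly like the sketch below. The helper names (update, clear_cache) are hypothetical; Py_BEGIN_CRITICAL_SECTION / Py_END_CRITICAL_SECTION are the existing public macros from Python.h, which route through the _PyCriticalSection_Begin/_PyCriticalSection_End paths changed here (and are no-ops on the default, GIL-enabled build):

    static void
    clear_cache(PyObject *obj)
    {
        Py_BEGIN_CRITICAL_SECTION(obj);    // locks obj's per-object mutex
        /* ... mutate obj under the lock ... */
        Py_END_CRITICAL_SECTION();
    }

    static void
    update(PyObject *obj)
    {
        Py_BEGIN_CRITICAL_SECTION(obj);    // outer, top-most critical section
        /* ... other work on obj ... */
        clear_cache(obj);                  // inner Py_BEGIN_CRITICAL_SECTION on the
                                           // same object now takes the fast path
        Py_END_CRITICAL_SECTION();
    }

Previously the inner section went through the full _PyCriticalSection_BeginSlow path, which may detach the thread state and temporarily release the outer section's lock before re-acquiring the mutex; with this change it simply records that the lock is already held.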
Authored by T. Wouters on 2024-12-23 04:31:33 -08:00; committed by GitHub
parent 831b6de6d7
commit 180d417e9f
3 changed files with 32 additions and 7 deletions


@@ -145,6 +145,12 @@ _PyCriticalSection_Pop(PyCriticalSection *c)
 static inline void
 _PyCriticalSection_End(PyCriticalSection *c)
 {
+    // If the mutex is NULL, we used the fast path in
+    // _PyCriticalSection_BeginSlow for locks already held in the top-most
+    // critical section, and we shouldn't unlock or pop this critical section.
+    if (c->_cs_mutex == NULL) {
+        return;
+    }
     PyMutex_Unlock(c->_cs_mutex);
     _PyCriticalSection_Pop(c);
 }
@@ -199,6 +205,14 @@ _PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b)
 static inline void
 _PyCriticalSection2_End(PyCriticalSection2 *c)
 {
+    // if mutex1 is NULL, we used the fast path in
+    // _PyCriticalSection_BeginSlow for mutexes that are already held,
+    // which should only happen when mutex1 and mutex2 were the same mutex,
+    // and mutex2 should also be NULL.
+    if (c->_cs_base._cs_mutex == NULL) {
+        assert(c->_cs_mutex2 == NULL);
+        return;
+    }
     if (c->_cs_mutex2) {
         PyMutex_Unlock(c->_cs_mutex2);
     }


@@ -0,0 +1 @@
+Optimize ``Py_BEGIN_CRITICAL_SECTION`` for simple recursive calls.


@@ -8,11 +8,28 @@ static_assert(_Alignof(PyCriticalSection) >= 4,
               "critical section must be aligned to at least 4 bytes");
 #endif
 
+#ifdef Py_GIL_DISABLED
+static PyCriticalSection *
+untag_critical_section(uintptr_t tag)
+{
+    return (PyCriticalSection *)(tag & ~_Py_CRITICAL_SECTION_MASK);
+}
+#endif
+
 void
 _PyCriticalSection_BeginSlow(PyCriticalSection *c, PyMutex *m)
 {
 #ifdef Py_GIL_DISABLED
     PyThreadState *tstate = _PyThreadState_GET();
+    // As an optimisation for locking the same object recursively, skip
+    // locking if the mutex is currently locked by the top-most critical
+    // section.
+    if (tstate->critical_section &&
+        untag_critical_section(tstate->critical_section)->_cs_mutex == m) {
+        c->_cs_mutex = NULL;
+        c->_cs_prev = 0;
+        return;
+    }
     c->_cs_mutex = NULL;
     c->_cs_prev = (uintptr_t)tstate->critical_section;
     tstate->critical_section = (uintptr_t)c;
@@ -42,13 +59,6 @@ _PyCriticalSection2_BeginSlow(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2,
 #endif
 }
 
-#ifdef Py_GIL_DISABLED
-static PyCriticalSection *
-untag_critical_section(uintptr_t tag)
-{
-    return (PyCriticalSection *)(tag & ~_Py_CRITICAL_SECTION_MASK);
-}
-#endif
 
 // Release all locks held by critical sections. This is called by
 // _PyThreadState_Detach.
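
The last hunk only moves untag_critical_section earlier in the file so the new fast path in _PyCriticalSection_BeginSlow can call it. For readers unfamiliar with the tagging: tstate->critical_section holds a pointer to the top-most critical section with flag bits packed into its low bits (hence the _Alignof(PyCriticalSection) >= 4 assertion above), and _Py_CRITICAL_SECTION_MASK clears those bits. Below is a self-contained sketch of that pointer-tagging idea, using illustrative names and a hard-coded two-bit mask rather than CPython's actual constants:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    // Illustrative stand-in for PyCriticalSection: any struct aligned to at
    // least 4 bytes leaves its two low pointer bits free for flags.
    typedef struct Section { void *mutex; } Section;

    #define TAG_MASK ((uintptr_t)0x3)   // two low bits reserved for flags

    static uintptr_t
    tag_section(Section *s, uintptr_t flags)
    {
        assert(((uintptr_t)s & TAG_MASK) == 0);  // pointer must be 4-byte aligned
        return (uintptr_t)s | (flags & TAG_MASK);
    }

    static Section *
    untag_section(uintptr_t tagged)
    {
        // Same idea as untag_critical_section: mask off the flag bits to
        // recover the original pointer.
        return (Section *)(tagged & ~TAG_MASK);
    }

    int
    main(void)
    {
        static Section s;
        uintptr_t tagged = tag_section(&s, 0x1);      // store one flag bit
        printf("%d\n", untag_section(tagged) == &s);  // prints 1
        return 0;
    }

In CPython those low bits record per-section flags (the constants defined alongside _Py_CRITICAL_SECTION_MASK), which is why the pointer has to be masked before its _cs_mutex field can be compared against the mutex being locked.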