gh-114203: Optimise simple recursive critical sections (#128126)

Add a fast path to (single-mutex) critical section locking _iff_ the mutex
is already held by the currently active, top-most critical section of this
thread. This can matter a lot for indirectly recursive critical sections
without intervening critical sections.
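
For illustration, the pattern that benefits looks roughly like the sketch below. The helper names (update, clear_cache) are hypothetical; Py_BEGIN_CRITICAL_SECTION / Py_END_CRITICAL_SECTION are the existing public macros from Python.h, which route through the _PyCriticalSection_Begin/_PyCriticalSection_End paths changed here (and are no-ops on the default, GIL-enabled build):

    static void
    clear_cache(PyObject *obj)
    {
        Py_BEGIN_CRITICAL_SECTION(obj);    // locks obj's per-object mutex
        /* ... mutate obj under the lock ... */
        Py_END_CRITICAL_SECTION();
    }

    static void
    update(PyObject *obj)
    {
        Py_BEGIN_CRITICAL_SECTION(obj);    // outer, top-most critical section
        /* ... other work on obj ... */
        clear_cache(obj);                  // inner Py_BEGIN_CRITICAL_SECTION on the
                                           // same object now takes the fast path
        Py_END_CRITICAL_SECTION();
    }

Previously the inner section went through the full _PyCriticalSection_BeginSlow path, which may detach the thread state and temporarily release the outer section's lock before re-acquiring the mutex; with this change it simply records that the lock is already held.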
Authored by T. Wouters on 2024-12-23 04:31:33 -08:00; committed by GitHub
parent 831b6de6d7
commit 180d417e9f
3 changed files with 32 additions and 7 deletions


@@ -145,6 +145,12 @@ _PyCriticalSection_Pop(PyCriticalSection *c)
 static inline void
 _PyCriticalSection_End(PyCriticalSection *c)
 {
+    // If the mutex is NULL, we used the fast path in
+    // _PyCriticalSection_BeginSlow for locks already held in the top-most
+    // critical section, and we shouldn't unlock or pop this critical section.
+    if (c->_cs_mutex == NULL) {
+        return;
+    }
     PyMutex_Unlock(c->_cs_mutex);
     _PyCriticalSection_Pop(c);
 }
@@ -199,6 +205,14 @@ _PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b)
 static inline void
 _PyCriticalSection2_End(PyCriticalSection2 *c)
 {
+    // if mutex1 is NULL, we used the fast path in
+    // _PyCriticalSection_BeginSlow for mutexes that are already held,
+    // which should only happen when mutex1 and mutex2 were the same mutex,
+    // and mutex2 should also be NULL.
+    if (c->_cs_base._cs_mutex == NULL) {
+        assert(c->_cs_mutex2 == NULL);
+        return;
+    }
     if (c->_cs_mutex2) {
         PyMutex_Unlock(c->_cs_mutex2);
     }


@@ -0,0 +1 @@
+Optimize ``Py_BEGIN_CRITICAL_SECTION`` for simple recursive calls.


@@ -8,11 +8,28 @@ static_assert(_Alignof(PyCriticalSection) >= 4,
               "critical section must be aligned to at least 4 bytes");
 #endif
 
+#ifdef Py_GIL_DISABLED
+static PyCriticalSection *
+untag_critical_section(uintptr_t tag)
+{
+    return (PyCriticalSection *)(tag & ~_Py_CRITICAL_SECTION_MASK);
+}
+#endif
+
 void
 _PyCriticalSection_BeginSlow(PyCriticalSection *c, PyMutex *m)
 {
 #ifdef Py_GIL_DISABLED
     PyThreadState *tstate = _PyThreadState_GET();
+    // As an optimisation for locking the same object recursively, skip
+    // locking if the mutex is currently locked by the top-most critical
+    // section.
+    if (tstate->critical_section &&
+        untag_critical_section(tstate->critical_section)->_cs_mutex == m) {
+        c->_cs_mutex = NULL;
+        c->_cs_prev = 0;
+        return;
+    }
     c->_cs_mutex = NULL;
     c->_cs_prev = (uintptr_t)tstate->critical_section;
     tstate->critical_section = (uintptr_t)c;
@@ -42,13 +59,6 @@ _PyCriticalSection2_BeginSlow(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2,
 #endif
 }
 
-#ifdef Py_GIL_DISABLED
-static PyCriticalSection *
-untag_critical_section(uintptr_t tag)
-{
-    return (PyCriticalSection *)(tag & ~_Py_CRITICAL_SECTION_MASK);
-}
-#endif
 
 // Release all locks held by critical sections. This is called by
 // _PyThreadState_Detach.
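
The last hunk only moves untag_critical_section earlier in the file so the new fast path in _PyCriticalSection_BeginSlow can call it. For readers unfamiliar with the tagging: tstate->critical_section holds a pointer to the top-most critical section with flag bits packed into its low bits (hence the _Alignof(PyCriticalSection) >= 4 assertion above), and _Py_CRITICAL_SECTION_MASK clears those bits. Below is a self-contained sketch of that pointer-tagging idea, using illustrative names and a hard-coded two-bit mask rather than CPython's actual constants:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    // Illustrative stand-in for PyCriticalSection: any struct aligned to at
    // least 4 bytes leaves its two low pointer bits free for flags.
    typedef struct Section { void *mutex; } Section;

    #define TAG_MASK ((uintptr_t)0x3)   // two low bits reserved for flags

    static uintptr_t
    tag_section(Section *s, uintptr_t flags)
    {
        assert(((uintptr_t)s & TAG_MASK) == 0);  // pointer must be 4-byte aligned
        return (uintptr_t)s | (flags & TAG_MASK);
    }

    static Section *
    untag_section(uintptr_t tagged)
    {
        // Same idea as untag_critical_section: mask off the flag bits to
        // recover the original pointer.
        return (Section *)(tagged & ~TAG_MASK);
    }

    int
    main(void)
    {
        static Section s;
        uintptr_t tagged = tag_section(&s, 0x1);      // store one flag bit
        printf("%d\n", untag_section(tagged) == &s);  // prints 1
        return 0;
    }

In CPython those low bits record per-section flags (the constants defined alongside _Py_CRITICAL_SECTION_MASK), which is why the pointer has to be masked before its _cs_mutex field can be compared against the mutex being locked.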