ansible: new multiplexer/workers configuration
Following on from 152effc26c9a5918cb7ead7a97fe7fa7f81b6764,

* Pin mux to CPU 0
* Pin top-level to CPU 1
* Pin workers sequentially to CPU 2..n

Nets 19.5% improvement on issue_140__thread_pileup.yml when targeting 64 Docker containers on the same 8 core/16 thread machine.

Before (prior to last scheme, no affinity at all):

    2294528.731458 task-clock (msec) # 6.443 CPUs utilized
    10,429,745 context-switches # 0.005 M/sec
    2,049,618 cpu-migrations # 0.893 K/sec
    8,258,952 page-faults # 0.004 M/sec
    5,532,719,253,824 cycles # 2.411 GHz (83.35%)
    3,267,471,616,230 instructions # 0.59 insn per cycle # 1.22 stalled cycles per insn (83.35%)
    662,006,455,943 branches # 288.515 M/sec (83.33%)
    39,453,895,977 branch-misses # 5.96% of all branches (83.37%)
    356.148064576 seconds time elapsed

After:

    2226463.958975 task-clock (msec) # 7.784 CPUs utilized
    9,831,466 context-switches # 0.004 M/sec
    180,065 cpu-migrations # 0.081 K/sec
    5,082,278 page-faults # 0.002 M/sec
    5,592,548,587,259 cycles # 2.512 GHz (83.35%)
    3,135,038,855,414 instructions # 0.56 insn per cycle # 1.32 stalled cycles per insn (83.32%)
    636,397,509,232 branches # 285.833 M/sec (83.30%)
    39,135,441,790 branch-misses # 6.15% of all branches (83.35%)
    286.036681644 seconds time elapsed
This commit is contained in:
parent
8f6e6b3940
commit
c6d5aa29ba
|
@ -0,0 +1,132 @@
|
|||
# Copyright 2017, David Wilson
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice,
|
||||
# this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the copyright holder nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software without
|
||||
# specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
import ctypes
|
||||
import mmap
|
||||
import multiprocessing
|
||||
import os
|
||||
import struct
|
||||
|
||||
import mitogen.parent
|
||||
|
||||
|
||||
# Resolve the libc entry points used by this module.
#
# Every name is bound to None before the lookup starts, so that a lookup
# failing partway through (for example a libc that exports the pthread
# functions but not sched_setaffinity) cannot leave the remaining names
# undefined. Names resolved before the failure keep their values. After a
# failure `_libc` is None, which is the signal that affinity is unavailable.
_libc = None
_strerror = None
_pthread_mutex_init = None
_pthread_mutex_lock = None
_pthread_mutex_unlock = None
_sched_setaffinity = None

try:
    _libc = ctypes.CDLL(None, use_errno=True)
    _strerror = _libc.strerror
    # strerror() returns a C string; without this ctypes would treat the
    # result as an int.
    _strerror.restype = ctypes.c_char_p
    _pthread_mutex_init = _libc.pthread_mutex_init
    _pthread_mutex_lock = _libc.pthread_mutex_lock
    _pthread_mutex_unlock = _libc.pthread_mutex_unlock
    _sched_setaffinity = _libc.sched_setaffinity
except (OSError, AttributeError):
    _libc = None
|
||||
|
||||
|
||||
class pthread_mutex_t(ctypes.Structure):
    """
    Raw storage for a pthread mutex, so the lock can be embedded in another
    ctypes structure.

    The 512-byte size is presumably chosen to exceed the real
    ``pthread_mutex_t`` on every supported platform -- TODO confirm.

    NOTE(review): :meth:`init` uses the default mutex attributes. The mutex
    is placed in an mmap by :class:`Manager`; if it is meant to be locked
    from several processes, POSIX requires the PTHREAD_PROCESS_SHARED
    attribute -- confirm and fix separately.
    """
    _fields_ = [
        ('data', ctypes.c_uint8 * 512),
    ]

    def _check(self, rc):
        """
        Raise :class:`Exception` if `rc`, the return value of a pthread
        call, is non-zero.

        The pthread functions return the error number directly and do not
        set ``errno``, so the message is derived from `rc` itself.
        """
        if rc:
            raise Exception(_strerror(rc))

    def init(self):
        """
        Initialise the mutex with default attributes.
        """
        # None is passed as a NULL pointer; a Python int would be passed as
        # a C int, which is not pointer-sized on 64-bit platforms.
        self._check(_pthread_mutex_init(self.data, None))

    def acquire(self):
        """
        Lock the mutex, blocking until it is available.
        """
        self._check(_pthread_mutex_lock(self.data))

    def release(self):
        """
        Unlock the mutex.
        """
        self._check(_pthread_mutex_unlock(self.data))
|
||||
|
||||
|
||||
class State(ctypes.Structure):
    """
    Record layout overlaid on the memory buffer owned by :class:`Manager`:
    a mutex followed by a one-byte counter that the mutex guards.
    """
    _fields_ = [('lock', pthread_mutex_t), ('counter', ctypes.c_uint8)]
|
||||
|
||||
|
||||
class Manager(object):
    """
    Pin processes to CPUs according to a fixed scheme: CPUs 0 and 1 are
    reserved (callers bind the multiplexer and the top-level process to them
    with :meth:`set_cpu`), and each call to :meth:`assign` binds the calling
    process to the next CPU in the sequence 2..n, wrapping around.

    The sequence position is a counter stored in an anonymous memory map
    created by the constructor, guarded by the mutex stored beside it.

    Whenever an affinity mask is applied, :meth:`clear` is installed as
    ``mitogen.parent._preexec_hook``. Per the original documentation this
    hook runs in children created with :func:`mitogen.parent.detach_popen`,
    so CPU-intensive children like SSH are not forced to share a core with
    the (potentially very busy) parent.

    Binding all threads of a Python process to one CPU makes sense, as they
    cannot run in parallel and must all acquire the same lock before
    executing. Threads on the same CPU also share cache.

    When the libc affinity functions could not be resolved at import time
    (`_libc` is None), applying a mask is a no-op.
    """
    def __init__(self):
        # Anonymous mapping: with Python's default flags on Unix this is
        # MAP_SHARED, so forked children see the same counter.
        self.mem = mmap.mmap(-1, 4096)
        self.state = State.from_buffer(self.mem)
        self.state.lock.init()

    def _set_affinity(self, mask):
        """
        Bind the calling process to the CPUs whose bits are set in `mask`,
        and arrange for :meth:`clear` to run as the pre-exec hook.

        `mask` must fit in a native ``unsigned long``. The result of the
        underlying call is not checked.
        """
        if _libc is None:
            # Affinity is unavailable on this platform; behave as a no-op
            # rather than failing on an unresolved libc function.
            return

        mitogen.parent._preexec_hook = self.clear
        s = struct.pack('L', mask)
        _sched_setaffinity(os.getpid(), len(s), s)

    def cpu_count(self):
        """
        Return the number of CPUs reported by :mod:`multiprocessing`.
        """
        return multiprocessing.cpu_count()

    def clear(self):
        """
        Clear any prior binding, except for reserved CPUs.

        With two or fewer CPUs nothing can be reserved: excluding CPUs 0
        and 1 would leave a mask naming no existing CPU, which the kernel
        rejects, leaving the previous binding in place. In that case every
        CPU is allowed.
        """
        mask = 0xffffffff
        if self.cpu_count() > 2:
            mask &= ~3
        self._set_affinity(mask)

    def set_cpu(self, cpu):
        """
        Bind to 0-based `cpu`.
        """
        self._set_affinity(1 << cpu)

    def assign(self):
        """
        Take the next value of the shared counter and bind the calling
        process to the CPU it selects.

        With more than two CPUs the workers cycle over CPUs 2..n-1. With
        two or fewer there are no unreserved CPUs, so workers cycle over
        all of them instead of dividing by zero or selecting a CPU that
        does not exist.
        """
        self.state.lock.acquire()
        try:
            n = self.state.counter
            self.state.counter += 1
        finally:
            self.state.lock.release()

        count = self.cpu_count()
        if count > 2:
            self.set_cpu(2 + (n % (count - 2)))
        else:
            self.set_cpu(n % count)
|
||||
|
||||
|
||||
# Module-level instance, constructed at import time. Other modules reach it
# as `ansible_mitogen.affinity.manager`.
manager = Manager()
|
|
@ -56,6 +56,7 @@ import ansible_mitogen.logging
|
|||
import ansible_mitogen.services
|
||||
|
||||
from mitogen.core import b
|
||||
import ansible_mitogen.affinity
|
||||
|
||||
|
||||
LOG = logging.getLogger(__name__)
|
||||
|
@ -172,11 +173,12 @@ class MuxProcess(object):
|
|||
if _init_logging:
|
||||
ansible_mitogen.logging.setup()
|
||||
if cls.child_pid:
|
||||
ansible_mitogen.affinity.manager.set_cpu(1)
|
||||
cls.child_sock.close()
|
||||
cls.child_sock = None
|
||||
mitogen.core.io_op(cls.worker_sock.recv, 1)
|
||||
else:
|
||||
mitogen.utils.reset_affinity()
|
||||
ansible_mitogen.affinity.manager.set_cpu(0)
|
||||
cls.worker_sock.close()
|
||||
cls.worker_sock = None
|
||||
self = cls()
|
||||
|
|
|
@ -31,6 +31,7 @@ import os
|
|||
import threading
|
||||
|
||||
import mitogen.core
|
||||
import ansible_mitogen.affinity
|
||||
import ansible_mitogen.loaders
|
||||
import ansible_mitogen.mixins
|
||||
import ansible_mitogen.process
|
||||
|
@ -105,6 +106,7 @@ def wrap_worker__run(*args, **kwargs):
|
|||
import signal
|
||||
signal.signal(signal.SIGTERM, signal.SIG_IGN)
|
||||
|
||||
ansible_mitogen.affinity.manager.assign()
|
||||
return mitogen.core._profile_hook('WorkerProcess',
|
||||
lambda: worker__run(*args, **kwargs)
|
||||
)
|
||||
|
|
|
@ -28,17 +28,9 @@
|
|||
|
||||
import datetime
|
||||
import logging
|
||||
import multiprocessing
|
||||
import os
|
||||
import random
|
||||
import struct
|
||||
import sys
|
||||
|
||||
try:
|
||||
import ctypes
|
||||
except ImportError:
|
||||
ctypes = None
|
||||
|
||||
import mitogen
|
||||
import mitogen.core
|
||||
import mitogen.master
|
||||
|
@ -53,50 +45,6 @@ if mitogen.core.PY3:
|
|||
else:
|
||||
iteritems = dict.iteritems
|
||||
|
||||
# Bound unconditionally so that reset_affinity() sees None, and is a no-op,
# when ctypes itself could not be imported. Previously the name was left
# undefined in that case.
_sched_setaffinity = None

if ctypes:
    try:
        _libc = ctypes.CDLL(None)
        _sched_setaffinity = _libc.sched_setaffinity
    except (OSError, AttributeError):
        # No usable libc, or libc has no sched_setaffinity.
        _sched_setaffinity = None
|
||||
|
||||
|
||||
def reset_affinity(clear=False):
    """
    Pin the calling process to one CPU chosen at random, or lift any
    existing pinning.

    :param bool clear:
        If :data:`True`, allow the process to run on any of the first 32
        CPUs instead of choosing a single one.

    Does nothing when ``sched_setaffinity`` could not be resolved. When
    called before any threads are started, every thread inherits the same
    binding.

    After applying the mask, ``mitogen.parent._preexec_hook`` is set to a
    function that calls ``reset_affinity(clear=True)``, so that children
    created with :func:`mitogen.parent.detach_popen` (such as SSH) are not
    confined to the parent's core.

    Keeping all threads of a Python process on one CPU costs little, since
    they share one lock and cannot execute in parallel, and it keeps
    inter-thread round trips (for example between
    :class:`mitogen.service.Pool` and :class:`mitogen.core.Broker`) short.
    """
    if _sched_setaffinity is None:
        return

    # Start from "any CPU" and narrow to a single random CPU unless the
    # caller asked for the binding to be cleared.
    mask = 0xffffffff
    if not clear:
        mask = 1 << random.randint(0, multiprocessing.cpu_count() - 1)

    packed = struct.pack('L', mask)
    _sched_setaffinity(os.getpid(), len(packed), packed)
    mitogen.parent._preexec_hook = lambda: reset_affinity(clear=True)
|
||||
|
||||
|
||||
def setup_gil():
|
||||
"""
|
||||
|
|
Loading…
Reference in New Issue