ansible: new multiplexer/workers configuration
Following on from 152effc26c9a5918cb7ead7a97fe7fa7f81b6764,

* Pin mux to CPU 0
* Pin top-level to CPU 1
* Pin workers sequentially to CPU 2..n

Nets 19.5% improvement on issue_140__thread_pileup.yml when targeting 64 Docker containers on the same 8 core/16 thread machine.

Before (prior to last scheme, no affinity at all):

    2294528.731458 task-clock (msec) # 6.443 CPUs utilized
    10,429,745 context-switches # 0.005 M/sec
    2,049,618 cpu-migrations # 0.893 K/sec
    8,258,952 page-faults # 0.004 M/sec
    5,532,719,253,824 cycles # 2.411 GHz (83.35%)
    3,267,471,616,230 instructions # 0.59 insn per cycle # 1.22 stalled cycles per insn (83.35%)
    662,006,455,943 branches # 288.515 M/sec (83.33%)
    39,453,895,977 branch-misses # 5.96% of all branches (83.37%)
    356.148064576 seconds time elapsed

After:

    2226463.958975 task-clock (msec) # 7.784 CPUs utilized
    9,831,466 context-switches # 0.004 M/sec
    180,065 cpu-migrations # 0.081 K/sec
    5,082,278 page-faults # 0.002 M/sec
    5,592,548,587,259 cycles # 2.512 GHz (83.35%)
    3,135,038,855,414 instructions # 0.56 insn per cycle # 1.32 stalled cycles per insn (83.32%)
    636,397,509,232 branches # 285.833 M/sec (83.30%)
    39,135,441,790 branch-misses # 6.15% of all branches (83.35%)
    286.036681644 seconds time elapsed
This commit is contained in:
parent
8f6e6b3940
commit
c6d5aa29ba
|
@ -0,0 +1,132 @@
|
||||||
|
# Copyright 2017, David Wilson
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions are met:
|
||||||
|
#
|
||||||
|
# 1. Redistributions of source code must retain the above copyright notice,
|
||||||
|
# this list of conditions and the following disclaimer.
|
||||||
|
#
|
||||||
|
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
# this list of conditions and the following disclaimer in the documentation
|
||||||
|
# and/or other materials provided with the distribution.
|
||||||
|
#
|
||||||
|
# 3. Neither the name of the copyright holder nor the names of its contributors
|
||||||
|
# may be used to endorse or promote products derived from this software without
|
||||||
|
# specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
|
||||||
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
# POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
import ctypes
|
||||||
|
import mmap
|
||||||
|
import multiprocessing
|
||||||
|
import os
|
||||||
|
import struct
|
||||||
|
|
||||||
|
import mitogen.parent
|
||||||
|
|
||||||
|
|
||||||
|
try:
|
||||||
|
_libc = ctypes.CDLL(None, use_errno=True)
|
||||||
|
_strerror = _libc.strerror
|
||||||
|
_strerror.restype = ctypes.c_char_p
|
||||||
|
_pthread_mutex_init = _libc.pthread_mutex_init
|
||||||
|
_pthread_mutex_lock = _libc.pthread_mutex_lock
|
||||||
|
_pthread_mutex_unlock = _libc.pthread_mutex_unlock
|
||||||
|
_sched_setaffinity = _libc.sched_setaffinity
|
||||||
|
except (OSError, AttributeError):
|
||||||
|
_libc = None
|
||||||
|
|
||||||
|
|
||||||
|
class pthread_mutex_t(ctypes.Structure):
    """
    Opaque storage for a C ``pthread_mutex_t``, driven through the libc
    ``pthread_mutex_*`` functions loaded at module import.

    NOTE(review): the mutex is initialised with default attributes. If it is
    meant to be locked from several processes through shared memory, POSIX
    requires the ``PTHREAD_PROCESS_SHARED`` attribute -- confirm and add it
    if so.
    """
    _fields_ = [
        # Raw bytes handed to libc as the mutex object. Presumably sized
        # generously so it is at least as large as the platform's real
        # pthread_mutex_t -- TODO confirm.
        ('data', ctypes.c_uint8 * 512),
    ]

    def _check(self, rc):
        """
        Raise if a ``pthread_mutex_*`` call failed.

        These functions return the error number directly and do not set
        ``errno``, so the message is derived from the return value rather
        than from :func:`ctypes.get_errno`.

        :param int rc:
            Return value of the pthread call; zero means success.
        :raises Exception:
            With the ``strerror()`` text for `rc` when it is non-zero.
        """
        if rc:
            raise Exception(_strerror(rc))

    def init(self):
        """
        Initialise the mutex with default attributes.

        :raises Exception:
            If ``pthread_mutex_init()`` reports an error.
        """
        # None is marshalled as a NULL pointer, i.e. "default attributes".
        self._check(_pthread_mutex_init(self.data, None))

    def acquire(self):
        """
        Lock the mutex, blocking until it becomes available.

        :raises Exception:
            If ``pthread_mutex_lock()`` reports an error.
        """
        self._check(_pthread_mutex_lock(self.data))

    def release(self):
        """
        Unlock the mutex.

        :raises Exception:
            If ``pthread_mutex_unlock()`` reports an error.
        """
        self._check(_pthread_mutex_unlock(self.data))
|
||||||
|
|
||||||
|
|
||||||
|
class State(ctypes.Structure):
    """
    Layout of the memory region that :class:`Manager` maps with
    ``State.from_buffer()``: a lock followed by the counter it protects.
    """
    _fields_ = [
        # Guards read-modify-write access to `counter`.
        ('lock', pthread_mutex_t),
        # Number of CPU assignments handed out so far. 32 bits wide: the
        # previous 8-bit field silently wrapped after 256 assignments,
        # which skews the round-robin whenever 256 is not a multiple of the
        # number of usable CPUs.
        ('counter', ctypes.c_uint32),
    ]
|
||||||
|
|
||||||
|
|
||||||
|
class Manager(object):
    """
    Pin the calling process to a fixed CPU, keeping a shared round-robin
    counter so that successive :meth:`assign` calls land on successive CPUs.

    The first :attr:`RESERVED_CPUS` CPUs are left for callers that bind
    explicitly with :meth:`set_cpu`; :meth:`assign` and :meth:`clear` only
    hand out the remaining ones. On machines with no CPUs beyond the
    reserved ones, every CPU is used instead.

    Every method is a no-op when the required libc symbols could not be
    loaded (`_libc` is :data:`None`), e.g. on systems other than Linux.

    Whenever an affinity is applied, :meth:`clear` is installed as
    ``mitogen.parent._preexec_hook``, so that CPU-intensive children like
    SSH are not forced to share the same core as the (otherwise potentially
    very busy) parent.

    Threads bound to the same CPU share cache and experience the lowest
    possible inter-thread roundtrip latency, for example ensuring the minimum
    possible time required for :class:`mitogen.service.Pool` to interact with
    :class:`mitogen.core.Broker`, as required for every message transmitted or
    received.

    Binding threads of a Python process to one CPU makes sense, as they are
    otherwise unable to operate in parallel, and all must acquire the same lock
    prior to executing.
    """

    #: Number of low-numbered CPUs excluded from :meth:`assign` and
    #: :meth:`clear`.
    RESERVED_CPUS = 2

    def __init__(self):
        # Anonymous mapping holding the lock and assignment counter.
        self.mem = mmap.mmap(-1, 4096)
        self.state = State.from_buffer(self.mem)
        if _libc is not None:
            self.state.lock.init()

    @staticmethod
    def _pack_mask(mask):
        """
        Serialise the integer bitmask `mask` as an array of native
        ``unsigned long`` words, least significant word first, using as many
        words as needed so CPU indexes beyond one machine word still fit.

        :param int mask:
            Non-negative bitmask, bit N set meaning CPU N is allowed.
        :raises ValueError:
            If `mask` is negative.
        :returns:
            Packed bytes suitable for ``sched_setaffinity()``.
        """
        if mask < 0:
            raise ValueError('CPU mask must be non-negative')

        word_bits = 8 * struct.calcsize('L')
        word_max = (1 << word_bits) - 1
        words = [mask & word_max]
        mask >>= word_bits
        while mask:
            words.append(mask & word_max)
            mask >>= word_bits
        return struct.pack('%dL' % (len(words),), *words)

    @staticmethod
    def _clear_mask(cpu_count, reserved=2):
        """
        Return the bitmask used by :meth:`clear`: every CPU except the first
        `reserved`, or every CPU when none would otherwise remain.

        The mask always spans at least 32 bits, matching the width that was
        previously hard-coded.
        """
        everything = (1 << max(32, cpu_count)) - 1
        if cpu_count <= reserved:
            # Excluding the reserved CPUs would leave no valid CPU at all.
            return everything
        return everything & ~((1 << reserved) - 1)

    @staticmethod
    def _worker_cpu(n, cpu_count, reserved=2):
        """
        Map the 0-based assignment number `n` to a CPU index, cycling over
        the CPUs above the `reserved` ones, or over all CPUs when there are
        none above them.
        """
        usable = cpu_count - reserved
        if usable < 1:
            # Previously this divided by zero (2 CPUs) or produced an
            # out-of-range index (1 CPU).
            return n % max(cpu_count, 1)
        return reserved + (n % usable)

    def _set_affinity(self, mask):
        """
        Restrict this process to the CPUs set in the bitmask `mask`, and
        arrange for :meth:`clear` to run as ``mitogen.parent._preexec_hook``.
        """
        if _libc is None:
            return

        mitogen.parent._preexec_hook = self.clear
        s = self._pack_mask(mask)
        # Best effort: the return value is deliberately not checked, so an
        # unsatisfiable mask leaves the existing affinity in place.
        _sched_setaffinity(os.getpid(), len(s), s)

    def cpu_count(self):
        """
        Return the CPU count reported by :func:`multiprocessing.cpu_count`.
        """
        return multiprocessing.cpu_count()

    def clear(self):
        """
        Clear any prior binding, except for reserved CPUs.
        """
        self._set_affinity(
            self._clear_mask(self.cpu_count(), self.RESERVED_CPUS)
        )

    def set_cpu(self, cpu):
        """
        Bind to 0-based `cpu`.
        """
        self._set_affinity(1 << cpu)

    def assign(self):
        """
        Take the next number from the shared counter and bind this process
        to the corresponding non-reserved CPU.
        """
        if _libc is None:
            return

        self.state.lock.acquire()
        try:
            n = self.state.counter
            self.state.counter += 1
        finally:
            self.state.lock.release()

        self.set_cpu(
            self._worker_cpu(n, self.cpu_count(), self.RESERVED_CPUS)
        )
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level instance, created at import time; other modules use it as
# ``ansible_mitogen.affinity.manager``.
manager = Manager()
|
|
@ -56,6 +56,7 @@ import ansible_mitogen.logging
|
||||||
import ansible_mitogen.services
|
import ansible_mitogen.services
|
||||||
|
|
||||||
from mitogen.core import b
|
from mitogen.core import b
|
||||||
|
import ansible_mitogen.affinity
|
||||||
|
|
||||||
|
|
||||||
LOG = logging.getLogger(__name__)
|
LOG = logging.getLogger(__name__)
|
||||||
|
@ -172,11 +173,12 @@ class MuxProcess(object):
|
||||||
if _init_logging:
|
if _init_logging:
|
||||||
ansible_mitogen.logging.setup()
|
ansible_mitogen.logging.setup()
|
||||||
if cls.child_pid:
|
if cls.child_pid:
|
||||||
|
ansible_mitogen.affinity.manager.set_cpu(1)
|
||||||
cls.child_sock.close()
|
cls.child_sock.close()
|
||||||
cls.child_sock = None
|
cls.child_sock = None
|
||||||
mitogen.core.io_op(cls.worker_sock.recv, 1)
|
mitogen.core.io_op(cls.worker_sock.recv, 1)
|
||||||
else:
|
else:
|
||||||
mitogen.utils.reset_affinity()
|
ansible_mitogen.affinity.manager.set_cpu(0)
|
||||||
cls.worker_sock.close()
|
cls.worker_sock.close()
|
||||||
cls.worker_sock = None
|
cls.worker_sock = None
|
||||||
self = cls()
|
self = cls()
|
||||||
|
|
|
@ -31,6 +31,7 @@ import os
|
||||||
import threading
|
import threading
|
||||||
|
|
||||||
import mitogen.core
|
import mitogen.core
|
||||||
|
import ansible_mitogen.affinity
|
||||||
import ansible_mitogen.loaders
|
import ansible_mitogen.loaders
|
||||||
import ansible_mitogen.mixins
|
import ansible_mitogen.mixins
|
||||||
import ansible_mitogen.process
|
import ansible_mitogen.process
|
||||||
|
@ -105,6 +106,7 @@ def wrap_worker__run(*args, **kwargs):
|
||||||
import signal
|
import signal
|
||||||
signal.signal(signal.SIGTERM, signal.SIG_IGN)
|
signal.signal(signal.SIGTERM, signal.SIG_IGN)
|
||||||
|
|
||||||
|
ansible_mitogen.affinity.manager.assign()
|
||||||
return mitogen.core._profile_hook('WorkerProcess',
|
return mitogen.core._profile_hook('WorkerProcess',
|
||||||
lambda: worker__run(*args, **kwargs)
|
lambda: worker__run(*args, **kwargs)
|
||||||
)
|
)
|
||||||
|
|
|
@ -28,17 +28,9 @@
|
||||||
|
|
||||||
import datetime
|
import datetime
|
||||||
import logging
|
import logging
|
||||||
import multiprocessing
|
|
||||||
import os
|
import os
|
||||||
import random
|
|
||||||
import struct
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
try:
|
|
||||||
import ctypes
|
|
||||||
except ImportError:
|
|
||||||
ctypes = None
|
|
||||||
|
|
||||||
import mitogen
|
import mitogen
|
||||||
import mitogen.core
|
import mitogen.core
|
||||||
import mitogen.master
|
import mitogen.master
|
||||||
|
@ -53,50 +45,6 @@ if mitogen.core.PY3:
|
||||||
else:
|
else:
|
||||||
iteritems = dict.iteritems
|
iteritems = dict.iteritems
|
||||||
|
|
||||||
if ctypes:
|
|
||||||
try:
|
|
||||||
_libc = ctypes.CDLL(None)
|
|
||||||
_sched_setaffinity = _libc.sched_setaffinity
|
|
||||||
except (OSError, AttributeError):
|
|
||||||
_sched_setaffinity = None
|
|
||||||
|
|
||||||
|
|
||||||
def reset_affinity(clear=False):
|
|
||||||
"""
|
|
||||||
Bind this process to a randomly selected CPU. If done prior to starting
|
|
||||||
threads, all threads will be bound to the same CPU. This call is a no-op on
|
|
||||||
systems other than Linux.
|
|
||||||
|
|
||||||
:param bool clear:
|
|
||||||
If :data:`True`, clear any prior binding.
|
|
||||||
|
|
||||||
A hook is installed that causes `reset_affinity(clear=True)` to run in the
|
|
||||||
child of any process created with :func:`mitogen.parent.detach_popen`,
|
|
||||||
ensuring CPU-intensive children like SSH are not forced to share the same
|
|
||||||
core as the (otherwise potentially very busy) parent.
|
|
||||||
|
|
||||||
Threads bound to the same CPU share cache and experience the lowest
|
|
||||||
possible inter-thread roundtrip latency, for example ensuring the minimum
|
|
||||||
possible time required for :class:`mitogen.service.Pool` to interact with
|
|
||||||
:class:`mitogen.core.Broker`, as required for every message transmitted or
|
|
||||||
received.
|
|
||||||
|
|
||||||
Binding threads of a Python process to one CPU makes sense, as they are
|
|
||||||
otherwise unable to operate in parallel, and all must acquire the same lock
|
|
||||||
prior to executing.
|
|
||||||
"""
|
|
||||||
if _sched_setaffinity is None:
|
|
||||||
return
|
|
||||||
|
|
||||||
if clear:
|
|
||||||
mask = 0xffffffff
|
|
||||||
else:
|
|
||||||
mask = 1 << random.randint(0, multiprocessing.cpu_count() - 1)
|
|
||||||
|
|
||||||
s = struct.pack('L', mask)
|
|
||||||
_sched_setaffinity(os.getpid(), len(s), s)
|
|
||||||
mitogen.parent._preexec_hook = lambda: reset_affinity(clear=True)
|
|
||||||
|
|
||||||
|
|
||||||
def setup_gil():
|
def setup_gil():
|
||||||
"""
|
"""
|
||||||
|
|
Loading…
Reference in New Issue