From 0dd89a7f4054bb68e7c201c20f939ba03732bde9 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Fri, 16 Aug 2024 10:07:42 +0200 Subject: [PATCH] [3.13] gh-116622: Android logging fixes (GH-122698) (#122719) gh-116622: Android logging fixes (GH-122698) Modifies the handling of stdout/stderr redirection on Android to accomodate the rate and buffer size limits imposed by Android's logging infrastructure. (cherry picked from commit b0c48b8fd88f26b31ec2f743358091073277dcde) Co-authored-by: Malcolm Smith --- Lib/_android_support.py | 128 +++++++++++++---- Lib/test/test_android.py | 132 +++++++++++++++--- ...-08-05-19-04-06.gh-issue-116622.3LWUzE.rst | 1 + 3 files changed, 213 insertions(+), 48 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-08-05-19-04-06.gh-issue-116622.3LWUzE.rst diff --git a/Lib/_android_support.py b/Lib/_android_support.py index 590e85ea8c2..d5d13ec6a48 100644 --- a/Lib/_android_support.py +++ b/Lib/_android_support.py @@ -1,19 +1,20 @@ import io import sys - +from threading import RLock +from time import sleep, time # The maximum length of a log message in bytes, including the level marker and -# tag, is defined as LOGGER_ENTRY_MAX_PAYLOAD in -# platform/system/logging/liblog/include/log/log.h. As of API level 30, messages -# longer than this will be be truncated by logcat. This limit has already been -# reduced at least once in the history of Android (from 4076 to 4068 between API -# level 23 and 26), so leave some headroom. +# tag, is defined as LOGGER_ENTRY_MAX_PAYLOAD at +# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log.h;l=71. +# Messages longer than this will be be truncated by logcat. This limit has already +# been reduced at least once in the history of Android (from 4076 to 4068 between +# API level 23 and 26), so leave some headroom. MAX_BYTES_PER_WRITE = 4000 # UTF-8 uses a maximum of 4 bytes per character, so limiting text writes to this -# size ensures that TextIOWrapper can always avoid exceeding MAX_BYTES_PER_WRITE. +# size ensures that we can always avoid exceeding MAX_BYTES_PER_WRITE. # However, if the actual number of bytes per character is smaller than that, -# then TextIOWrapper may still join multiple consecutive text writes into binary +# then we may still join multiple consecutive text writes into binary # writes containing a larger number of characters. MAX_CHARS_PER_WRITE = MAX_BYTES_PER_WRITE // 4 @@ -26,18 +27,22 @@ def init_streams(android_log_write, stdout_prio, stderr_prio): if sys.executable: return # Not embedded in an app. + global logcat + logcat = Logcat(android_log_write) + sys.stdout = TextLogStream( - android_log_write, stdout_prio, "python.stdout", errors=sys.stdout.errors) + stdout_prio, "python.stdout", errors=sys.stdout.errors) sys.stderr = TextLogStream( - android_log_write, stderr_prio, "python.stderr", errors=sys.stderr.errors) + stderr_prio, "python.stderr", errors=sys.stderr.errors) class TextLogStream(io.TextIOWrapper): - def __init__(self, android_log_write, prio, tag, **kwargs): + def __init__(self, prio, tag, **kwargs): kwargs.setdefault("encoding", "UTF-8") - kwargs.setdefault("line_buffering", True) - super().__init__(BinaryLogStream(android_log_write, prio, tag), **kwargs) - self._CHUNK_SIZE = MAX_BYTES_PER_WRITE + super().__init__(BinaryLogStream(prio, tag), **kwargs) + self._lock = RLock() + self._pending_bytes = [] + self._pending_bytes_count = 0 def __repr__(self): return f"" @@ -52,19 +57,48 @@ def write(self, s): s = str.__str__(s) # We want to emit one log message per line wherever possible, so split - # the string before sending it to the superclass. Note that - # "".splitlines() == [], so nothing will be logged for an empty string. - for line in s.splitlines(keepends=True): - while line: - super().write(line[:MAX_CHARS_PER_WRITE]) - line = line[MAX_CHARS_PER_WRITE:] + # the string into lines first. Note that "".splitlines() == [], so + # nothing will be logged for an empty string. + with self._lock: + for line in s.splitlines(keepends=True): + while line: + chunk = line[:MAX_CHARS_PER_WRITE] + line = line[MAX_CHARS_PER_WRITE:] + self._write_chunk(chunk) return len(s) + # The size and behavior of TextIOWrapper's buffer is not part of its public + # API, so we handle buffering ourselves to avoid truncation. + def _write_chunk(self, s): + b = s.encode(self.encoding, self.errors) + if self._pending_bytes_count + len(b) > MAX_BYTES_PER_WRITE: + self.flush() + + self._pending_bytes.append(b) + self._pending_bytes_count += len(b) + if ( + self.write_through + or b.endswith(b"\n") + or self._pending_bytes_count > MAX_BYTES_PER_WRITE + ): + self.flush() + + def flush(self): + with self._lock: + self.buffer.write(b"".join(self._pending_bytes)) + self._pending_bytes.clear() + self._pending_bytes_count = 0 + + # Since this is a line-based logging system, line buffering cannot be turned + # off, i.e. a newline always causes a flush. + @property + def line_buffering(self): + return True + class BinaryLogStream(io.RawIOBase): - def __init__(self, android_log_write, prio, tag): - self.android_log_write = android_log_write + def __init__(self, prio, tag): self.prio = prio self.tag = tag @@ -85,10 +119,48 @@ def write(self, b): # Writing an empty string to the stream should have no effect. if b: - # Encode null bytes using "modified UTF-8" to avoid truncating the - # message. This should not affect the return value, as the caller - # may be expecting it to match the length of the input. - self.android_log_write(self.prio, self.tag, - b.replace(b"\x00", b"\xc0\x80")) - + logcat.write(self.prio, self.tag, b) return len(b) + + +# When a large volume of data is written to logcat at once, e.g. when a test +# module fails in --verbose3 mode, there's a risk of overflowing logcat's own +# buffer and losing messages. We avoid this by imposing a rate limit using the +# token bucket algorithm, based on a conservative estimate of how fast `adb +# logcat` can consume data. +MAX_BYTES_PER_SECOND = 1024 * 1024 + +# The logcat buffer size of a device can be determined by running `logcat -g`. +# We set the token bucket size to half of the buffer size of our current minimum +# API level, because other things on the system will be producing messages as +# well. +BUCKET_SIZE = 128 * 1024 + +# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log_read.h;l=39 +PER_MESSAGE_OVERHEAD = 28 + + +class Logcat: + def __init__(self, android_log_write): + self.android_log_write = android_log_write + self._lock = RLock() + self._bucket_level = 0 + self._prev_write_time = time() + + def write(self, prio, tag, message): + # Encode null bytes using "modified UTF-8" to avoid them truncating the + # message. + message = message.replace(b"\x00", b"\xc0\x80") + + with self._lock: + now = time() + self._bucket_level += ( + (now - self._prev_write_time) * MAX_BYTES_PER_SECOND) + self._bucket_level = min(self._bucket_level, BUCKET_SIZE) + self._prev_write_time = now + + self._bucket_level -= PER_MESSAGE_OVERHEAD + len(tag) + len(message) + if self._bucket_level < 0: + sleep(-self._bucket_level / MAX_BYTES_PER_SECOND) + + self.android_log_write(prio, tag, message) diff --git a/Lib/test/test_android.py b/Lib/test/test_android.py index 115882a4c28..82035061bb6 100644 --- a/Lib/test/test_android.py +++ b/Lib/test/test_android.py @@ -1,14 +1,17 @@ +import io import platform import queue import re import subprocess import sys import unittest +from _android_support import TextLogStream from array import array -from contextlib import contextmanager +from contextlib import ExitStack, contextmanager from threading import Thread from test.support import LOOPBACK_TIMEOUT -from time import time +from time import sleep, time +from unittest.mock import patch if sys.platform != "android": @@ -81,18 +84,39 @@ def unbuffered(self, stream): finally: stream.reconfigure(write_through=False) + # In --verbose3 mode, sys.stdout and sys.stderr are captured, so we can't + # test them directly. Detect this mode and use some temporary streams with + # the same properties. + def stream_context(self, stream_name, level): + # https://developer.android.com/ndk/reference/group/logging + prio = {"I": 4, "W": 5}[level] + + stack = ExitStack() + stack.enter_context(self.subTest(stream_name)) + stream = getattr(sys, stream_name) + if isinstance(stream, io.StringIO): + stack.enter_context( + patch( + f"sys.{stream_name}", + TextLogStream( + prio, f"python.{stream_name}", errors="backslashreplace" + ), + ) + ) + return stack + def test_str(self): for stream_name, level in [("stdout", "I"), ("stderr", "W")]: - with self.subTest(stream=stream_name): + with self.stream_context(stream_name, level): stream = getattr(sys, stream_name) tag = f"python.{stream_name}" self.assertEqual(f"", repr(stream)) - self.assertTrue(stream.writable()) - self.assertFalse(stream.readable()) + self.assertIs(stream.writable(), True) + self.assertIs(stream.readable(), False) self.assertEqual("UTF-8", stream.encoding) - self.assertTrue(stream.line_buffering) - self.assertFalse(stream.write_through) + self.assertIs(stream.line_buffering, True) + self.assertIs(stream.write_through, False) # stderr is backslashreplace by default; stdout is configured # that way by libregrtest.main. @@ -147,6 +171,13 @@ def write(s, lines=None, *, write_len=None): write("f\n\ng", ["exxf", ""]) write("\n", ["g"]) + # Since this is a line-based logging system, line buffering + # cannot be turned off, i.e. a newline always causes a flush. + stream.reconfigure(line_buffering=False) + self.assertIs(stream.line_buffering, True) + + # However, buffering can be turned off completely if you want a + # flush after every write. with self.unbuffered(stream): write("\nx", ["", "x"]) write("\na\n", ["", "a"]) @@ -209,30 +240,30 @@ def __str__(self): # (MAX_BYTES_PER_WRITE). # # ASCII (1 byte per character) - write(("foobar" * 700) + "\n", - [("foobar" * 666) + "foob", # 4000 bytes - "ar" + ("foobar" * 33)]) # 200 bytes + write(("foobar" * 700) + "\n", # 4200 bytes in + [("foobar" * 666) + "foob", # 4000 bytes out + "ar" + ("foobar" * 33)]) # 200 bytes out # "Full-width" digits 0-9 (3 bytes per character) s = "\uff10\uff11\uff12\uff13\uff14\uff15\uff16\uff17\uff18\uff19" - write((s * 150) + "\n", - [s * 100, # 3000 bytes - s * 50]) # 1500 bytes + write((s * 150) + "\n", # 4500 bytes in + [s * 100, # 3000 bytes out + s * 50]) # 1500 bytes out s = "0123456789" - write(s * 200, []) - write(s * 150, []) - write(s * 51, [s * 350]) # 3500 bytes - write("\n", [s * 51]) # 510 bytes + write(s * 200, []) # 2000 bytes in + write(s * 150, []) # 1500 bytes in + write(s * 51, [s * 350]) # 510 bytes in, 3500 bytes out + write("\n", [s * 51]) # 0 bytes in, 510 bytes out def test_bytes(self): for stream_name, level in [("stdout", "I"), ("stderr", "W")]: - with self.subTest(stream=stream_name): + with self.stream_context(stream_name, level): stream = getattr(sys, stream_name).buffer tag = f"python.{stream_name}" self.assertEqual(f"", repr(stream)) - self.assertTrue(stream.writable()) - self.assertFalse(stream.readable()) + self.assertIs(stream.writable(), True) + self.assertIs(stream.readable(), False) def write(b, lines=None, *, write_len=None): if write_len is None: @@ -330,3 +361,64 @@ def write(b, lines=None, *, write_len=None): fr"{type(obj).__name__}" ): stream.write(obj) + + def test_rate_limit(self): + # https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log_read.h;l=39 + PER_MESSAGE_OVERHEAD = 28 + + # https://developer.android.com/ndk/reference/group/logging + ANDROID_LOG_DEBUG = 3 + + # To avoid flooding the test script output, use a different tag rather + # than stdout or stderr. + tag = "python.rate_limit" + stream = TextLogStream(ANDROID_LOG_DEBUG, tag) + + # Make a test message which consumes 1 KB of the logcat buffer. + message = "Line {:03d} " + message += "." * ( + 1024 - PER_MESSAGE_OVERHEAD - len(tag) - len(message.format(0)) + ) + "\n" + + # See _android_support.py. The default values of these parameters work + # well across a wide range of devices, but we'll use smaller values to + # ensure a quick and reliable test that doesn't flood the log too much. + MAX_KB_PER_SECOND = 100 + BUCKET_KB = 10 + with ( + patch("_android_support.MAX_BYTES_PER_SECOND", MAX_KB_PER_SECOND * 1024), + patch("_android_support.BUCKET_SIZE", BUCKET_KB * 1024), + ): + # Make sure the token bucket is full. + sleep(BUCKET_KB / MAX_KB_PER_SECOND) + line_num = 0 + + # Write BUCKET_KB messages, and return the rate at which they were + # accepted in KB per second. + def write_bucketful(): + nonlocal line_num + start = time() + max_line_num = line_num + BUCKET_KB + while line_num < max_line_num: + stream.write(message.format(line_num)) + line_num += 1 + return BUCKET_KB / (time() - start) + + # The first bucketful should be written with minimal delay. The + # factor of 2 here is not arbitrary: it verifies that the system can + # write fast enough to empty the bucket within two bucketfuls, which + # the next part of the test depends on. + self.assertGreater(write_bucketful(), MAX_KB_PER_SECOND * 2) + + # Write another bucketful to empty the token bucket completely. + write_bucketful() + + # The next bucketful should be written at the rate limit. + self.assertAlmostEqual( + write_bucketful(), MAX_KB_PER_SECOND, + delta=MAX_KB_PER_SECOND * 0.1 + ) + + # Once the token bucket refills, we should go back to full speed. + sleep(BUCKET_KB / MAX_KB_PER_SECOND) + self.assertGreater(write_bucketful(), MAX_KB_PER_SECOND * 2) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-08-05-19-04-06.gh-issue-116622.3LWUzE.rst b/Misc/NEWS.d/next/Core and Builtins/2024-08-05-19-04-06.gh-issue-116622.3LWUzE.rst new file mode 100644 index 00000000000..9320928477a --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-08-05-19-04-06.gh-issue-116622.3LWUzE.rst @@ -0,0 +1 @@ +Fix Android stdout and stderr messages being truncated or lost.