[3.13] gh-116622: Android logging fixes (GH-122698) ()

gh-116622: Android logging fixes (GH-122698)

Modifies the handling of stdout/stderr redirection on Android to accomodate
the rate and buffer size limits imposed by Android's logging infrastructure.
(cherry picked from commit b0c48b8fd8)

Co-authored-by: Malcolm Smith <smith@chaquo.com>
This commit is contained in:
Miss Islington (bot) 2024-08-16 10:07:42 +02:00 committed by GitHub
parent 69cf92fbca
commit 0dd89a7f40
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 213 additions and 48 deletions

View File

@ -1,19 +1,20 @@
import io
import sys
from threading import RLock
from time import sleep, time
# The maximum length of a log message in bytes, including the level marker and
# tag, is defined as LOGGER_ENTRY_MAX_PAYLOAD in
# platform/system/logging/liblog/include/log/log.h. As of API level 30, messages
# longer than this will be be truncated by logcat. This limit has already been
# reduced at least once in the history of Android (from 4076 to 4068 between API
# level 23 and 26), so leave some headroom.
# tag, is defined as LOGGER_ENTRY_MAX_PAYLOAD at
# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log.h;l=71.
# Messages longer than this will be be truncated by logcat. This limit has already
# been reduced at least once in the history of Android (from 4076 to 4068 between
# API level 23 and 26), so leave some headroom.
MAX_BYTES_PER_WRITE = 4000
# UTF-8 uses a maximum of 4 bytes per character, so limiting text writes to this
# size ensures that TextIOWrapper can always avoid exceeding MAX_BYTES_PER_WRITE.
# size ensures that we can always avoid exceeding MAX_BYTES_PER_WRITE.
# However, if the actual number of bytes per character is smaller than that,
# then TextIOWrapper may still join multiple consecutive text writes into binary
# then we may still join multiple consecutive text writes into binary
# writes containing a larger number of characters.
MAX_CHARS_PER_WRITE = MAX_BYTES_PER_WRITE // 4
@ -26,18 +27,22 @@ def init_streams(android_log_write, stdout_prio, stderr_prio):
if sys.executable:
return # Not embedded in an app.
global logcat
logcat = Logcat(android_log_write)
sys.stdout = TextLogStream(
android_log_write, stdout_prio, "python.stdout", errors=sys.stdout.errors)
stdout_prio, "python.stdout", errors=sys.stdout.errors)
sys.stderr = TextLogStream(
android_log_write, stderr_prio, "python.stderr", errors=sys.stderr.errors)
stderr_prio, "python.stderr", errors=sys.stderr.errors)
class TextLogStream(io.TextIOWrapper):
def __init__(self, android_log_write, prio, tag, **kwargs):
def __init__(self, prio, tag, **kwargs):
kwargs.setdefault("encoding", "UTF-8")
kwargs.setdefault("line_buffering", True)
super().__init__(BinaryLogStream(android_log_write, prio, tag), **kwargs)
self._CHUNK_SIZE = MAX_BYTES_PER_WRITE
super().__init__(BinaryLogStream(prio, tag), **kwargs)
self._lock = RLock()
self._pending_bytes = []
self._pending_bytes_count = 0
def __repr__(self):
return f"<TextLogStream {self.buffer.tag!r}>"
@ -52,19 +57,48 @@ def write(self, s):
s = str.__str__(s)
# We want to emit one log message per line wherever possible, so split
# the string before sending it to the superclass. Note that
# "".splitlines() == [], so nothing will be logged for an empty string.
for line in s.splitlines(keepends=True):
while line:
super().write(line[:MAX_CHARS_PER_WRITE])
line = line[MAX_CHARS_PER_WRITE:]
# the string into lines first. Note that "".splitlines() == [], so
# nothing will be logged for an empty string.
with self._lock:
for line in s.splitlines(keepends=True):
while line:
chunk = line[:MAX_CHARS_PER_WRITE]
line = line[MAX_CHARS_PER_WRITE:]
self._write_chunk(chunk)
return len(s)
# The size and behavior of TextIOWrapper's buffer is not part of its public
# API, so we handle buffering ourselves to avoid truncation.
def _write_chunk(self, s):
b = s.encode(self.encoding, self.errors)
if self._pending_bytes_count + len(b) > MAX_BYTES_PER_WRITE:
self.flush()
self._pending_bytes.append(b)
self._pending_bytes_count += len(b)
if (
self.write_through
or b.endswith(b"\n")
or self._pending_bytes_count > MAX_BYTES_PER_WRITE
):
self.flush()
def flush(self):
with self._lock:
self.buffer.write(b"".join(self._pending_bytes))
self._pending_bytes.clear()
self._pending_bytes_count = 0
# Since this is a line-based logging system, line buffering cannot be turned
# off, i.e. a newline always causes a flush.
@property
def line_buffering(self):
return True
class BinaryLogStream(io.RawIOBase):
def __init__(self, android_log_write, prio, tag):
self.android_log_write = android_log_write
def __init__(self, prio, tag):
self.prio = prio
self.tag = tag
@ -85,10 +119,48 @@ def write(self, b):
# Writing an empty string to the stream should have no effect.
if b:
# Encode null bytes using "modified UTF-8" to avoid truncating the
# message. This should not affect the return value, as the caller
# may be expecting it to match the length of the input.
self.android_log_write(self.prio, self.tag,
b.replace(b"\x00", b"\xc0\x80"))
logcat.write(self.prio, self.tag, b)
return len(b)
# When a large volume of data is written to logcat at once, e.g. when a test
# module fails in --verbose3 mode, there's a risk of overflowing logcat's own
# buffer and losing messages. We avoid this by imposing a rate limit using the
# token bucket algorithm, based on a conservative estimate of how fast `adb
# logcat` can consume data.
MAX_BYTES_PER_SECOND = 1024 * 1024
# The logcat buffer size of a device can be determined by running `logcat -g`.
# We set the token bucket size to half of the buffer size of our current minimum
# API level, because other things on the system will be producing messages as
# well.
BUCKET_SIZE = 128 * 1024
# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log_read.h;l=39
PER_MESSAGE_OVERHEAD = 28
class Logcat:
def __init__(self, android_log_write):
self.android_log_write = android_log_write
self._lock = RLock()
self._bucket_level = 0
self._prev_write_time = time()
def write(self, prio, tag, message):
# Encode null bytes using "modified UTF-8" to avoid them truncating the
# message.
message = message.replace(b"\x00", b"\xc0\x80")
with self._lock:
now = time()
self._bucket_level += (
(now - self._prev_write_time) * MAX_BYTES_PER_SECOND)
self._bucket_level = min(self._bucket_level, BUCKET_SIZE)
self._prev_write_time = now
self._bucket_level -= PER_MESSAGE_OVERHEAD + len(tag) + len(message)
if self._bucket_level < 0:
sleep(-self._bucket_level / MAX_BYTES_PER_SECOND)
self.android_log_write(prio, tag, message)

View File

@ -1,14 +1,17 @@
import io
import platform
import queue
import re
import subprocess
import sys
import unittest
from _android_support import TextLogStream
from array import array
from contextlib import contextmanager
from contextlib import ExitStack, contextmanager
from threading import Thread
from test.support import LOOPBACK_TIMEOUT
from time import time
from time import sleep, time
from unittest.mock import patch
if sys.platform != "android":
@ -81,18 +84,39 @@ def unbuffered(self, stream):
finally:
stream.reconfigure(write_through=False)
# In --verbose3 mode, sys.stdout and sys.stderr are captured, so we can't
# test them directly. Detect this mode and use some temporary streams with
# the same properties.
def stream_context(self, stream_name, level):
# https://developer.android.com/ndk/reference/group/logging
prio = {"I": 4, "W": 5}[level]
stack = ExitStack()
stack.enter_context(self.subTest(stream_name))
stream = getattr(sys, stream_name)
if isinstance(stream, io.StringIO):
stack.enter_context(
patch(
f"sys.{stream_name}",
TextLogStream(
prio, f"python.{stream_name}", errors="backslashreplace"
),
)
)
return stack
def test_str(self):
for stream_name, level in [("stdout", "I"), ("stderr", "W")]:
with self.subTest(stream=stream_name):
with self.stream_context(stream_name, level):
stream = getattr(sys, stream_name)
tag = f"python.{stream_name}"
self.assertEqual(f"<TextLogStream '{tag}'>", repr(stream))
self.assertTrue(stream.writable())
self.assertFalse(stream.readable())
self.assertIs(stream.writable(), True)
self.assertIs(stream.readable(), False)
self.assertEqual("UTF-8", stream.encoding)
self.assertTrue(stream.line_buffering)
self.assertFalse(stream.write_through)
self.assertIs(stream.line_buffering, True)
self.assertIs(stream.write_through, False)
# stderr is backslashreplace by default; stdout is configured
# that way by libregrtest.main.
@ -147,6 +171,13 @@ def write(s, lines=None, *, write_len=None):
write("f\n\ng", ["exxf", ""])
write("\n", ["g"])
# Since this is a line-based logging system, line buffering
# cannot be turned off, i.e. a newline always causes a flush.
stream.reconfigure(line_buffering=False)
self.assertIs(stream.line_buffering, True)
# However, buffering can be turned off completely if you want a
# flush after every write.
with self.unbuffered(stream):
write("\nx", ["", "x"])
write("\na\n", ["", "a"])
@ -209,30 +240,30 @@ def __str__(self):
# (MAX_BYTES_PER_WRITE).
#
# ASCII (1 byte per character)
write(("foobar" * 700) + "\n",
[("foobar" * 666) + "foob", # 4000 bytes
"ar" + ("foobar" * 33)]) # 200 bytes
write(("foobar" * 700) + "\n", # 4200 bytes in
[("foobar" * 666) + "foob", # 4000 bytes out
"ar" + ("foobar" * 33)]) # 200 bytes out
# "Full-width" digits 0-9 (3 bytes per character)
s = "\uff10\uff11\uff12\uff13\uff14\uff15\uff16\uff17\uff18\uff19"
write((s * 150) + "\n",
[s * 100, # 3000 bytes
s * 50]) # 1500 bytes
write((s * 150) + "\n", # 4500 bytes in
[s * 100, # 3000 bytes out
s * 50]) # 1500 bytes out
s = "0123456789"
write(s * 200, [])
write(s * 150, [])
write(s * 51, [s * 350]) # 3500 bytes
write("\n", [s * 51]) # 510 bytes
write(s * 200, []) # 2000 bytes in
write(s * 150, []) # 1500 bytes in
write(s * 51, [s * 350]) # 510 bytes in, 3500 bytes out
write("\n", [s * 51]) # 0 bytes in, 510 bytes out
def test_bytes(self):
for stream_name, level in [("stdout", "I"), ("stderr", "W")]:
with self.subTest(stream=stream_name):
with self.stream_context(stream_name, level):
stream = getattr(sys, stream_name).buffer
tag = f"python.{stream_name}"
self.assertEqual(f"<BinaryLogStream '{tag}'>", repr(stream))
self.assertTrue(stream.writable())
self.assertFalse(stream.readable())
self.assertIs(stream.writable(), True)
self.assertIs(stream.readable(), False)
def write(b, lines=None, *, write_len=None):
if write_len is None:
@ -330,3 +361,64 @@ def write(b, lines=None, *, write_len=None):
fr"{type(obj).__name__}"
):
stream.write(obj)
def test_rate_limit(self):
# https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log_read.h;l=39
PER_MESSAGE_OVERHEAD = 28
# https://developer.android.com/ndk/reference/group/logging
ANDROID_LOG_DEBUG = 3
# To avoid flooding the test script output, use a different tag rather
# than stdout or stderr.
tag = "python.rate_limit"
stream = TextLogStream(ANDROID_LOG_DEBUG, tag)
# Make a test message which consumes 1 KB of the logcat buffer.
message = "Line {:03d} "
message += "." * (
1024 - PER_MESSAGE_OVERHEAD - len(tag) - len(message.format(0))
) + "\n"
# See _android_support.py. The default values of these parameters work
# well across a wide range of devices, but we'll use smaller values to
# ensure a quick and reliable test that doesn't flood the log too much.
MAX_KB_PER_SECOND = 100
BUCKET_KB = 10
with (
patch("_android_support.MAX_BYTES_PER_SECOND", MAX_KB_PER_SECOND * 1024),
patch("_android_support.BUCKET_SIZE", BUCKET_KB * 1024),
):
# Make sure the token bucket is full.
sleep(BUCKET_KB / MAX_KB_PER_SECOND)
line_num = 0
# Write BUCKET_KB messages, and return the rate at which they were
# accepted in KB per second.
def write_bucketful():
nonlocal line_num
start = time()
max_line_num = line_num + BUCKET_KB
while line_num < max_line_num:
stream.write(message.format(line_num))
line_num += 1
return BUCKET_KB / (time() - start)
# The first bucketful should be written with minimal delay. The
# factor of 2 here is not arbitrary: it verifies that the system can
# write fast enough to empty the bucket within two bucketfuls, which
# the next part of the test depends on.
self.assertGreater(write_bucketful(), MAX_KB_PER_SECOND * 2)
# Write another bucketful to empty the token bucket completely.
write_bucketful()
# The next bucketful should be written at the rate limit.
self.assertAlmostEqual(
write_bucketful(), MAX_KB_PER_SECOND,
delta=MAX_KB_PER_SECOND * 0.1
)
# Once the token bucket refills, we should go back to full speed.
sleep(BUCKET_KB / MAX_KB_PER_SECOND)
self.assertGreater(write_bucketful(), MAX_KB_PER_SECOND * 2)

View File

@ -0,0 +1 @@
Fix Android stdout and stderr messages being truncated or lost.