diff --git a/Lib/codecs.py b/Lib/codecs.py index d972a5191fd..9d29acc20ca 100644 --- a/Lib/codecs.py +++ b/Lib/codecs.py @@ -232,6 +232,7 @@ def __init__(self, stream, errors='strict'): # For str->str decoding this will stay a str # For str->unicode decoding the first read will promote it to unicode self.charbuffer = "" + self.linebuffer = None def decode(self, input, errors='strict'): raise NotImplementedError @@ -264,6 +265,11 @@ def read(self, size=-1, chars=-1, firstline=False): optional encoding endings or state markers are available on the stream, these should be read too. """ + # If we have lines cached, first merge them back into characters + if self.linebuffer: + self.charbuffer = "".join(self.linebuffer) + self.linebuffer = None + # read until we get the required number of characters (if available) while True: # can the request can be satisfied from the character buffer? @@ -316,6 +322,20 @@ def readline(self, size=None, keepends=True): read() method. """ + # If we have lines cached from an earlier read, return + # them unconditionally + if self.linebuffer: + line = self.linebuffer[0] + del self.linebuffer[0] + if len(self.linebuffer) == 1: + # revert to charbuffer mode; we might need more data + # next time + self.charbuffer = self.linebuffer[0] + self.linebuffer = None + if not keepends: + line = line.splitlines(False)[0] + return line + readsize = size or 72 line = "" # If size is given, we call read() only once @@ -331,6 +351,22 @@ def readline(self, size=None, keepends=True): line += data lines = line.splitlines(True) if lines: + if len(lines) > 1: + # More than one line result; the first line is a full line + # to return + line = lines[0] + del lines[0] + if len(lines) > 1: + # cache the remaining lines + lines[-1] += self.charbuffer + self.linebuffer = lines + self.charbuffer = None + else: + # only one remaining line, put it back into charbuffer + self.charbuffer = lines[0] + self.charbuffer + if not keepends: + line = line.splitlines(False)[0] + 
break line0withend = lines[0] line0withoutend = lines[0].splitlines(False)[0] if line0withend != line0withoutend: # We really have a line end @@ -376,6 +412,7 @@ def reset(self): """ self.bytebuffer = "" self.charbuffer = u"" + self.linebuffer = None def seek(self, offset, whence=0): """ Set the input stream's current position. diff --git a/Misc/NEWS b/Misc/NEWS index 1ffa307782a..5eb20ab9263 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -219,6 +219,8 @@ Extension Modules Library ------- +- Patch #1268314: Cache lines in StreamReader.readline() for performance. + - Bug #1290505: Fix clearing the regex cache for time.strptime(). - Bug #1167128: Fix size of a symlink in a tarfile to be 0.