From 3f6a020c7f09c0976cc9a2203a113a73e7dbc2c5 Mon Sep 17 00:00:00 2001 From: Brant Watson Date: Wed, 26 Oct 2016 16:10:44 -0500 Subject: [PATCH] Handle odd codepoints in EncodedFile Use the underlying StreamReader implementation for retrieving data from the flo. When doing so, we pass a value for the number of bytes to read & the number of characters to read. --- boltons/ioutils.py | 6 +++--- tests/test_ioutils.py | 11 ++++++++++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/boltons/ioutils.py b/boltons/ioutils.py index ad18bae..a4c97cf 100644 --- a/boltons/ioutils.py +++ b/boltons/ioutils.py @@ -148,7 +148,7 @@ class SpooledIOBase(object): # Emulate truncation to a particular location pos = self.tell() self.seek(size) - self.buffer.truncate(size) + self.buffer.truncate() if pos < size: self.seek(pos) @@ -283,7 +283,7 @@ class SpooledStringIO(SpooledIOBase): super(SpooledStringIO, self).__init__(*args, **kwargs) def read(self, n=-1): - ret = self.buffer.read(n).decode('utf-8') + ret = self.buffer.reader.read(n, n) self._tell = self.tell() + len(ret) return ret @@ -388,7 +388,7 @@ class SpooledStringIO(SpooledIOBase): def len(self): """Determine the number of codepoints in the file""" pos = self.buffer.tell() - self.seek(0) + self.buffer.seek(0) total = 0 while True: ret = self.read(READ_CHUNK_SIZE) diff --git a/tests/test_ioutils.py b/tests/test_ioutils.py index c749535..5421dbf 100644 --- a/tests/test_ioutils.py +++ b/tests/test_ioutils.py @@ -279,6 +279,8 @@ class TestSpooledStringIO(TestCase, BaseTestMixin, AssertionsMixin): self.spooled_flo.seek(0) self.spooled_flo.read(40) self.assertEqual(self.spooled_flo.tell(), 40) + self.spooled_flo.seek(10) + self.assertEqual(self.spooled_flo.tell(), 10) def test_codepoints_all_enc(self): """"Test getting read, seek, tell, on various codepoints""" @@ -287,7 +289,6 @@ class TestSpooledStringIO(TestCase, BaseTestMixin, AssertionsMixin): self.spooled_flo.seek(1) self.assertEqual(self.spooled_flo.read(), 
u"\u2014\u2014") self.assertEqual(len(self.spooled_flo), len(test_str)) - self.assertEqual(self.spooled_flo.tell(), 3) def test_seek_codepoints_SEEK_END(self): """Make sure seek() moves to codepoints relative to file end""" @@ -334,3 +335,11 @@ class TestSpooledStringIO(TestCase, BaseTestMixin, AssertionsMixin): self.spooled_flo.seek(1) ret = self.spooled_flo.seek(33000, os.SEEK_CUR) self.assertEqual(ret, 33001) + + def test_x80_codepoint(self): + """Make sure x80 codepoint doesn't confuse read value""" + test_str = u'\x8000' + self.spooled_flo.write(test_str) + self.spooled_flo.seek(0) + self.assertEqual(len(self.spooled_flo.read(2)), 2) + self.assertEqual(self.spooled_flo.read(), '0')