Handle odd codepoints in EncodedFile

Use the underlying StreamReader implementation for
retrieving data from the flo. When doing so, we pass
a value for the number of bytes to read & the number
of characters to read.
This commit is contained in:
Brant Watson 2016-10-26 16:10:44 -05:00
parent fd77cb81a7
commit 3f6a020c7f
2 changed files with 13 additions and 4 deletions

View File

@ -148,7 +148,7 @@ class SpooledIOBase(object):
# Emulate truncation to a particular location
pos = self.tell()
self.seek(size)
self.buffer.truncate(size)
self.buffer.truncate()
if pos < size:
self.seek(pos)
@ -283,7 +283,7 @@ class SpooledStringIO(SpooledIOBase):
super(SpooledStringIO, self).__init__(*args, **kwargs)
def read(self, n=-1):
ret = self.buffer.read(n).decode('utf-8')
ret = self.buffer.reader.read(n, n)
self._tell = self.tell() + len(ret)
return ret
@ -388,7 +388,7 @@ class SpooledStringIO(SpooledIOBase):
def len(self):
"""Determine the number of codepoints in the file"""
pos = self.buffer.tell()
self.seek(0)
self.buffer.seek(0)
total = 0
while True:
ret = self.read(READ_CHUNK_SIZE)

View File

@ -279,6 +279,8 @@ class TestSpooledStringIO(TestCase, BaseTestMixin, AssertionsMixin):
self.spooled_flo.seek(0)
self.spooled_flo.read(40)
self.assertEqual(self.spooled_flo.tell(), 40)
self.spooled_flo.seek(10)
self.assertEqual(self.spooled_flo.tell(), 10)
def test_codepoints_all_enc(self):
""""Test getting read, seek, tell, on various codepoints"""
@ -287,7 +289,6 @@ class TestSpooledStringIO(TestCase, BaseTestMixin, AssertionsMixin):
self.spooled_flo.seek(1)
self.assertEqual(self.spooled_flo.read(), u"\u2014\u2014")
self.assertEqual(len(self.spooled_flo), len(test_str))
self.assertEqual(self.spooled_flo.tell(), 3)
def test_seek_codepoints_SEEK_END(self):
"""Make sure seek() moves to codepoints relative to file end"""
@ -334,3 +335,11 @@ class TestSpooledStringIO(TestCase, BaseTestMixin, AssertionsMixin):
self.spooled_flo.seek(1)
ret = self.spooled_flo.seek(33000, os.SEEK_CUR)
self.assertEqual(ret, 33001)
def test_x80_codepoint(self):
"""Make sure x80 codepoint doesn't confuse read value"""
test_str = u'\x8000'
self.spooled_flo.write(test_str)
self.spooled_flo.seek(0)
self.assertEqual(len(self.spooled_flo.read(2)), 2)
self.assertEqual(self.spooled_flo.read(), '0')