From f645c63838527d460377c131348a8a9e26e8dd17 Mon Sep 17 00:00:00 2001 From: Augie Fackler Date: Thu, 27 Jul 2017 09:51:37 -0400 Subject: [PATCH 1/7] ioutils: add MultiFileReader to ease concatenation of multiple readers I ended up needing something like this for Mercurial, and mhashemirc suggested that it would make sense in boltons. --- boltons/ioutils.py | 40 +++++++++++++++++++++++++++++++++++++++- tests/test_ioutils.py | 21 +++++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) diff --git a/boltons/ioutils.py b/boltons/ioutils.py index 0fef04c..67bd3f9 100644 --- a/boltons/ioutils.py +++ b/boltons/ioutils.py @@ -16,7 +16,7 @@ from abc import ( abstractproperty, ) from errno import EINVAL -from io import BytesIO +from io import BytesIO, TextIOBase from codecs import EncodedFile from tempfile import TemporaryFile @@ -404,3 +404,41 @@ class SpooledStringIO(SpooledIOBase): total += len(ret) self.buffer.seek(pos) return total + + +class MultiFileReader(object): + + def __init__(self, *fileobjs): + if all(isinstance(f, TextIOBase) for f in fileobjs): + self._joiner = '' + elif any(isinstance(f, TextIOBase) for f in fileobjs): + raise ValueError('All arguments to MultiFileReader must be either ' + 'bytes IO or text IO, not a mix') + else: + self._joiner = b'' + self._fileobjs = fileobjs + self._index = 0 + + def read(self, amt=None): + if not amt: + return self._joiner.join(f.read() for f in self._fileobjs) + parts = [] + while amt > 0 and self._index < len(self._fileobjs): + parts.append(self._fileobjs[self._index].read(amt)) + got = len(parts[-1]) + if got < amt: + self._index += 1 + amt -= got + return self._joiner.join(parts) + + def seek(self, offset, whence=os.SEEK_SET): + if whence != os.SEEK_SET: + raise NotImplementedError( + 'fileprepender does not support anything other' + ' than os.SEEK_SET for whence on seek()') + if offset != 0: + raise NotImplementedError( + 'fileprepender only supports seeking to start, but that ' + 'could be fixed if you need it') + for f in self._fileobjs: + f.seek(0) diff --git a/tests/test_ioutils.py b/tests/test_ioutils.py index f6183c8..5fac8b1 100644 --- a/tests/test_ioutils.py +++ b/tests/test_ioutils.py @@ -1,3 +1,4 @@ +import io import os import random import string @@ -391,3 +392,23 @@ class TestSpooledStringIO(TestCase, BaseTestMixin, AssertionsMixin): self.spooled_flo.write(test_str) self.spooled_flo.seek(0) self.assertEqual(self.spooled_flo.read(3), test_str) + + +class TestMultiFileReader(TestCase): + def test_read_seek_bytes(self): + r = ioutils.MultiFileReader(io.BytesIO(b'narf'), io.BytesIO(b'troz')) + self.assertEqual([b'nar', b'ftr', b'oz'], + list(iter(lambda: r.read(3), b''))) + r.seek(0) + self.assertEqual(b'narftroz', r.read()) + + def test_read_seek_text(self): + r = ioutils.MultiFileReader(io.StringIO(u'narf'), io.StringIO(u'troz')) + self.assertEqual([u'nar', u'ftr', u'oz'], + list(iter(lambda: r.read(3), u''))) + r.seek(0) + self.assertEqual(u'narftroz', r.read()) + + def test_no_mixed_bytes_and_text(self): + with self.assertRaises(ValueError): + ioutils.MultiFileReader(io.BytesIO(b'narf'), io.StringIO(u'troz')) From ae56836176b9b124483eccc5cc8f49e182e65275 Mon Sep 17 00:00:00 2001 From: Mahmoud Hashemi Date: Thu, 27 Jul 2017 22:27:54 -0700 Subject: [PATCH 2/7] py26 compat for multifilereader test --- tests/test_ioutils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_ioutils.py b/tests/test_ioutils.py index 5fac8b1..b02c316 100644 --- a/tests/test_ioutils.py +++ b/tests/test_ioutils.py @@ -410,5 +410,5 @@ class TestMultiFileReader(TestCase): self.assertEqual(u'narftroz', r.read()) def test_no_mixed_bytes_and_text(self): - with self.assertRaises(ValueError): - ioutils.MultiFileReader(io.BytesIO(b'narf'), io.StringIO(u'troz')) + self.assertRaises(ValueError, ioutils.MultiFileReader, + io.BytesIO(b'narf'), io.StringIO(u'troz')) From b61d5af99e683f82098316bc10a2fd4717729f65 Mon Sep 17 00:00:00 2001 From: Mahmoud Hashemi Date: Sat, 29 Jul 2017 17:33:59 -0700 Subject: [PATCH 3/7] improved type checking for MultiFileReader (enables more file-like objects including codecs.open etc.), more tests to match. also corrected some error messages with appropriate class name. --- boltons/ioutils.py | 23 +++++++++++++---------- tests/test_ioutils.py | 32 +++++++++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/boltons/ioutils.py b/boltons/ioutils.py index 67bd3f9..0fdc8e1 100644 --- a/boltons/ioutils.py +++ b/boltons/ioutils.py @@ -1,4 +1,4 @@ -# -*- coding: UTF-8 -*- +# -*- coding: utf-8 -*- # Coding decl above needed for rendering the emdash properly in the # documentation. @@ -409,11 +409,16 @@ class SpooledStringIO(SpooledIOBase): class MultiFileReader(object): def __init__(self, *fileobjs): - if all(isinstance(f, TextIOBase) for f in fileobjs): - self._joiner = '' - elif any(isinstance(f, TextIOBase) for f in fileobjs): - raise ValueError('All arguments to MultiFileReader must be either ' - 'bytes IO or text IO, not a mix') + if not all([callable(getattr(f, 'read', None)) and + callable(getattr(f, 'seek', None)) for f in fileobjs]): + raise TypeError('MultiFileReader expected file-like objects' + ' with .read() and .seek()') + if all([hasattr(f, 'encoding') for f in fileobjs]): + # codecs.open and io.TextIOBase + self._joiner = u'' + elif any([hasattr(f, 'encoding') for f in fileobjs]): + raise ValueError('All arguments to MultiFileReader must handle' + ' bytes OR text, not a mix') else: self._joiner = b'' self._fileobjs = fileobjs @@ -434,11 +439,9 @@ class MultiFileReader(object): def seek(self, offset, whence=os.SEEK_SET): if whence != os.SEEK_SET: raise NotImplementedError( - 'fileprepender does not support anything other' - ' than os.SEEK_SET for whence on seek()') + 'MultiFileReader.seek() only supports os.SEEK_SET') if offset != 0: raise NotImplementedError( - 'fileprepender only supports seeking to start, but that ' - 'could be fixed if you need it') + 'MultiFileReader only supports seeking to start at this time') for f in self._fileobjs: f.seek(0) diff --git a/tests/test_ioutils.py b/tests/test_ioutils.py index b02c316..149ce24 100644 --- a/tests/test_ioutils.py +++ b/tests/test_ioutils.py @@ -1,14 +1,18 @@ import io import os +import sys +import codecs import random import string -import sys from tempfile import mkdtemp from unittest import TestCase from zipfile import ZipFile, ZIP_DEFLATED from boltons import ioutils +CUR_FILE_PATH = os.path.abspath(__file__) + + # Python2/3 compat if sys.version_info[0] == 3: text_type = str @@ -412,3 +416,29 @@ class TestMultiFileReader(TestCase): def test_no_mixed_bytes_and_text(self): self.assertRaises(ValueError, ioutils.MultiFileReader, io.BytesIO(b'narf'), io.StringIO(u'troz')) + + def test_open(self): + with open(CUR_FILE_PATH, 'r') as f: + r_file_str = f.read() + with open(CUR_FILE_PATH, 'r') as f1: + with open(CUR_FILE_PATH, 'r') as f2: + mfr = ioutils.MultiFileReader(f1, f2) + r_double_file_str = mfr.read() + + assert r_double_file_str == (r_file_str * 2) + + with open(CUR_FILE_PATH, 'rb') as f: + rb_file_str = f.read() + with open(CUR_FILE_PATH, 'rb') as f1: + with open(CUR_FILE_PATH, 'rb') as f2: + mfr = ioutils.MultiFileReader(f1, f2) + rb_double_file_str = mfr.read() + + assert rb_double_file_str == (rb_file_str * 2) + + utf8_file_str = codecs.open(CUR_FILE_PATH, encoding='utf8').read() + f1, f2 = (codecs.open(CUR_FILE_PATH, encoding='utf8'), + codecs.open(CUR_FILE_PATH, encoding='utf8')) + mfr = ioutils.MultiFileReader(f1, f2) + utf8_double_file_str = mfr.read() + assert utf8_double_file_str == (utf8_file_str * 2) From ba6941ec19aa2b9c9e29ac6fdfda0cea9f396eda Mon Sep 17 00:00:00 2001 From: Mahmoud Hashemi Date: Sat, 29 Jul 2017 19:03:52 -0700 Subject: [PATCH 4/7] split out text fileobj checking, add support for legacy StringIO, test as much --- boltons/ioutils.py | 21 ++++++++++++++++++--- tests/test_ioutils.py | 12 +++++++++++- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/boltons/ioutils.py b/boltons/ioutils.py index 0fdc8e1..5c22612 100644 --- a/boltons/ioutils.py +++ b/boltons/ioutils.py @@ -10,13 +10,13 @@ ways. """ import os import sys +from io import BytesIO from abc import ( ABCMeta, abstractmethod, abstractproperty, ) from errno import EINVAL -from io import BytesIO, TextIOBase from codecs import EncodedFile from tempfile import TemporaryFile @@ -406,6 +406,20 @@ class SpooledStringIO(SpooledIOBase): return total +def is_text_fileobj(fileobj): + if hasattr(fileobj, 'encoding'): + # codecs.open and io.TextIOBase + return True + if hasattr(fileobj, 'getvalue'): + # StringIO.StringIO / cStringIO.StringIO / io.StringIO + try: + if isinstance(fileobj.getvalue(), type(u'')): + return True + except Exception: + pass + return False + + class MultiFileReader(object): def __init__(self, *fileobjs): @@ -413,13 +427,14 @@ class MultiFileReader(object): callable(getattr(f, 'seek', None)) for f in fileobjs]): raise TypeError('MultiFileReader expected file-like objects' ' with .read() and .seek()') - if all([hasattr(f, 'encoding') for f in fileobjs]): + if all([is_text_fileobj(f) for f in fileobjs]): # codecs.open and io.TextIOBase self._joiner = u'' - elif any([hasattr(f, 'encoding') for f in fileobjs]): + elif any([is_text_fileobj(f) for f in fileobjs]): raise ValueError('All arguments to MultiFileReader must handle' ' bytes OR text, not a mix') else: + # open/file and io.BytesIO self._joiner = b'' self._fileobjs = fileobjs self._index = 0 diff --git a/tests/test_ioutils.py b/tests/test_ioutils.py index 149ce24..a798dcb 100644 --- a/tests/test_ioutils.py +++ b/tests/test_ioutils.py @@ -4,10 +4,18 @@ import sys import codecs import random import string + +try: + from StringIO import StringIO +except: + # py3 + StringIO = io.StringIO + from tempfile import mkdtemp from unittest import TestCase from zipfile import ZipFile, ZIP_DEFLATED + from boltons import ioutils CUR_FILE_PATH = os.path.abspath(__file__) @@ -407,7 +415,9 @@ class TestMultiFileReader(TestCase): self.assertEqual(b'narftroz', r.read()) def test_read_seek_text(self): - r = ioutils.MultiFileReader(io.StringIO(u'narf'), io.StringIO(u'troz')) + # also tests StringIO.StringIO on py2 + r = ioutils.MultiFileReader(StringIO(u'narf'), + io.StringIO(u'troz')) self.assertEqual([u'nar', u'ftr', u'oz'], list(iter(lambda: r.read(3), u''))) r.seek(0) From d4f5d8a3f348f33a7d06db6e6423a86d059969cb Mon Sep 17 00:00:00 2001 From: Mahmoud Hashemi Date: Sat, 29 Jul 2017 19:09:53 -0700 Subject: [PATCH 5/7] avoid hasattr bc of py2, per @durin42's comments --- boltons/ioutils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/boltons/ioutils.py b/boltons/ioutils.py index 5c22612..c17369d 100644 --- a/boltons/ioutils.py +++ b/boltons/ioutils.py @@ -407,10 +407,10 @@ class SpooledStringIO(SpooledIOBase): def is_text_fileobj(fileobj): - if hasattr(fileobj, 'encoding'): + if getattr(fileobj, 'encoding', False): # codecs.open and io.TextIOBase return True - if hasattr(fileobj, 'getvalue'): + if getattr(fileobj, 'getvalue', False): # StringIO.StringIO / cStringIO.StringIO / io.StringIO try: if isinstance(fileobj.getvalue(), type(u'')): From 141c740f2b6a55878357e830a74f82368c4e230f Mon Sep 17 00:00:00 2001 From: Mahmoud Hashemi Date: Sat, 29 Jul 2017 19:22:55 -0700 Subject: [PATCH 6/7] docstrings for MultiFileReader --- boltons/ioutils.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/boltons/ioutils.py b/boltons/ioutils.py index c17369d..47f2b6a 100644 --- a/boltons/ioutils.py +++ b/boltons/ioutils.py @@ -421,6 +421,21 @@ def is_text_fileobj(fileobj): class MultiFileReader(object): + """Takes a list of open files or file-like objects and provides an + interface to read from them all contiguously. Like + :func:`itertools.chain()`, but for reading files. + + >>> mfr = MultiFileReader(BytesIO(b'ab'), BytesIO(b'cd'), BytesIO(b'e')) + >>> mfr.read(3).decode('ascii') + u'abc' + >>> mfr.read(3).decode('ascii') + u'de' + + The constructor takes as many fileobjs as you hand it, and will + raise a TypeError on non-file-like objects. A ValueError is raised + when file-like objects are a mix of bytes- and text-handling + objects (for instance, BytesIO and StringIO). + """ def __init__(self, *fileobjs): if not all([callable(getattr(f, 'read', None)) and @@ -440,6 +455,11 @@ class MultiFileReader(object): self._index = 0 def read(self, amt=None): + """Read up to the specified *amt*, seamlessly bridging across + files. Returns the appropriate type of string (bytes or text) + for the input, and returns an empty string when the files are + exhausted. + """ if not amt: return self._joiner.join(f.read() for f in self._fileobjs) parts = [] @@ -452,6 +472,9 @@ class MultiFileReader(object): return self._joiner.join(parts) def seek(self, offset, whence=os.SEEK_SET): + """Enables setting position of the file cursor to a given + *offset*. Currently only supports ``offset=0``. + """ if whence != os.SEEK_SET: raise NotImplementedError( 'MultiFileReader.seek() only supports os.SEEK_SET') From 45c6fc24fff83bd1bd76148fb65f2851998cef01 Mon Sep 17 00:00:00 2001 From: Mahmoud Hashemi Date: Sat, 29 Jul 2017 19:25:17 -0700 Subject: [PATCH 7/7] integrate MultiFileReader into sphinx docs --- docs/ioutils.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/ioutils.rst b/docs/ioutils.rst index 8de7322..10cfbe9 100644 --- a/docs/ioutils.rst +++ b/docs/ioutils.rst @@ -78,3 +78,13 @@ Here is a simple example using the requests library to download a zip file:: # Print all the files in the zip print(zip_doc.namelist()) + + +Multiple Files +-------------- + +.. _multifilereader: + +MultiFileReader +^^^^^^^^^^^^^^^ +.. autoclass:: boltons.ioutils.MultiFileReader