diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml new file mode 100644 index 0000000..4a7e8ad --- /dev/null +++ b/.github/workflows/tests.yaml @@ -0,0 +1,48 @@ +name: Tests +on: + push: + paths-ignore: + - 'docs/**' + - '*.md' + - '*.rst' + pull_request: + paths-ignore: + - 'docs/**' + - '*.md' + - '*.rst' +jobs: + tests: + name: ${{ matrix.name }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - {name: Linux, python: '3.8', os: ubuntu-latest, tox: py38} + - {name: Windows, python: '3.7', os: windows-latest, tox: py38} + - {name: Mac, python: '3.8', os: macos-latest, tox: py38} + - {name: '3.7', python: '3.7', os: ubuntu-latest, tox: py37} + - {name: '3.6', python: '3.6', os: ubuntu-latest, tox: py36} + - {name: '2.7', python: '2.7', os: ubuntu-latest, tox: py27} + - {name: 'PyPy2', python: 'pypy2', os: ubuntu-latest, tox: pypy} + - {name: 'PyPy3', python: 'pypy3', os: ubuntu-latest, tox: pypy3} + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python }} + - name: update pip + run: | + pip install -U wheel + pip install -U setuptools + python -m pip install -U pip + - name: get pip cache dir + id: pip-cache + run: echo "::set-output name=dir::$(pip cache dir)" + - name: cache pip + uses: actions/cache@v2 + with: + path: ${{ steps.pip-cache.outputs.dir }} + key: pip|${{ runner.os }}|${{ matrix.python }}|${{ hashFiles('setup.py') }}|${{ hashFiles('requirements/*.txt') }} + - run: pip install tox + - run: tox -e ${{ matrix.tox }} diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index c32c011..0000000 --- a/.travis.yml +++ /dev/null @@ -1,42 +0,0 @@ -language: python -cache: pip - -# Python targets, as defined by https://github.com/travis-ci/travis-build/blob -# /master/spec/build/script/python_spec.rb and https://github.com/travis-ci -# /travis-build/blob/master/lib/travis/build/script/python.rb -python: - # Standard release https://docs.travis-ci.com/user/languages - # /python#choosing-python-versions-to-test-against - - "2.7" - - "3.4" - - "3.5" - - "3.6" - - # PyPy2.7: https://doc.pypy.org/en/latest - # /index-of-release-notes.html#cpython-2-7-compatible-versions - - pypy - - # PyPy3.5: https://doc.pypy.org/en/latest - # /index-of-release-notes.html#cpython-3-3-compatible-versions - - pypy3 - -matrix: - include: - - python: 2.6 - dist: trusty - - python: 3.7 - dist: xenial # required for Python 3.7 (travis-ci/travis-ci#9069) - - python: 3.8 - dist: xenial # required for Python 3.8 (travis-ci/travis-ci#9069) - - python: 3.9 - dist: focal - - python: nightly - dist: focal - allow_failures: - - python: nightly - -install: "pip install -r requirements-test.txt" -script: "py.test --doctest-modules boltons tests" -branches: - except: - - function_builder diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index d3e0402..0000000 --- a/appveyor.yml +++ /dev/null @@ -1,50 +0,0 @@ -# What Python version is installed where: -# http://www.appveyor.com/docs/installed-software#python - -# This configuration based on: -# https://github.com/audreyr/cookiecutter/commit/3c4685f536afda3be93da3fe3039cec0ab0d60a3 - -branches: - except: - - function_builder - -environment: - matrix: - - PYTHON: "C:\\Python27-x64" - TOX_ENV: "py27" - - - PYTHON: "C:\\Python37-x64" - TOX_ENV: "py37" - - -init: - - set PATH=%PYTHON%;%PYTHON%\Scripts;%PATH% - - "git config --system http.sslcainfo \"C:\\Program Files\\Git\\mingw64\\ssl\\certs\\ca-bundle.crt\"" - - "%PYTHON%/python -V" - - "%PYTHON%/python -c \"import struct;print(8 * struct.calcsize(\'P\'))\"" - -install: - - "%PYTHON%/Scripts/easy_install -U pip" - - "%PYTHON%/Scripts/pip install -U --force-reinstall tox wheel" - -build: false # Not a C# project, build stuff at the test step instead. - -test_script: - - "%PYTHON%/Scripts/tox -e %TOX_ENV%" - -after_test: - - "%PYTHON%/python setup.py bdist_wheel" - - ps: "ls dist" - -on_success: - # Report coverage results to codecov.io - # and export tox environment variables - - "%PYTHON%/Scripts/pip install codecov" - - set OS=WINDOWS - - "%PYTHON%/Scripts/codecov -e TOX_ENV OS" - -artifacts: - - path: dist\* - -#on_success: -# - TODO: upload the content of dist/*.whl to a public wheelhouse diff --git a/boltons/fileutils.py b/boltons/fileutils.py index 0e75a29..0f4ae12 100644 --- a/boltons/fileutils.py +++ b/boltons/fileutils.py @@ -224,10 +224,15 @@ else: def atomic_save(dest_path, **kwargs): """A convenient interface to the :class:`AtomicSaver` type. Example: - - >>> with atomic_save("/tmp/file.txt", text_mode=True) as fo: - ... data = fo.read() - + + >>> try: + ... with atomic_save("file.txt", text_mode=True) as fo: + ... _ = fo.write('bye') + ... 1/0 # will error + ... fo.write('bye') + ... except ZeroDivisionError: + ... pass # at least our file.txt didn't get overwritten + See the :class:`AtomicSaver` documentation for details. """ return AtomicSaver(dest_path, **kwargs) diff --git a/boltons/jsonutils.py b/boltons/jsonutils.py index b65ed64..bcd527f 100644 --- a/boltons/jsonutils.py +++ b/boltons/jsonutils.py @@ -9,68 +9,80 @@ of working with `JSON Lines`_-formatted files. from __future__ import print_function +import io import os import json DEFAULT_BLOCKSIZE = 4096 -# reverse iter lines algorithm: -# -# - if it ends in a newline, add an empty string to the line list -# - if there's one item, then prepend it to the buffer, continue -# - if there's more than one item, pop the last item and prepend it -# to the buffer, yielding it -# - yield all remaining items in reverse, except for the first -# - first item becomes the new buffer -# -# - when the outer loop completes, yield the buffer - __all__ = ['JSONLIterator', 'reverse_iter_lines'] -def reverse_iter_lines(file_obj, blocksize=DEFAULT_BLOCKSIZE, preseek=True): +def reverse_iter_lines(file_obj, blocksize=DEFAULT_BLOCKSIZE, preseek=True, encoding=None): """Returns an iterator over the lines from a file object, in reverse order, i.e., last line first, first line last. Uses the :meth:`file.seek` method of file objects, and is tested compatible with :class:`file` objects, as well as :class:`StringIO.StringIO`. Args: - file_obj (file): An open file object. Note that ``reverse_iter_lines`` - mutably reads from the file and other functions should not mutably - interact with the file object. - blocksize (int): The block size to pass to :meth:`file.read()` + file_obj (file): An open file object. Note that + ``reverse_iter_lines`` mutably reads from the file and + other functions should not mutably interact with the file + object after being passed. Files can be opened in bytes or + text mode. + blocksize (int): The block size to pass to + :meth:`file.read()`. Warning: keep this a fairly large + multiple of 2, defaults to 4096. preseek (bool): Tells the function whether or not to automatically seek to the end of the file. Defaults to ``True``. ``preseek=False`` is useful in cases when the file cursor is already in position, either at the end of the file or in the middle for relative reverse line generation. + """ + # This function is a bit of a pain because it attempts to be byte/text agnostic + try: + encoding = encoding or file_obj.encoding + except AttributeError: + # BytesIO + encoding = None + else: + encoding = 'utf-8' + + # need orig_obj to keep alive otherwise __del__ on the TextWrapper will close the file + orig_obj = file_obj + try: + file_obj = orig_obj.detach() + except (AttributeError, io.UnsupportedOperation): + pass + + empty_bytes, newline_bytes, empty_text = b'', b'\n', u'' + if preseek: file_obj.seek(0, os.SEEK_END) + buff = empty_bytes cur_pos = file_obj.tell() - buff = '' while 0 < cur_pos: read_size = min(blocksize, cur_pos) cur_pos -= read_size file_obj.seek(cur_pos, os.SEEK_SET) cur = file_obj.read(read_size) - lines = cur.splitlines() - if cur[-1] == '\n': - lines.append('') - if len(lines) == 1: - buff = lines[0] + buff + buff = cur + buff + lines = buff.splitlines() + + if len(lines) < 2 or lines[0] == empty_bytes: continue - last = lines.pop() - yield last + buff + if buff[-1:] == newline_bytes: + yield empty_text if encoding else empty_bytes for line in lines[:0:-1]: - yield line + yield line.decode(encoding) if encoding else line buff = lines[0] if buff: - # TODO: test this, does an empty buffer always mean don't yield? - yield buff + yield buff.decode(encoding) if encoding else buff + """ diff --git a/tests/newlines_test_data.txt b/tests/newlines_test_data.txt index 268abe8..0310f86 100644 --- a/tests/newlines_test_data.txt +++ b/tests/newlines_test_data.txt @@ -3,6 +3,8 @@ b c d e + + f g hijklmnop diff --git a/tests/test_jsonutils.py b/tests/test_jsonutils.py index 21f43ac..fb1c77b 100644 --- a/tests/test_jsonutils.py +++ b/tests/test_jsonutils.py @@ -14,14 +14,24 @@ JSONL_DATA_PATH = CUR_PATH + '/jsonl_test_data.txt' def _test_reverse_iter_lines(filename, blocksize=DEFAULT_BLOCKSIZE): fo = open(filename) reference = fo.read() - fo.seek(0, os.SEEK_END) + fo.seek(0, os.SEEK_SET) rev_lines = list(reverse_iter_lines(fo, blocksize)) assert '\n'.join(rev_lines[::-1]) == reference +def _test_reverse_iter_lines_bytes(filename, blocksize=DEFAULT_BLOCKSIZE): + fo = open(filename, 'rb') + reference = fo.read() + fo.seek(0, os.SEEK_SET) + rev_lines = list(reverse_iter_lines(fo, blocksize)) + assert os.linesep.encode('ascii').join(rev_lines[::-1]) == reference + + + def test_reverse_iter_lines(): - for blocksize in (1, 4, 11, 4096): + for blocksize in (2, 4, 16, 4096): _test_reverse_iter_lines(NEWLINES_DATA_PATH, blocksize) + _test_reverse_iter_lines_bytes(NEWLINES_DATA_PATH, blocksize) def test_jsonl_iterator():