2015-05-19 04:37:49 +00:00
|
|
|
import os
|
|
|
|
|
|
|
|
from boltons.jsonutils import (JSONLIterator,
|
|
|
|
DEFAULT_BLOCKSIZE,
|
|
|
|
reverse_iter_lines)
|
|
|
|
|
|
|
|
# Directory containing this test module; the data files live beside it.
CUR_PATH = os.path.dirname(os.path.abspath(__file__))
# Fixture read by the reverse_iter_lines tests.
NEWLINES_DATA_PATH = os.path.join(CUR_PATH, 'newlines_test_data.txt')
# Fixture read by the JSONLIterator test.
JSONL_DATA_PATH = os.path.join(CUR_PATH, 'jsonl_test_data.txt')
|
|
|
|
|
|
|
|
|
|
|
|
def _test_reverse_iter_lines(filename, blocksize=DEFAULT_BLOCKSIZE):
    """Assert that reverse_iter_lines round-trips a file opened in text mode.

    The whole file is read once as the reference, the handle is rewound,
    and the lines produced by ``reverse_iter_lines`` are collected. Put
    back into forward order and joined with ``'\\n'``, they must equal the
    reference text.

    Args:
        filename: Path of the file to read.
        blocksize: Block size passed through to ``reverse_iter_lines``.

    Raises:
        AssertionError: If the rejoined lines differ from the file content.
    """
    # The context manager closes the handle even when the iteration raises.
    with open(filename) as fo:
        reference = fo.read()
        # Rewind so reverse_iter_lines is handed the file from its start.
        fo.seek(0, os.SEEK_SET)
        rev_lines = list(reverse_iter_lines(fo, blocksize))
    assert '\n'.join(rev_lines[::-1]) == reference
|
|
|
|
|
|
|
|
|
2021-05-15 07:08:53 +00:00
|
|
|
def _test_reverse_iter_lines_bytes(filename, blocksize=DEFAULT_BLOCKSIZE):
    """Assert that reverse_iter_lines round-trips a file opened in binary mode.

    The whole file is read once as the reference, the handle is rewound,
    and the lines produced by ``reverse_iter_lines`` are collected. Put
    back into forward order and joined with the ASCII-encoded
    ``os.linesep``, they must equal the reference bytes.

    Args:
        filename: Path of the file to read.
        blocksize: Block size passed through to ``reverse_iter_lines``.

    Raises:
        AssertionError: If the rejoined lines differ from the file content.
    """
    # The context manager closes the handle even when the iteration raises.
    with open(filename, 'rb') as fo:
        reference = fo.read()
        # Rewind so reverse_iter_lines is handed the file from its start.
        fo.seek(0, os.SEEK_SET)
        rev_lines = list(reverse_iter_lines(fo, blocksize))
    assert os.linesep.encode('ascii').join(rev_lines[::-1]) == reference
|
|
|
|
|
|
|
|
|
|
|
|
|
2015-05-19 04:37:49 +00:00
|
|
|
def test_reverse_iter_lines():
    """Run the text and bytes round-trip checks at several block sizes."""
    # For each block size the text-mode check runs first, then the bytes one.
    checks = (_test_reverse_iter_lines, _test_reverse_iter_lines_bytes)
    for size in (2, 4, 16, 4096):
        for check in checks:
            check(NEWLINES_DATA_PATH, size)
|
2015-05-19 04:37:49 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_jsonl_iterator():
    """Check the objects JSONLIterator yields with ``reverse=True``.

    Iterating the JSONL data file with ``reverse=True`` must produce
    exactly the list ``ref`` below, in that order.

    Raises:
        AssertionError: If the yielded objects differ from ``ref``.
    """
    ref = [{'4': 4}, {'3': 3}, {'2': 2}, {'1': 1}, {}]
    # Consume the iterator inside the ``with`` so the file is still open
    # while it is read, and is closed as soon as the list is built.
    with open(JSONL_DATA_PATH) as jsonl_file:
        jsonl_iter = JSONLIterator(jsonl_file, reverse=True)
        jsonl_list = list(jsonl_iter)
    assert jsonl_list == ref
|