adding jsonl verification __main__ to jsonutils

This commit is contained in:
Mahmoud Hashemi 2015-07-22 02:28:35 -07:00
parent 42996c48c6
commit 0dd5358c6b
1 changed files with 46 additions and 0 deletions

View File

@@ -128,6 +128,11 @@ class JSONLIterator(object):
else:
self._line_iter = iter(self._file_obj)
@property
def cur_byte_pos(self):
    """Byte offset currently reported by the underlying file object.

    The value is whatever ``self._file_obj.tell()`` returns at the
    moment the property is read.
    """
    position = self._file_obj.tell()
    return position
def _align_to_newline(self):
"Aligns the file object's position to the next newline."
fo, bsize = self._file_obj, self._blocksize
@@ -179,3 +184,44 @@ class JSONLIterator(object):
return obj
__next__ = next
if __name__ == '__main__':
    def _main():
        """Load one or more JSON Lines files as a basic validity check.

        Every command-line argument that is not a recognized flag is
        treated as a filename, opened in binary mode, and read object by
        object with JSONLIterator. Processing stops at the first object
        that raises ValueError, after printing that object's 1-based
        index within its file, the iterator's current byte position,
        and the filename.

        Flags:
          -h, --help     print a one-line description and exit
          -v, --verbose  print progress while loading and totals at the end
        """
        import sys
        if '-h' in sys.argv or '--help' in sys.argv:
            print('loads one or more JSON Line files for basic validation.')
            return
        verbose = '-v' in sys.argv or '--verbose' in sys.argv
        file_count, obj_count = 0, 0
        filenames = sys.argv[1:]
        for filename in filenames:
            # Flags may appear anywhere among the arguments; skip them.
            if filename in ('-h', '--help', '-v', '--verbose'):
                continue
            file_count += 1
            with open(filename, 'rb') as file_obj:
                iterator = JSONLIterator(file_obj)
                cur_obj_count = 0  # per-file count, used in error reports
                while True:
                    try:
                        next(iterator)
                    except ValueError:
                        print('error reading object #%s around byte %s in %s'
                              % (cur_obj_count + 1, iterator.cur_byte_pos,
                                 filename))
                        return
                    except StopIteration:
                        break
                    obj_count += 1
                    cur_obj_count += 1
                    # Progress: one dot per 100 objects, and the running
                    # total (ending the line) at each 10000-object mark.
                    if verbose and obj_count % 100 == 0:
                        sys.stdout.write('.')
                        # Was `if obj_count % 10000:`, which printed the
                        # total on every dot *except* the 10000 marks.
                        if obj_count % 10000 == 0:
                            sys.stdout.write('%s\n' % obj_count)
        if verbose:
            print('files checked: %s' % file_count)
            print('objects loaded: %s' % obj_count)

    _main()