r0c/test/prototyping/serialization.py

876 lines
28 KiB
Python

#!/usr/bin/env python3
# coding: utf-8
from __future__ import print_function
import os
import sys
import platform
import random
import struct
import time
import json
try:
import cPickle as pickle # py2
except:
import pickle # py3
# Default number of timed repetitions per benchmark; run() uses it as the
# default for its `iterations` parameter and reports the fastest repetition.
ITERATIONS = 2
#ITERATIONS = 1
class Message(object):
    """One chat message as used by every benchmark in this script."""

    def __init__(self, ts, user, txt):
        # ts:   int timestamp
        # user: str username
        # txt:  str text
        self.ts, self.user, self.txt = ts, user, txt
def result(desc, sec, sec2, mul, comp_t, base_t, fn=None):
    """Print one row of the benchmark table.

    desc is the benchmark label, sec/sec2 are printed as seconds, mul as a
    plain ratio, and comp_t/base_t inside parentheses. When fn is given, the
    size of that file in bytes is appended; otherwise the placeholder 'x'.
    """
    if fn:
        size = os.path.getsize(fn)
    else:
        size = 'x'

    row = u'{0:24} {1:8.3f}s {2:8.3f}s {3:8.3f} ({4:.3f},{5:.3f}) {6:9} byte'
    print(row.format(desc, sec, sec2, mul, comp_t, base_t, size))
""" run a test function, compare time against comp_t after subtracting base_t """
def run(func, write_to, comp_t=None, base_t=None, iterations=ITERATIONS):
    """Time func(write_to) and print the fastest repetition as a table row.

    func       -- benchmark callable; its name minus the first two characters
                  (the "t_" prefix used throughout this file) is the label.
    write_to   -- passed to func, and to result() as the file to measure.
    comp_t     -- reference duration; the printed multiplier is
                  (elapsed - base) / comp_t. When falsy the multiplier is 1.
    base_t     -- baseline duration subtracted from the elapsed time. When
                  falsy the elapsed time itself is used, giving 0.
    iterations -- number of repetitions; the fastest one is reported.

    Returns [label, write_to, fastest_elapsed_seconds].
    """
    mtd = 99999999
    desc = func.__name__[2:]
    is_windows = platform.system() == 'Windows'

    if not is_windows:
        # reserve a line which each repetition below overwrites in place
        print()

    best = []
    for iteration in range(iterations):
        t0 = time.time()
        func(write_to)
        td = time.time() - t0

        base_tv = base_t or td
        comp_tv = comp_t or td
        rel_tv = td - base_tv
        mul = rel_tv / comp_tv if comp_t else 1

        if mtd > td:
            mtd = td
            best = [desc, td, rel_tv, mul, comp_tv, base_tv, write_to]

        if not is_windows:
            # ANSI "cursor up": redraw the row with the best timing so far
            print('\033[A', end='')
            result(*best)

    # Windows gets a single row at the end (no cursor-up redraw there).
    # With zero iterations `best` is empty, so there is nothing to print;
    # unpacking it into result() would raise TypeError.
    if is_windows and best:
        result(*best)

    return [desc, write_to, mtd]
import struct

# Every code point from 1 to 127 with CR/LF turned into their two-character
# escape sequences, followed by a handful of multi-byte characters.
all_chars = struct.pack('127B', *range(1, 128)).decode('utf-8')
all_chars = all_chars.replace('\r', '\\r').replace('\n', '\\n')
all_chars += u'宇多田ヒカル桜流し'

# Alphabet the generated sentences are drawn from (same object under both names).
some_chars = letters = u'宇多田ヒカル桜流しABCDEFGHIJKLMNOPQRSTUVWXYZ\\\'\'\'"/abcdefghijklmnopqrstuvwxyz '
def gen_sentence(charset=None):
    """Return a random sentence of 4 to 64 characters, stripped of outer whitespace.

    charset -- sequence of characters to draw from; defaults to the
               module-level some_chars.

    Never returns an empty string: if stripping leaves nothing (the draw was
    all whitespace), u'a' is returned instead. The emptiness check therefore
    has to happen after the strip, not before it.
    """
    if charset is None:
        charset = some_chars

    length = random.randint(4, 64)
    # one random.choice call per character, joined once instead of repeated +=
    sentence = u''.join([random.choice(charset) for _ in range(length)])
    return sentence.strip() or u'a'
# Twelve random 8-letter usernames, regenerated on every run of the script.
# Built with comprehensions so the nested loops no longer share the loop
# variable `n` and no scratch name leaks into the module namespace.
letters = u'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
users = [
    u''.join([random.choice(letters) for _ in range(8)])
    for _ in range(12)
]
def stream_txt():
    """Yield every line of the file 'txt' as UTF-8 decoded text without trailing whitespace."""
    with open('txt', 'rb') as src:
        for raw in src:
            text = raw.decode('utf-8')
            yield text.rstrip()
def stream_msgs_plain(dontcare=None):
    """Yield a Message per line of 'txt'.

    The line index is used as the timestamp and the author cycles through
    the module-level `users` list. The argument is ignored.
    """
    slot = 0
    with open('txt', 'rb') as src:
        for lineno, raw in enumerate(src):
            body = raw.decode('utf-8').rstrip()
            yield Message(lineno, users[slot], body)
            # advance to the next user, wrapping at the end of the list
            slot = (slot + 1) % len(users)
def stream_msg_newlines(dontcare=None):
    """Like stream_msgs_plain, but some messages get a line break in the middle.

    Within every run of 32 lines, line 31 is split with a newline and line 15
    with a carriage return. The argument is ignored.
    """
    # position within each group of 32 lines -> template joining the two halves
    splitters = {31: u'{0}\n{1}', 15: u'{0}\r{1}'}

    slot = 0
    with open('txt', 'rb') as src:
        for lineno, raw in enumerate(src):
            body = raw.decode('utf-8').rstrip()

            template = splitters.get(lineno % 32)
            if template is not None:
                half = int(len(body) / 2)
                body = template.format(body[:half], body[half:])

            yield Message(lineno, users[slot], body)
            slot = (slot + 1) % len(users)
# Message source used by the benchmarks; stream_msg_newlines is the
# alternative that injects \n and \r into some of the messages.
stream_msgs = stream_msgs_plain
def t_gen_txt_file(dontcare):
    """Write 1048576 random sentences to 'txt', one per line, UTF-8 encoded.

    Progress is printed every 8192 lines. The argument is ignored; it exists
    so the function can be driven by run().
    """
    line_count = 1048576

    # py2 has a lazy xrange; on py3 the name is gone and range is already lazy.
    # Only the missing-name case is expected here, so catch exactly that.
    try:
        memes = xrange
    except NameError:
        memes = range

    with open('txt', 'wb') as f:
        for n in memes(line_count):
            if n % 8192 == 0:
                print('{0} {1:.2f}%'.format(n, n*100.0/line_count))

            f.write(u'{0}\n'.format(gen_sentence()).encode('utf-8'))


# generate the corpus only once; later runs reuse the existing file
if not os.path.isfile('txt'):
    run(t_gen_txt_file, 'txt')
# Environment banner: interpreter version, OS name and pointer width in bits.
py_ver = '.'.join(map(str, sys.version_info))
host_os = platform.system()
bitness = 8 * struct.calcsize('P')

print('\n\n{0} // {1}{2} // Deserialization'.format(py_ver, host_os, bitness))
### takeaways:
#
# enumerate(list) is slower than looking up each item in a dict
#
# chaining .replace beats most alternatives
#
# checking whether a string contains a character before trying to replace it saves surprisingly little time
#
# iterating over characters in source and conditionally writing ch or \ch is ~30% the speed of chained .replace
#
# loading global variables into a method before repeatedly using it saves a tiny amount of time
#
def t_stream_utf8(fn):
    """Baseline benchmark: read and decode every line of 'txt', discarding it.

    fn is unused; stream_txt() always reads 'txt'.
    """
    for ln in stream_txt():
        pass


# Index 2 of run()'s return value is the fastest elapsed time.
td_utf8 = run(t_stream_utf8, 'txt')[2]
# The raw read/decode cost becomes both the baseline and the reference
# for the next measurement.
base_t = td_utf8
comp_t = td_utf8
def t_stream_msgs(fn):
    """Baseline benchmark: build a Message for every line, discarding it.

    fn is unused; stream_msgs() always reads 'txt'.
    """
    for msg in stream_msgs():
        pass


# Positional arguments: comp_t=None, base_t=<previous timing>, so the row
# shows the cost of Message construction on top of plain line streaming.
td_msgs = run(t_stream_msgs, 'txt', None, comp_t)[2]
# Message streaming is the baseline subtracted from every later benchmark.
base_t = td_msgs
comp_t = td_msgs
if os.path.isfile('lst_repr_f'):
def verify_deserialization(deserializer, fn):
    """Compare the messages produced by deserializer(fn) with stream_msgs().

    Timestamps and text must match exactly; usernames are only compared by
    length (the `users` list is randomly generated each time the script
    runs). Prints a diagnostic and returns False at the first mismatch,
    True if every compared pair agrees. Pairs are formed with zip(), so
    comparison stops at the shorter of the two streams.
    """
    for expected, actual in zip(stream_msgs(), deserializer(fn)):
        if expected.ts == actual.ts \
        and expected.txt == actual.txt \
        and len(expected.user) == len(actual.user):
            continue

        report = '# FAIL\n# [{0}] [{1}] [{2}]\n# [{3}] [{4}] [{5}] {6} {7} {8} {9}\n'
        print(report.format(
            expected.ts, repr(expected.txt), expected.user,
            actual.ts, repr(actual.txt), actual.user,
            expected.ts == actual.ts,
            len(expected.user) == len(actual.user),
            repr(expected.txt) == repr(actual.txt),
            expected.txt == actual.txt))
        return False

    return True
def t_dser_dummy(fn):
    """Baseline for the deserializers: verify stream_msgs against itself.

    Measures the cost of the verification loop alone; fn is unused and the
    'x' handed to stream_msgs is ignored by it.
    """
    verify_deserialization(stream_msgs, 'x')


td_dser_dummy = run(t_dser_dummy, 'txt', comp_t, base_t)[2]
# The verification overhead is the baseline for the deserialization runs.
base_t = td_dser_dummy
comp_t = td_dser_dummy
# NOTE(review): the source's indentation was lost; the loop is assumed to end
# after the `if redo == 0` statement. Confirm against the original file.
for redo in range(2):
    def t_d_split_ast_eval(fn):
        """Deserialize "<ts> <user> <python string literal>" lines with ast.literal_eval."""
        import ast
        def subroutine(fn):
            with open(fn, 'rb') as f:
                for ln in f:
                    # split on the first two spaces only; the text may contain more
                    ts, user, txt = ln.decode('utf-8').rstrip().split(' ', 2)
                    yield Message(int(ts), user, ast.literal_eval(txt))

        verify_deserialization(subroutine, fn)

    # Positional arguments: comp_t=None, base_t=<current comp_t>.
    td_d_split_ast_eval = run(t_d_split_ast_eval, 's_esc3c', None, comp_t)[2]
    if redo == 0:
        # first pass only: this benchmark becomes the reference duration
        comp_t = td_d_split_ast_eval - base_t
def t_d_split_eval(fn):
    """Deserialize "<ts> <user> <python string literal>" lines with eval().

    SECURITY: eval() executes whatever expression the file contains; this is
    only acceptable because the input is produced by this same script.
    """
    def subroutine(fn):
        with open(fn, 'rb') as f:
            for ln in f:
                # split on the first two spaces only; the text may contain more
                ts, user, txt = ln.decode('utf-8').rstrip().split(' ', 2)
                yield Message(int(ts), user, eval(txt))

    verify_deserialization(subroutine, fn)


run(t_d_split_eval, 's_esc3c', comp_t, base_t)[2]
def t_d_lst_eval(fn):
    """Deserialize lines holding a Python list literal [ts, user, txt] with eval().

    SECURITY: eval() executes whatever expression the file contains; this is
    only acceptable because the input is produced by this same script.
    """
    def subroutine(fn):
        with open(fn, 'rb') as f:
            for ln in f:
                # the evaluated list is unpacked straight into Message(ts, user, txt)
                yield Message(*eval(ln.decode('utf-8').rstrip()))

    verify_deserialization(subroutine, fn)


run(t_d_lst_eval, 'lst_repr_f', comp_t, base_t)[2]
def t_d_lst_ast_eval(fn):
    """Deserialize lines holding a Python list literal [ts, user, txt] with ast.literal_eval."""
    import ast
    def subroutine(fn):
        with open(fn, 'rb') as f:
            for ln in f:
                # the parsed list is unpacked straight into Message(ts, user, txt)
                yield Message(*ast.literal_eval(ln.decode('utf-8').rstrip()))

    verify_deserialization(subroutine, fn)


run(t_d_lst_ast_eval, 'lst_repr_f', comp_t, base_t)[2]
def t_d_split_repr_ast_e(fn):
    """Deserialize "<ts> <user> <repr(text)>" lines with ast.literal_eval.

    Same parsing as t_d_split_ast_eval; it is run against 'txt_repr' below
    instead of 's_esc3c'.
    """
    import ast
    def subroutine(fn):
        with open(fn, 'rb') as f:
            for ln in f:
                # split on the first two spaces only; the text may contain more
                ts, user, txt = ln.decode('utf-8').rstrip().split(' ', 2)
                yield Message(int(ts), user, ast.literal_eval(txt))

    verify_deserialization(subroutine, fn)


run(t_d_split_repr_ast_e, 'txt_repr', comp_t, base_t)[2]
def t_d_split_repr_eval(fn):
    """Deserialize "<ts> <user> <repr(text)>" lines with eval().

    Same parsing as t_d_split_eval; it is run against 'txt_repr' below
    instead of 's_esc3c'.

    SECURITY: eval() executes whatever expression the file contains; this is
    only acceptable because the input is produced by this same script.
    """
    def subroutine(fn):
        with open(fn, 'rb') as f:
            for ln in f:
                # split on the first two spaces only; the text may contain more
                ts, user, txt = ln.decode('utf-8').rstrip().split(' ', 2)
                yield Message(int(ts), user, eval(txt))

    verify_deserialization(subroutine, fn)


run(t_d_split_repr_eval, 'txt_repr', comp_t, base_t)[2]
# Stop here so the serialization benchmarks below are not run as well.
# NOTE(review): presumably still inside the `if os.path.isfile('lst_repr_f')`
# guard; at module level the rest of the script would be unreachable.
print()
sys.exit(0)
# Header row for the serialization benchmarks that follow.
print('\n\n{0} // {1}{2} // Serialization'.format(py_ver, host_os, bitness))
# Characters that need escaping, in the order they must be replaced: the
# backslash comes first so the backslashes added by the later replacements
# are not escaped a second time.
r_from = u'\\' + u'\'' + u'\r' + u'\n'

# Replacement for the character at the same position in r_from.
r_to = [u'\\\\', u'\\\'', u'\\r', u'\\n']

# The same pairs as a lookup table.
r_map = dict(zip(r_from, r_to))
# py[23] identical: 1.00 1.00
#
def t_chain_replace(fn):
    """Write "<ts> <user> u'<escaped text>'" lines, escaping with chained str.replace.

    The backslash is replaced first so the backslashes introduced by the
    later replacements are left alone.
    """
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            f.write(u'{0} {1} u\'{2}\'\n'.format(
                msg.ts, msg.user, msg.txt.\
                    replace(u'\\', u'\\\\').\
                    replace(u'\'', u'\\\'').\
                    replace(u'\r', u'\\r').\
                    replace(u'\n', u'\\n')).\
                encode('utf-8'))


# Positional arguments: comp_t=None, base_t=<current comp_t>.
td_chain = run(t_chain_replace, 's_esc1', None, comp_t)[2]
# Chained replace is the reference: later rows are printed as multiples of
# its duration above the streaming baseline.
comp_t = td_chain - base_t
def t_chain_replace_hex(fn):
    """Same as t_chain_replace, but the timestamp is written in hexadecimal ({0:x})."""
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            f.write(u'{0:x} {1} u\'{2}\'\n'.format(
                msg.ts, msg.user, msg.txt.\
                    replace(u'\\', u'\\\\').\
                    replace(u'\'', u'\\\'').\
                    replace(u'\r', u'\\r').\
                    replace(u'\n', u'\\n')).\
                encode('utf-8'))


run(t_chain_replace_hex, 's_esc1_hex', comp_t, base_t)[2]
def t_chain_replace_hexjoin(fn):
    """Write "<hex ts> <user> <escaped text>" lines, built with str.join instead of format.

    The text is escaped with chained str.replace (backslash first). Every
    record is terminated with a newline, like the other line-based
    serializers in this file; without it all records end up on one line.

    NOTE(review): unlike t_chain_replace_hex the text is not wrapped in
    u'...' quotes; left as is, confirm whether that is intended.
    """
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            f.write((u' '.join([hex(msg.ts)[2:], msg.user, msg.txt.\
                replace(u'\\', u'\\\\').\
                replace(u'\'', u'\\\'').\
                replace(u'\r', u'\\r').\
                replace(u'\n', u'\\n')]) + u'\n').\
                encode('utf-8'))


run(t_chain_replace_hexjoin, 's_esc1_hexj', comp_t, base_t)[2]
def t_plain_fmt(fn):
    """Write "<ts> <user> <text>" lines with str.format and no escaping at all."""
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            f.write(u'{0} {1} {2}\n'.format(
                msg.ts, msg.user, msg.txt).\
                encode('utf-8'))


run(t_plain_fmt, 's_plain_fmt', comp_t, base_t)[2]
def t_plain_hex(fn):
    """Same as t_plain_fmt, but the timestamp is written in hexadecimal ({0:x})."""
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            f.write(u'{0:x} {1} {2}\n'.format(
                msg.ts, msg.user, msg.txt).\
                encode('utf-8'))


run(t_plain_hex, 's_plain_hex', comp_t, base_t)[2]
def t_plain_hexjoin(fn):
    """Write "<hex ts> <user> <text>" lines, built with str.join and no escaping."""
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            # hex() prefixes "0x"; the [2:] slice removes it
            f.write((u' '.join(
                [hex(msg.ts)[2:], msg.user, msg.txt]\
                ) + u'\n').encode('utf-8'))


run(t_plain_hexjoin, 's_plain_hexj', comp_t, base_t)[2]
def t_plain_join(fn):
    """Write "<ts> <user> <text>" lines, built with str.join and no escaping.

    The timestamp is written in full decimal form. (A "[2:]" slice only makes
    sense on hex() output, where it removes the "0x" prefix; applied to str()
    it would drop the first two digits of the timestamp.)
    """
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            f.write((u' '.join(
                [str(msg.ts), msg.user, msg.txt]\
                ) + u'\n').encode('utf-8'))


run(t_plain_join, 's_plain_join', comp_t, base_t)[2]
# py[23] identical: 1.49 1.40
#
def t_enumerate_replace(fn):
    """Escape by looping over enumerate(r_from) and indexing r_to by position."""
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            txt = msg.txt
            for n, bad in enumerate(r_from):
                txt = txt.replace(bad, r_to[n])

            f.write(u'{0} {1} u\'{2}\'\n'.format(
                msg.ts, msg.user, txt).\
                encode('utf-8'))


run(t_enumerate_replace, 's_esc2a', comp_t, base_t)
# py[23] identical: 1.41 1.22
#
def t_foreach_dict_replace(fn):
    """Escape by looping over r_from and looking each replacement up in r_map."""
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            txt = msg.txt
            for bad in r_from:
                txt = txt.replace(bad, r_map[bad])

            f.write(u'{0} {1} u\'{2}\'\n'.format(
                msg.ts, msg.user, txt).\
                encode('utf-8'))


run(t_foreach_dict_replace, 's_esc2b', comp_t, base_t)
# py[23] identical: 1.71 1.69
#
def t_foreach_idx_replace(fn):
    """Escape by looping over r_from and finding the replacement via r_from.index()."""
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            txt = msg.txt
            for bad in r_from:
                txt = txt.replace(bad, r_to[r_from.index(bad)])

            f.write(u'{0} {1} u\'{2}\'\n'.format(
                msg.ts, msg.user, txt).\
                encode('utf-8'))


run(t_foreach_idx_replace, 's_esc2c', comp_t, base_t)
# py[23] identical: 1.27 1.10
#
def t_enumerate_replaceif(fn):
    """Like t_enumerate_replace, but replace() is only called when the character occurs."""
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            txt = msg.txt
            for n, bad in enumerate(r_from):
                if bad in txt:
                    txt = txt.replace(bad, r_to[n])

            f.write(u'{0} {1} u\'{2}\'\n'.format(
                msg.ts, msg.user, txt).\
                encode('utf-8'))


run(t_enumerate_replaceif, 's_esc3', comp_t, base_t)
# py[23] identical: 1.13 0.91
#
def t_replaceif_dict(fn):
    """Like t_foreach_dict_replace, but replace() is only called when the character occurs."""
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            txt = msg.txt
            for bad in r_from:
                if bad in txt:
                    txt = txt.replace(bad, r_map[bad])

            f.write(u'{0} {1} u\'{2}\'\n'.format(
                msg.ts, msg.user, txt).\
                encode('utf-8'))


run(t_replaceif_dict, 's_esc3b', comp_t, base_t)
# py[23] identical: 1.13 0.89
#
def t_replaceif_dict_loc(fn):
    """Like t_replaceif_dict, with the global tables bound to local names first."""
    with open(fn, 'wb') as f:
        # local aliases of the module-level escape tables
        lr_from = r_from
        lr_map = r_map
        for msg in stream_msgs():
            txt = msg.txt
            for bad in lr_from:
                if bad in txt:
                    txt = txt.replace(bad, lr_map[bad])

            f.write(u'{0} {1} u\'{2}\'\n'.format(
                msg.ts, msg.user, txt).\
                encode('utf-8'))


# 's_esc3c' is also the input of the t_d_split_* deserialization benchmarks.
run(t_replaceif_dict_loc, 's_esc3c', comp_t, base_t)
# py[23] identical: 3.19 3.19
#
def t_condwrite_always_dict(fn):
    """Escape by rebuilding every message one character at a time."""
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            txt = u''
            for ch in msg.txt:
                # membership is tested against the r_from string,
                # the replacement is taken from the r_map dict
                if ch in r_from:
                    txt += r_map[ch]
                else:
                    txt += ch

            f.write(u'{0} {1} u\'{2}\'\n'.format(
                msg.ts, msg.user, txt).\
                encode('utf-8'))


run(t_condwrite_always_dict, 's_esc4', comp_t, base_t)
# py[23] identical: 3.06 2.81
#
def t_condwrite_ifneed_list(fn):
    """Rebuild a message character by character only if it needs escaping.

    Membership in the inner loop is tested against the r_from string.
    """
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            txt = msg.txt
            for bad in r_from:
                if bad in msg.txt:
                    # at least one special character: rebuild the whole text once
                    txt = u''
                    for ch in msg.txt:
                        if ch in r_from:
                            txt += r_map[ch]
                        else:
                            txt += ch

                    # the rebuild handled every special character already
                    break

            f.write(u'{0} {1} u\'{2}\'\n'.format(
                msg.ts, msg.user, txt).\
                encode('utf-8'))


run(t_condwrite_ifneed_list, 's_esc5', comp_t, base_t)
# py[23] identical: 3.38 2.99
#
def t_condwrite_ifneed_dict(fn):
    """Rebuild a message character by character only if it needs escaping.

    Differs from t_condwrite_ifneed_list in that membership in the inner
    loop is tested against the r_map dict.
    """
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            txt = msg.txt
            for bad in r_from:
                if bad in msg.txt:
                    # at least one special character: rebuild the whole text once
                    txt = u''
                    for ch in msg.txt:
                        if ch in r_map:
                            txt += r_map[ch]
                        else:
                            txt += ch

                    # the rebuild handled every special character already
                    break

            f.write(u'{0} {1} u\'{2}\'\n'.format(
                msg.ts, msg.user, txt).\
                encode('utf-8'))


run(t_condwrite_ifneed_dict, 's_esc5b', comp_t, base_t)
# Differ: 0.92 0.57
#
def t_msgtxt_repr(fn):
    """Write "<ts> <user> <repr(text)>" lines, letting repr() do the escaping."""
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            f.write(u'{0} {1} {2}\n'.format(
                msg.ts, msg.user, repr(msg.txt)).\
                encode('utf-8'))


run(t_msgtxt_repr, 'txt_repr', comp_t, base_t)
# Differ: 0.92 0.57
#
def t_msgtxt_repr_u(fn):
    """Like t_msgtxt_repr, but the text literal always carries exactly one u prefix.

    Any leading "u" that repr() produced is stripped and a "u" is written by
    the format string instead.
    """
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            f.write(u'{0} {1} u{2}\n'.format(
                msg.ts, msg.user, repr(msg.txt).lstrip('u')).\
                encode('utf-8'))


# NOTE(review): writes to 'txt_repr', the same file t_msgtxt_repr just
# produced, so that output is overwritten. Confirm this is intended.
run(t_msgtxt_repr_u, 'txt_repr', comp_t, base_t)
# Differ: ? ?
#
def t_fakelist_repr(fn):
    """Write lines that look like a Python list literal: [ts, u'user', repr(text)].

    Only the text goes through repr(); the brackets and the quoted username
    come from the format string.
    """
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            f.write(u'[{0}, u\'{1}\', {2}]\n'.format(
                msg.ts, msg.user, repr(msg.txt)).\
                encode('utf-8'))


# The existence of 'lst_repr_f' is what switches the script into
# deserialization mode on a later run.
run(t_fakelist_repr, 'lst_repr_f', comp_t, base_t)
# Differ: 1.07 0.83
#
def t_list_repr(fn):
    """Write one repr([ts, user, text]) per line."""
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            f.write(u'{0}\n'.format(
                repr([msg.ts, msg.user, msg.txt])).\
                encode('utf-8'))


run(t_list_repr, 'lst_repr', comp_t, base_t)
# NG: 1.26 1.35
#
def t_uesc(fn):
    """Write "<ts> <user> <text>" lines escaped with the unicode_escape codec.

    encode('unicode_escape') returns a byte string. It is decoded back to
    text (the codec only emits ASCII) before being formatted into the line;
    formatting the bytes object directly would, on Python 3, embed its repr
    (a b'...' wrapper with doubled backslashes) in the output.
    """
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            escaped = u'{0} {1} {2}'.format(
                msg.ts, msg.user, msg.txt).\
                encode('unicode_escape').\
                decode('ascii')

            f.write(u'{0}\n'.format(escaped).encode('utf-8'))


run(t_uesc, 'uesc', comp_t, base_t)
# Too slow + insecure: 3.15 2.09
#
def t_pickle2(fn):
    """Dump every Message object into one file using pickle protocol 2."""
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            pickle.dump(msg, f, 2)


run(t_pickle2, 'p2', comp_t, base_t)
# py[23] identical: 2.38 2.41
#
def t_json_str(fn):
    """Write one JSON array [ts, user, text] per line via json.dumps."""
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            f.write(u'{0}\n'.format(json.dumps([msg.ts, msg.user, msg.txt])).encode('utf-8'))


run(t_json_str, 'json1', comp_t, base_t)
# py[23] different + 2slow: 5.5 5.6
#
def t_json_fh(fn):
    """Write one JSON array [ts, user, text] per line via json.dump on the file handle.

    json.dump writes no terminator of its own, so a newline is written after
    each record; otherwise the arrays run together and the file cannot be
    read back line by line like the json.dumps variant.
    """
    with open(fn, 'w') as f:
        for msg in stream_msgs():
            json.dump([msg.ts, msg.user, msg.txt], f)
            f.write('\n')


run(t_json_fh, 'json2', comp_t, base_t)
"""
2.6.0.final.0 // Windows32 // Serialization
gen_txt_file 28.487s 0.000s 1.000 45024723 byte
stream_utf8 1.399s 0.000s 1.000 (1.399,1.399) 45024723 byte
stream_msgs 2.413s 1.014s 1.000 (2.413,1.399) 45024723 byte
chain_replace 7.011s 4.598s 1.000 (7.011,2.413) 63772299 byte
enumerate_replace 8.774s 4.176s 1.731 (2.413,4.598) 63772299 byte
foreach_dict_replace 8.316s 3.718s 1.541 (2.413,4.598) 63772299 byte
foreach_idx_replace 9.026s 4.428s 1.835 (2.413,4.598) 63772299 byte
enumerate_replaceif 8.295s 3.697s 1.532 (2.413,4.598) 63772299 byte
replaceif_dict 7.925s 3.327s 1.379 (2.413,4.598) 63772299 byte
replaceif_dict_loc 7.913s 3.315s 1.374 (2.413,4.598) 63772299 byte 1.37
condwrite_always_dict 13.482s 8.884s 3.682 (2.413,4.598) 63772299 byte
condwrite_ifneed_list 13.424s 8.826s 3.658 (2.413,4.598) 63772299 byte
condwrite_ifneed_dict 13.714s 9.116s 3.778 (2.413,4.598) 63772299 byte
msgtxt_repr 7.197s 2.599s 1.077 (2.413,4.598) 78927560 byte time size
fakelist_repr 7.248s 2.650s 1.098 (2.413,4.598) 86267592 byte 1.10, 1.35
list_repr 7.717s 3.119s 1.293 (2.413,4.598) 86267592 byte
uesc 8.259s 3.661s 1.517 (2.413,4.598) 75184474 byte
pickle2 13.299s 8.701s 3.606 (2.413,4.598) 128844837 byte
json_str 17.263s 12.665s 5.249 (2.413,4.598) 84047968 byte
json_fh 15.216s 10.618s 4.400 (2.413,4.598) 82999392 byte
3.6.2.final.0 // Windows64 // Serialization
gen_txt_file 42.314s 0.000s 1.000 45031335 byte
stream_utf8 0.780s 0.000s 1.000 (0.780,0.780) 45024723 byte
stream_msgs 1.920s 1.141s 1.000 (1.920,0.780) 45024723 byte
chain_replace 5.521s 3.600s 1.000 (5.521,1.920) 63772299 byte
enumerate_replace 6.637s 3.037s 1.581 (1.920,3.600) 63772299 byte
foreach_dict_replace 6.088s 2.487s 1.295 (1.920,3.600) 63772299 byte
foreach_idx_replace 7.056s 3.455s 1.799 (1.920,3.600) 63772299 byte
enumerate_replaceif 6.056s 2.455s 1.279 (1.920,3.600) 63772299 byte
replaceif_dict 5.555s 1.954s 1.018 (1.920,3.600) 63772299 byte
replaceif_dict_loc 5.599s 1.999s 1.041 (1.920,3.600) 63772299 byte 1.04
condwrite_always_dict 9.801s 6.201s 3.229 (1.920,3.600) 63772299 byte
condwrite_ifneed_list 9.608s 6.008s 3.128 (1.920,3.600) 63772299 byte
condwrite_ifneed_dict 9.960s 6.359s 3.312 (1.920,3.600) 63772299 byte
msgtxt_repr 4.681s 1.080s 0.563 (1.920,3.600) 65040655 byte time size
fakelist_repr 4.778s 1.177s 0.613 (1.920,3.600) 72380687 byte 0.61, 1.13
list_repr 5.337s 1.736s 0.904 (1.920,3.600) 71332111 byte
uesc 6.562s 2.962s 1.542 (1.920,3.600) 84223875 byte
pickle2 9.625s 6.025s 3.138 (1.920,3.600) 138282195 byte
json_str 9.394s 5.793s 3.017 (1.920,3.600) 84047968 byte
json_fh 19.304s 15.704s 8.178 (1.920,3.600) 82999392 byte
2.6.6.final.0 // Linux64 // Serialization
gen_txt_file 20.595s 0.000s 1.000 45013059 byte
stream_utf8 1.106s 0.000s 1.000 (1.106,1.106) 45013059 byte
stream_msgs 1.894s 0.787s 1.000 (1.894,1.106) 45013059 byte
chain_replace 4.846s 2.952s 1.000 (4.846,1.894) 63759315 byte
enumerate_replace 6.075s 3.123s 1.649 (1.894,2.952) 63759315 byte
foreach_dict_replace 5.888s 2.935s 1.550 (1.894,2.952) 63759315 byte
foreach_idx_replace 6.357s 3.404s 1.797 (1.894,2.952) 63759315 byte
enumerate_replaceif 5.744s 2.791s 1.474 (1.894,2.952) 63759315 byte
replaceif_dict 5.425s 2.472s 1.305 (1.894,2.952) 63759315 byte
replaceif_dict_loc 5.388s 2.435s 1.286 (1.894,2.952) 63759315 byte 1.29
condwrite_always_dict 9.718s 6.766s 3.572 (1.894,2.952) 63759315 byte
condwrite_ifneed_list 9.600s 6.647s 3.510 (1.894,2.952) 63759315 byte
condwrite_ifneed_dict 10.032s 7.080s 3.738 (1.894,2.952) 63759315 byte
msgtxt_repr 4.864s 1.912s 1.009 (1.894,2.952) 78917824 byte
fakelist_repr 4.903s 1.950s 1.030 (1.894,2.952) 86257856 byte 1.03
list_repr 5.204s 2.252s 1.189 (1.894,2.952) 86257856 byte
uesc 5.611s 2.658s 1.404 (1.894,2.952) 75173731 byte
pickle2 9.186s 6.234s 3.291 (1.894,2.952) 128833185 byte
json_str 12.212s 9.260s 4.889 (1.894,2.952) 84038028 byte
json_fh 9.866s 6.914s 3.650 (1.894,2.952) 82989452 byte
2.7.13.final.0 // Linux64 // Deserialization
stream_msgs 1.448s 0.609s 1.000 (1.448,0.838) 45004225 byte
dser_dummy 5.216s 3.769s 2.603 (1.448,1.448) 45004225 byte
d_split_ast_eval 13.481s 8.264s 1.000 (13.481,5.216) 66897095 byte
d_split_eval 13.723s 5.459s 1.046 (5.216,8.264) 66897095 byte
d_lst_eval 13.477s 5.212s 0.999 (5.216,8.264) 86246968 byte
d_lst_ast_eval 15.563s 7.298s 1.399 (5.216,8.264) 86246968 byte
d_split_repr_ast_e 11.164s 2.900s 1.000 (11.164,8.264) 78906936 byte
d_split_repr_eval 11.686s 3.421s 1.000 (11.686,8.264) 78906936 byte
2.7.13.final.0 // Linux64 // Serialization
gen lines 15.888s
stream_utf8 0.817s 0.000s 1.000 (0.817,0.817) 45012467 byte
stream_msgs 1.409s 0.592s 1.000 (1.409,0.817) 45012467 byte
chain_replace 3.641s 2.232s 1.000 (3.641,1.409) 63760646 byte
enumerate_replace 4.327s 2.095s 1.487 (1.409,2.232) 63760646 byte
foreach_dict_replace 4.213s 1.981s 1.406 (1.409,2.232) 63760646 byte
foreach_idx_replace 4.638s 2.406s 1.707 (1.409,2.232) 63760646 byte
enumerate_replaceif 4.021s 1.789s 1.269 (1.409,2.232) 63760646 byte
replaceif_dict x.xxxs 1.614s 1.145 (1.409,2.232) 63760646 byte
replaceif_dict_loc 3.820s 1.588s 1.127 (1.409,2.232) 63760646 byte 1.13
condwrite_always_dict 6.726s 4.494s 3.189 (1.409,2.232) 63760646 byte
condwrite_ifneed_list 6.541s 4.309s 3.058 (1.409,2.232) 63760646 byte
condwrite_ifneed_dict 6.999s 4.767s 3.382 (1.409,2.232) 63760646 byte
msgtxt_repr x.xxxs 1.448s 1.028 (1.409,2.232) 78921907 byte
fakelist_repr x.xxxs 1.449s 1.028 (1.409,2.232) 86261939 byte 1.03
list_repr x.xxxs 1.658s 1.177 (1.409,2.232) 86261939 byte
uesc 4.005s 1.773s 1.258 (1.409,2.232) 75177292 byte
pickle2 6.674s 4.442s 3.152 (1.409,2.232) 128832607 byte
json_str 5.591s 3.359s 2.383 (1.409,2.232) 84040832 byte
json_fh 9.970s 7.738s 5.490 (1.409,2.232) 82992256 byte
3.5.3.final.0 // Linux64 // Deserialization of py3 data
stream_msgs 1.290s 0.731s 1.000 (1.290,0.559) 45021263 byte
dser_dummy 2.868s 1.578s 1.224 (1.290,1.290) 45021263 byte
d_split_ast_eval 8.752s 5.884s 1.000 (8.752,2.868) 66914436 byte
d_split_eval 10.635s 4.751s 1.657 (2.868,5.884) 66914436 byte
d_lst_eval 13.332s 7.448s 2.597 (2.868,5.884) 72378292 byte
d_lst_ast_eval 12.996s 7.112s 2.480 (2.868,5.884) 72378292 byte
d_split_repr_ast_e 8.567s 2.683s 1.000 (8.567,5.884) 65038260 byte
d_split_repr_eval 10.520s 4.636s 1.000 (10.520,5.884) 65038260 byte
3.5.3.final.0 // Linux64 // Deserialization of py2 data
stream_msgs 1.263s 0.717s 1.000 (1.263,0.547) 45004225 byte
dser_dummy 2.849s 1.586s 1.255 (1.263,1.263) 45004225 byte
d_split_ast_eval 8.709s 5.860s 1.000 (8.709,2.849) 66897095 byte
d_split_eval 10.544s 4.684s 1.644 (2.849,5.860) 66897095 byte
d_lst_eval 11.550s 5.690s 1.997 (2.849,5.860) 86246968 byte
d_lst_ast_eval 11.752s 5.892s 2.068 (2.849,5.860) 86246968 byte
d_split_repr_ast_e 7.190s 1.331s 1.000 (7.190,5.860) 78906936 byte
d_split_repr_eval 8.862s 3.002s 1.000 (8.862,5.860) 78906936 byte
3.5.3.final.0 // Linux64 // Serialization
gen lines 25.908s
stream_utf8 0.551s 0.000s 1.000 (0.551,0.551) 45012467 byte
stream_msgs 1.298s 0.747s 1.000 (1.298,0.551) 45012467 byte
chain_replace 3.204s 1.906s 1.000 (3.204,1.298) 63760646 byte
enumerate_replace 3.727s 1.821s 1.403 (1.298,1.906) 63760646 byte
foreach_dict_replace 3.488s 1.582s 1.219 (1.298,1.906) 63760646 byte
foreach_idx_replace 4.098s 2.192s 1.689 (1.298,1.906) 63760646 byte
enumerate_replaceif 3.330s 1.424s 1.097 (1.298,1.906) 63760646 byte
replaceif_dict x.xxxs 1.104s 0.864 (1.298,1.906) 63760646 byte
replaceif_dict_loc x.xxxs 1.085s 0.849 (1.298,1.906) 63760646 byte 0.85
condwrite_always_dict 6.041s 4.136s 3.186 (1.298,1.906) 63760646 byte
condwrite_ifneed_list 5.556s 3.651s 2.813 (1.298,1.906) 63760646 byte
condwrite_ifneed_dict 5.783s 3.877s 2.987 (1.298,1.906) 63760646 byte
msgtxt_repr x.xxxs 0.710s 0.556 (1.298,1.906) 65030226 byte
fakelist_repr x.xxxs 0.760s 0.595 (1.298,1.906) 65030226 byte 0.60
list_repr x.xxxs 1.051s 0.823 (1.298,1.906) 71321682 byte
uesc 3.656s 1.751s 1.349 (1.298,1.906) 84220410 byte
pickle2 4.623s 2.717s 2.093 (1.298,1.906) 138269939 byte
json_str 5.036s 3.130s 2.411 (1.298,1.906) 84040832 byte
json_fh 9.213s 7.307s 5.630 (1.298,1.906) 82992256 byte
# check which serializations are identical across python versions
{ { find -type f | while read fn; do [[ $(head -n 3 "$fn" | wc -c) -gt 300 ]] && { sha256sum "$fn"; continue; }; head -n 1 "$fn" | grep -qE '[^a-zA-Z][a-zA-Z]{8}[^a-zA-Z]' || { sha256sum "$fn"; continue; }; printf '%s %s\n' "$(sed -r 's/([^a-zA-Z])[a-zA-Z]{8}([^a-zA-Z])/\1\2/' < "$fn" | sha256sum)" "$fn"; done; sleep 1; echo; } | tee /dev/stderr; } | sort
## TEST
with open('/dev/shm/py2.repr', 'rb') as f: eval(f.read().decode('utf-8'))
with open('/dev/shm/py3.repr', 'rb') as f: eval(f.read().decode('utf-8'))
with open('/dev/shm/py2.repr', 'rb') as f: __import__('json').dumps(eval(f.read().decode('utf-8')))
with open('/dev/shm/py3.repr', 'rb') as f: __import__('json').dumps(eval(f.read().decode('utf-8')))
## RESULT
Python 2.7.13 (default, Nov 24 2017, 17:33:09) Linux
Python 2.6 (r26:66721, Oct 2 2008, 11:35:03) Windows
[5, u'eyFEfvUb', u'\u591agt\u6d41LD GlONE\'r/u\u5b87FZX\u3057A\\iz iKhz ep"pOzwvA \\ah']
[5, 'RHrVSKcB', '\xe5\xa4\x9agt\xe6\xb5\x81LD GlONE\'r/u\xe5\xae\x87FZX\xe3\x81\x97A\\iz iKhz ep"pOzwvA \\ah']
'[5, "eyFEfvUb", "\\u591agt\\u6d41LD GlONE\'r/u\\u5b87FZX\\u3057A\\\\iz iKhz ep\\"pOzwvA \\\\ah"]'
'[5, "RHrVSKcB", "\\u591agt\\u6d41LD GlONE\'r/u\\u5b87FZX\\u3057A\\\\iz iKhz ep\\"pOzwvA \\\\ah"]'
## RESULT
Python 3.5.3 (default, Jan 19 2017, 14:11:04) Linux
Python 3.6.2 (v3.6.2:5fd33b5, Jul 8 2017, 04:57:36) Windows
[5, 'eyFEfvUb', '多gt流LD GlONE\'r/u宇FZXしA\\iz iKhz ep"pOzwvA \\ah']
[5, 'RHrVSKcB', '多gt流LD GlONE\'r/u宇FZXしA\\iz iKhz ep"pOzwvA \\ah']
'[5, "eyFEfvUb", "\\u591agt\\u6d41LD GlONE\'r/u\\u5b87FZX\\u3057A\\\\iz iKhz ep\\"pOzwvA \\\\ah"]'
'[5, "RHrVSKcB", "\\u591agt\\u6d41LD GlONE\'r/u\\u5b87FZX\\u3057A\\\\iz iKhz ep\\"pOzwvA \\\\ah"]'
## TEST
with open('/dev/shm/py2.repr', 'rb') as f: v2=eval(f.read().decode('utf-8'))[2]
with open('/dev/shm/py3.repr', 'rb') as f: v3=eval(f.read().decode('utf-8'))[2]
if v2==v3: print('eval(py2repr) == eval(py3repr)')
## py2 FAIL, fix:
if isinstance(v3,str): v3=v3.decode('utf-8')
## py3 SUCCESS
## TEST
with open('/dev/shm/py2.repr', 'rb') as f: v2=__import__('ast').literal_eval(f.read().decode('utf-8'))[2]
with open('/dev/shm/py3.repr', 'rb') as f: v3=__import__('ast').literal_eval(f.read().decode('utf-8'))[2]
if v2==v3: print('eval(py2repr) == eval(py3repr)')
# same results as with native eval
currently,
only d_split_ast_eval and d_split_eval succeed with py2 on py3 data
all deserializations succeed with py3 on py2 data
"""