mirror of https://github.com/9001/r0c.git
876 lines
28 KiB
Python
876 lines
28 KiB
Python
#!/usr/bin/env python3
|
|
# coding: utf-8
|
|
from __future__ import print_function
|
|
|
|
|
|
import os
|
|
import sys
|
|
import platform
|
|
import random
|
|
import struct
|
|
import time
|
|
import json
|
|
try:
|
|
import cPickle as pickle # py2
|
|
except:
|
|
import pickle # py3
|
|
|
|
|
|
ITERATIONS = 2
|
|
#ITERATIONS = 1
|
|
|
|
|
|
class Message(object):
    """A single chat message: timestamp, author and text."""

    def __init__(self, ts, user, txt):
        # ts: int timestamp, user: str username, txt: str text
        self.ts, self.user, self.txt = ts, user, txt
|
|
|
|
|
|
def result(desc, sec, sec2, mul, comp_t, base_t, fn=None):
    """Print one row of the benchmark table.

    desc is the test label, sec/sec2/mul/comp_t/base_t are the timing
    figures, and fn is an optional file whose size is shown in the last
    column ('x' is shown when fn is not given).
    """
    if fn:
        sz = os.path.getsize(fn)
    else:
        sz = 'x'

    row = u'{0:24} {1:8.3f}s {2:8.3f}s {3:8.3f} ({4:.3f},{5:.3f}) {6:9} byte'
    print(row.format(desc, sec, sec2, mul, comp_t, base_t, sz))
|
|
|
|
|
|
""" run a test function, compare time against comp_t after subtracting base_t """
|
|
def run(func, write_to, comp_t=None, base_t=None, iterations=ITERATIONS):
    """Run a test function, compare time against comp_t after subtracting base_t.

    func is called as func(write_to) once per iteration and timed with
    time.time().  The fastest iteration is reported through result().

    Parameters:
        func: the test function; its name minus the first two characters
            (the "t_" prefix) is used as the row label.
        write_to: argument handed to func, also passed to result() as the
            file whose size is displayed.
        comp_t: reference duration; the relative time is divided by it.
            When falsy the multiplier is reported as 1.
        base_t: duration subtracted from the measured time.  When falsy
            the measured time itself is used, giving a relative time of 0.
        iterations: how many times func is executed.

    Returns:
        [label, write_to, fastest_duration]; the duration is the initial
        sentinel 99999999 when no iteration was executed.
    """
    mtd = 99999999
    desc = func.__name__[2:]
    is_windows = platform.system() == 'Windows'
    if not is_windows:
        # reserve the line that the in-place redraw below overwrites
        print()

    best = []
    for _ in range(iterations):
        t0 = time.time()
        func(write_to)
        td = time.time() - t0

        base_tv = base_t or td
        comp_tv = comp_t or td
        rel_tv = td - base_tv
        mul = rel_tv / comp_tv if comp_t else 1
        if mtd > td:
            mtd = td
            best = [desc, td, rel_tv, mul, comp_tv, base_tv, write_to]
            if not is_windows:
                # ANSI "cursor up": overwrite the previously printed row
                print('\033[A', end='')
                result(*best)

    # without in-place redraw, print the best row once at the end;
    # skip it when nothing ran (best is empty and cannot be unpacked)
    if is_windows and best:
        result(*best)

    return [desc, write_to, mtd]
|
|
|
|
|
|
import struct

# every code point from 1 to 127, packed with a single struct call
all_chars = struct.pack('127B', *range(1, 128))
all_chars = all_chars.decode('utf-8')
# keep the sample on one line: CR and LF become their two-character escapes
all_chars = all_chars.replace('\r', '\\r').replace('\n', '\\n')
all_chars = all_chars + u'宇多田ヒカル桜流し'

# alphabet used by the sentence generator
some_chars = letters = u'宇多田ヒカル桜流しABCDEFGHIJKLMNOPQRSTUVWXYZ\\\'\'\'"/abcdefghijklmnopqrstuvwxyz '
|
|
|
|
def gen_sentence(charset=None):
    """Return a random, never-empty sentence.

    Between 4 and 64 characters are drawn from charset (default: the
    module-level some_chars) and surrounding whitespace is stripped, so
    the result may be shorter than 4 characters.

    Parameters:
        charset: sequence of characters to pick from; None selects
            some_chars.

    Returns:
        The stripped sentence, or u'a' if stripping left nothing.
    """
    if charset is None:
        charset = some_chars

    retlen = random.randint(4, 64)
    ret = u''.join(random.choice(charset) for _ in range(retlen))

    # the fallback must be applied after stripping: a sentence made only
    # of spaces would otherwise come back as an empty string
    return ret.strip() or u'a'
|
|
|
|
|
|
# twelve random usernames, eight ASCII letters each
users = []
letters = u'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
for _ in range(12):
    name = u''.join([random.choice(letters) for _ in range(8)])
    users.append(name)
|
|
|
|
|
|
def stream_txt():
    """Yield every line of the file 'txt', UTF-8 decoded, trailing whitespace removed."""
    with open('txt', 'rb') as src:
        for raw in src:
            line = raw.decode('utf-8')
            yield line.rstrip()
|
|
|
|
|
|
def stream_msgs_plain(dontcare=None):
    """Yield one Message per line of the file 'txt'.

    The zero-based line number is used as timestamp and the authors cycle
    through the module-level users list.  The dontcare argument is ignored.
    """
    who = 0
    with open('txt', 'rb') as src:
        for lineno, raw in enumerate(src):
            body = raw.decode('utf-8').rstrip()
            yield Message(lineno, users[who], body)
            # round-robin: wrap around after the last user
            who = who + 1 if who + 1 < len(users) else 0
|
|
|
|
|
|
def stream_msg_newlines(dontcare=None):
    """Yield one Message per line of 'txt', with line breaks injected into some texts.

    Within every run of 32 lines, the text at offset 15 gets a carriage
    return and the text at offset 31 gets a newline inserted at its
    midpoint.  The zero-based line number is the timestamp and the authors
    cycle through the module-level users list.  dontcare is ignored.
    """
    who = 0
    with open('txt', 'rb') as src:
        for lineno, raw in enumerate(src):
            body = raw.decode('utf-8').rstrip()
            slot = lineno % 32
            if slot == 31:
                half = len(body) // 2
                body = u'{0}\n{1}'.format(body[:half], body[half:])
            elif slot == 15:
                half = len(body) // 2
                body = u'{0}\r{1}'.format(body[:half], body[half:])

            yield Message(lineno, users[who], body)
            # round-robin: wrap around after the last user
            who = who + 1 if who + 1 < len(users) else 0
|
|
|
|
|
|
stream_msgs = stream_msgs_plain
|
|
|
|
|
|
def t_gen_txt_file(dontcare):
    """Create the input file 'txt' with 1048576 random sentences.

    Each sentence comes from gen_sentence() and is written UTF-8 encoded
    on its own line.  Progress is printed every 8192 lines.  The dontcare
    argument is ignored.
    """
    try:
        memes = xrange  # py2: lazy range
    except NameError:
        memes = range  # py3: range is already lazy

    total = 1048576
    with open('txt', 'wb') as f:
        for n in memes(total):
            if n % 8192 == 0:
                print('{0} {1:.2f}%'.format(n, n * 100.0 / total))
            f.write(u'{0}\n'.format(gen_sentence()).encode('utf-8'))
|
|
|
|
if not os.path.isfile('txt'):
|
|
run(t_gen_txt_file, 'txt')
|
|
|
|
|
|
py_ver = '.'.join([str(x) for x in sys.version_info])
|
|
bitness = struct.calcsize('P') * 8
|
|
host_os = platform.system()
|
|
print('\n\n{0} // {1}{2} // Deserialization'.format(py_ver, host_os, bitness))
|
|
|
|
|
|
### takeaways:
|
|
#
|
|
# enumerate(list) is slower than looking up each item in a dict
|
|
#
|
|
# chaining .replace beats most alternatives
|
|
#
|
|
# checking whether a string contains a character before trying to replace it saves surprisingly little time
|
|
#
|
|
# iterating over characters in source and conditionally writing ch or \ch is ~30% the speed of chained .replace
|
|
#
|
|
# loading global variables into a method before repeatedly using it saves a tiny amount of time
|
|
#
|
|
|
|
|
|
def t_stream_utf8(fn):
    """Baseline: consume stream_txt() and discard every line; fn is ignored."""
    for _ in stream_txt():
        continue
|
|
|
|
td_utf8 = run(t_stream_utf8, 'txt')[2]
|
|
base_t = td_utf8
|
|
comp_t = td_utf8
|
|
|
|
|
|
|
|
def t_stream_msgs(fn):
    """Baseline: consume stream_msgs() and discard every message; fn is ignored."""
    for _ in stream_msgs():
        continue
|
|
|
|
td_msgs = run(t_stream_msgs, 'txt', None, comp_t)[2]
|
|
base_t = td_msgs
|
|
comp_t = td_msgs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if os.path.isfile('lst_repr_f'):
|
|
|
|
def verify_deserialization(deserializer, fn):
    """Check that deserializer(fn) reproduces the reference message stream.

    The messages yielded by stream_msgs() are compared pairwise with the
    ones yielded by deserializer(fn).  Timestamps and texts must be equal;
    for usernames only the length is compared.  Both streams must also
    yield the same number of messages.

    Parameters:
        deserializer: callable taking fn and returning an iterable of
            objects with ts, user and txt attributes.
        fn: value handed to deserializer (a filename in the benchmarks).

    Returns:
        True when every pair matches, False after printing a diagnostic
        for the first mismatch.
    """
    try:
        from itertools import izip_longest as zip_longest  # py2
    except ImportError:
        from itertools import zip_longest  # py3

    # zip() would silently stop at the shorter stream (and build both
    # lists in memory on py2); pad with a sentinel to detect that instead
    missing = object()
    pairs = zip_longest(stream_msgs(), deserializer(fn), fillvalue=missing)
    for m1, m2 in pairs:
        if m1 is missing or m2 is missing:
            print('# FAIL\n# {0} stream ended early\n'.format(
                'reference' if m1 is missing else 'deserialized'))
            return False

        if m1.ts != m2.ts \
                or m1.txt != m2.txt \
                or len(m1.user) != len(m2.user):
            print('# FAIL\n# [{0}] [{1}] [{2}]\n# [{3}] [{4}] [{5}] {6} {7} {8} {9}\n'.format(
                m1.ts, repr(m1.txt), m1.user,
                m2.ts, repr(m2.txt), m2.user,
                m1.ts == m2.ts,
                len(m1.user) == len(m2.user),
                repr(m1.txt) == repr(m2.txt),
                m1.txt == m2.txt))
            return False

    return True
|
|
|
|
def t_dser_dummy(fn):
    """Baseline: verify the reference stream against itself; fn is ignored."""
    verify_deserialization(deserializer=stream_msgs, fn='x')
|
|
|
|
td_dser_dummy = run(t_dser_dummy, 'txt', comp_t, base_t)[2]
|
|
base_t = td_dser_dummy
|
|
comp_t = td_dser_dummy
|
|
|
|
|
|
for redo in range(2):
|
|
|
|
def t_d_split_ast_eval(fn):
    """Read 'ts user <string literal>' lines from fn, parse the literal with ast.literal_eval, and verify."""
    import ast

    def subroutine(path):
        with open(path, 'rb') as src:
            for raw in src:
                fields = raw.decode('utf-8').rstrip().split(' ', 2)
                ts, user, literal = fields
                yield Message(int(ts), user, ast.literal_eval(literal))

    verify_deserialization(subroutine, fn)
|
|
td_d_split_ast_eval = run(t_d_split_ast_eval, 's_esc3c', None, comp_t)[2]
|
|
|
|
if redo == 0:
|
|
comp_t = td_d_split_ast_eval - base_t
|
|
|
|
|
|
|
|
def t_d_split_eval(fn):
    """Read 'ts user <string literal>' lines from fn, parse the literal with eval, and verify."""
    def subroutine(path):
        with open(path, 'rb') as src:
            for raw in src:
                fields = raw.decode('utf-8').rstrip().split(' ', 2)
                ts, user, literal = fields
                # NOTE(security): eval executes arbitrary code; only
                # acceptable here because the file is benchmark output
                yield Message(int(ts), user, eval(literal))

    verify_deserialization(subroutine, fn)
|
|
run(t_d_split_eval, 's_esc3c', comp_t, base_t)[2]
|
|
|
|
|
|
|
|
def t_d_lst_eval(fn):
    """Read one list literal per line from fn, parse it with eval, and verify."""
    def subroutine(path):
        with open(path, 'rb') as src:
            for raw in src:
                literal = raw.decode('utf-8').rstrip()
                # NOTE(security): eval executes arbitrary code; only
                # acceptable here because the file is benchmark output
                yield Message(*eval(literal))

    verify_deserialization(subroutine, fn)
|
|
run(t_d_lst_eval, 'lst_repr_f', comp_t, base_t)[2]
|
|
|
|
|
|
|
|
def t_d_lst_ast_eval(fn):
    """Read one list literal per line from fn, parse it with ast.literal_eval, and verify."""
    import ast

    def subroutine(path):
        with open(path, 'rb') as src:
            for raw in src:
                literal = raw.decode('utf-8').rstrip()
                yield Message(*ast.literal_eval(literal))

    verify_deserialization(subroutine, fn)
|
|
run(t_d_lst_ast_eval, 'lst_repr_f', comp_t, base_t)[2]
|
|
|
|
|
|
|
|
def t_d_split_repr_ast_e(fn):
    """Read 'ts user <repr of text>' lines from fn, parse the repr with ast.literal_eval, and verify."""
    import ast

    def subroutine(path):
        with open(path, 'rb') as src:
            for raw in src:
                fields = raw.decode('utf-8').rstrip().split(' ', 2)
                ts, user, literal = fields
                yield Message(int(ts), user, ast.literal_eval(literal))

    verify_deserialization(subroutine, fn)
|
|
run(t_d_split_repr_ast_e, 'txt_repr', comp_t, base_t)[2]
|
|
|
|
|
|
|
|
def t_d_split_repr_eval(fn):
    """Read 'ts user <repr of text>' lines from fn, parse the repr with eval, and verify."""
    def subroutine(path):
        with open(path, 'rb') as src:
            for raw in src:
                fields = raw.decode('utf-8').rstrip().split(' ', 2)
                ts, user, literal = fields
                # NOTE(security): eval executes arbitrary code; only
                # acceptable here because the file is benchmark output
                yield Message(int(ts), user, eval(literal))

    verify_deserialization(subroutine, fn)
|
|
run(t_d_split_repr_eval, 'txt_repr', comp_t, base_t)[2]
|
|
|
|
|
|
|
|
print()
|
|
|
|
sys.exit(0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print('\n\n{0} // {1}{2} // Serialization'.format(py_ver, host_os, bitness))
|
|
|
|
# characters that need escaping, and their escaped forms in the same order
r_from = u'\\\'\r\n'
r_to = [u'\\\\', u'\\\'', u'\\r', u'\\n']

# lookup table pairing each character with its escaped form
r_map = dict(zip(r_from, r_to))
|
|
|
|
|
|
|
|
# py[23] identical: 1.00 1.00
|
|
#
|
|
def t_chain_replace(fn):
    """Write "ts user u'text'" lines to fn, escaping the text with four chained .replace calls."""
    with open(fn, 'wb') as out:
        for msg in stream_msgs():
            escaped = (
                msg.txt
                .replace(u'\\', u'\\\\')
                .replace(u'\'', u'\\\'')
                .replace(u'\r', u'\\r')
                .replace(u'\n', u'\\n')
            )
            line = u'{0} {1} u\'{2}\'\n'.format(msg.ts, msg.user, escaped)
            out.write(line.encode('utf-8'))
|
|
|
|
td_chain = run(t_chain_replace, 's_esc1', None, comp_t)[2]
|
|
comp_t = td_chain - base_t
|
|
|
|
|
|
|
|
def t_chain_replace_hex(fn):
    """Like the chained-replace writer, but the timestamp is formatted as hexadecimal."""
    with open(fn, 'wb') as out:
        for msg in stream_msgs():
            escaped = (
                msg.txt
                .replace(u'\\', u'\\\\')
                .replace(u'\'', u'\\\'')
                .replace(u'\r', u'\\r')
                .replace(u'\n', u'\\n')
            )
            line = u'{0:x} {1} u\'{2}\'\n'.format(msg.ts, msg.user, escaped)
            out.write(line.encode('utf-8'))
|
|
|
|
run(t_chain_replace_hex, 's_esc1_hex', comp_t, base_t)[2]
|
|
|
|
|
|
|
|
def t_chain_replace_hexjoin(fn):
    """Write 'hexts user escaped-text' lines to fn, assembled with str.join.

    The text is escaped with four chained .replace calls and the timestamp
    is rendered with hex() minus its '0x' prefix.  Each record ends with a
    newline so the file holds one message per line.
    """
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            escaped = msg.txt.\
                replace(u'\\', u'\\\\').\
                replace(u'\'', u'\\\'').\
                replace(u'\r', u'\\r').\
                replace(u'\n', u'\\n')
            # the newline terminator was missing, which glued every
            # record onto a single line
            f.write((u' '.join(
                [hex(msg.ts)[2:], msg.user, escaped]
            ) + u'\n').encode('utf-8'))
|
|
|
|
run(t_chain_replace_hexjoin, 's_esc1_hexj', comp_t, base_t)[2]
|
|
|
|
|
|
|
|
def t_plain_fmt(fn):
    """Write unescaped 'ts user text' lines to fn using str.format."""
    with open(fn, 'wb') as out:
        for msg in stream_msgs():
            line = u'{0} {1} {2}\n'.format(msg.ts, msg.user, msg.txt)
            out.write(line.encode('utf-8'))
|
|
|
|
run(t_plain_fmt, 's_plain_fmt', comp_t, base_t)[2]
|
|
|
|
|
|
|
|
def t_plain_hex(fn):
    """Write unescaped 'hexts user text' lines to fn using str.format with the x format code."""
    with open(fn, 'wb') as out:
        for msg in stream_msgs():
            line = u'{0:x} {1} {2}\n'.format(msg.ts, msg.user, msg.txt)
            out.write(line.encode('utf-8'))
|
|
|
|
run(t_plain_hex, 's_plain_hex', comp_t, base_t)[2]
|
|
|
|
|
|
|
|
def t_plain_hexjoin(fn):
    """Write unescaped 'hexts user text' lines to fn, assembled with hex() and str.join."""
    with open(fn, 'wb') as out:
        for msg in stream_msgs():
            fields = [hex(msg.ts)[2:], msg.user, msg.txt]
            line = u' '.join(fields) + u'\n'
            out.write(line.encode('utf-8'))
|
|
|
|
run(t_plain_hexjoin, 's_plain_hexj', comp_t, base_t)[2]
|
|
|
|
|
|
def t_plain_join(fn):
    """Write unescaped 'ts user text' lines to fn, assembled with str() and str.join."""
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            # the decimal timestamp has no '0x' prefix to cut off; slicing
            # it with [2:] dropped its first two digits
            f.write((u' '.join(
                [str(msg.ts), msg.user, msg.txt]
            ) + u'\n').encode('utf-8'))
|
|
|
|
run(t_plain_join, 's_plain_join', comp_t, base_t)[2]
|
|
|
|
|
|
|
|
# py[23] identical: 1.49 1.40
|
|
#
|
|
def t_enumerate_replace(fn):
    """Write "ts user u'text'" lines to fn; escapes via enumerate(r_from) with an index into r_to."""
    with open(fn, 'wb') as out:
        for msg in stream_msgs():
            escaped = msg.txt
            for idx, needle in enumerate(r_from):
                escaped = escaped.replace(needle, r_to[idx])

            line = u'{0} {1} u\'{2}\'\n'.format(msg.ts, msg.user, escaped)
            out.write(line.encode('utf-8'))
|
|
|
|
run(t_enumerate_replace, 's_esc2a', comp_t, base_t)
|
|
|
|
|
|
|
|
# py[23] identical: 1.41 1.22
|
|
#
|
|
def t_foreach_dict_replace(fn):
    """Write "ts user u'text'" lines to fn; escapes by looping r_from and looking up r_map."""
    with open(fn, 'wb') as out:
        for msg in stream_msgs():
            escaped = msg.txt
            for needle in r_from:
                escaped = escaped.replace(needle, r_map[needle])

            line = u'{0} {1} u\'{2}\'\n'.format(msg.ts, msg.user, escaped)
            out.write(line.encode('utf-8'))
|
|
|
|
run(t_foreach_dict_replace, 's_esc2b', comp_t, base_t)
|
|
|
|
|
|
|
|
# py[23] identical: 1.71 1.69
|
|
#
|
|
def t_foreach_idx_replace(fn):
    """Write "ts user u'text'" lines to fn; escapes by looping r_from and locating the r_to entry via r_from.index."""
    with open(fn, 'wb') as out:
        for msg in stream_msgs():
            escaped = msg.txt
            for needle in r_from:
                escaped = escaped.replace(needle, r_to[r_from.index(needle)])

            line = u'{0} {1} u\'{2}\'\n'.format(msg.ts, msg.user, escaped)
            out.write(line.encode('utf-8'))
|
|
|
|
run(t_foreach_idx_replace, 's_esc2c', comp_t, base_t)
|
|
|
|
|
|
|
|
# py[23] identical: 1.27 1.10
|
|
#
|
|
def t_enumerate_replaceif(fn):
    """Write "ts user u'text'" lines to fn; like the enumerate variant, but replaces only characters present in the text."""
    with open(fn, 'wb') as out:
        for msg in stream_msgs():
            escaped = msg.txt
            for idx, needle in enumerate(r_from):
                if needle not in escaped:
                    continue
                escaped = escaped.replace(needle, r_to[idx])

            line = u'{0} {1} u\'{2}\'\n'.format(msg.ts, msg.user, escaped)
            out.write(line.encode('utf-8'))
|
|
|
|
run(t_enumerate_replaceif, 's_esc3', comp_t, base_t)
|
|
|
|
|
|
|
|
# py[23] identical: 1.13 0.91
|
|
#
|
|
def t_replaceif_dict(fn):
    """Write "ts user u'text'" lines to fn; replaces via r_map only the characters present in the text."""
    with open(fn, 'wb') as out:
        for msg in stream_msgs():
            escaped = msg.txt
            for needle in r_from:
                if needle not in escaped:
                    continue
                escaped = escaped.replace(needle, r_map[needle])

            line = u'{0} {1} u\'{2}\'\n'.format(msg.ts, msg.user, escaped)
            out.write(line.encode('utf-8'))
|
|
|
|
run(t_replaceif_dict, 's_esc3b', comp_t, base_t)
|
|
|
|
|
|
|
|
# py[23] identical: 1.13 0.89
|
|
#
|
|
def t_replaceif_dict_loc(fn):
    """Same as the conditional r_map variant, but r_from and r_map are bound to local names first."""
    with open(fn, 'wb') as out:
        # local aliases of the module-level tables, bound once before the loop
        needles = r_from
        table = r_map
        for msg in stream_msgs():
            escaped = msg.txt
            for needle in needles:
                if needle not in escaped:
                    continue
                escaped = escaped.replace(needle, table[needle])

            line = u'{0} {1} u\'{2}\'\n'.format(msg.ts, msg.user, escaped)
            out.write(line.encode('utf-8'))
|
|
|
|
run(t_replaceif_dict_loc, 's_esc3c', comp_t, base_t)
|
|
|
|
|
|
|
|
# py[23] identical: 3.19 3.19
|
|
#
|
|
def t_condwrite_always_dict(fn):
    """Write "ts user u'text'" lines to fn; rebuilds every text character by character, escaping via r_map."""
    with open(fn, 'wb') as out:
        for msg in stream_msgs():
            escaped = u''
            for ch in msg.txt:
                escaped += r_map[ch] if ch in r_from else ch

            line = u'{0} {1} u\'{2}\'\n'.format(msg.ts, msg.user, escaped)
            out.write(line.encode('utf-8'))
|
|
|
|
run(t_condwrite_always_dict, 's_esc4', comp_t, base_t)
|
|
|
|
|
|
|
|
# py[23] identical: 3.06 2.81
|
|
#
|
|
def t_condwrite_ifneed_list(fn):
    """Write "ts user u'text'" lines to fn; rebuilds a text character by character only if it contains a character from r_from.

    Membership of each character is tested against the r_from string.
    """
    with open(fn, 'wb') as out:
        for msg in stream_msgs():
            escaped = msg.txt
            for needle in r_from:
                if needle not in msg.txt:
                    continue
                # at least one character needs escaping: rebuild once, then stop
                escaped = u''
                for ch in msg.txt:
                    escaped += r_map[ch] if ch in r_from else ch
                break

            line = u'{0} {1} u\'{2}\'\n'.format(msg.ts, msg.user, escaped)
            out.write(line.encode('utf-8'))
|
|
|
|
run(t_condwrite_ifneed_list, 's_esc5', comp_t, base_t)
|
|
|
|
|
|
|
|
# py[23] identical: 3.38 2.99
|
|
#
|
|
def t_condwrite_ifneed_dict(fn):
    """Write "ts user u'text'" lines to fn; rebuilds a text character by character only if it contains a character from r_from.

    Membership of each character is tested against the r_map dict.
    """
    with open(fn, 'wb') as out:
        for msg in stream_msgs():
            escaped = msg.txt
            for needle in r_from:
                if needle not in msg.txt:
                    continue
                # at least one character needs escaping: rebuild once, then stop
                escaped = u''
                for ch in msg.txt:
                    escaped += r_map[ch] if ch in r_map else ch
                break

            line = u'{0} {1} u\'{2}\'\n'.format(msg.ts, msg.user, escaped)
            out.write(line.encode('utf-8'))
|
|
|
|
run(t_condwrite_ifneed_dict, 's_esc5b', comp_t, base_t)
|
|
|
|
|
|
|
|
# Differ: 0.92 0.57
|
|
#
|
|
def t_msgtxt_repr(fn):
    """Write 'ts user <repr of text>' lines to fn."""
    with open(fn, 'wb') as out:
        for msg in stream_msgs():
            line = u'{0} {1} {2}\n'.format(msg.ts, msg.user, repr(msg.txt))
            out.write(line.encode('utf-8'))
|
|
|
|
run(t_msgtxt_repr, 'txt_repr', comp_t, base_t)
|
|
|
|
|
|
|
|
# Differ: 0.92 0.57
|
|
#
|
|
def t_msgtxt_repr_u(fn):
    """Write 'ts user u<repr of text>' lines to fn, stripping any leading 'u' from the repr before adding one."""
    with open(fn, 'wb') as out:
        for msg in stream_msgs():
            literal = repr(msg.txt).lstrip('u')
            line = u'{0} {1} u{2}\n'.format(msg.ts, msg.user, literal)
            out.write(line.encode('utf-8'))
|
|
|
|
run(t_msgtxt_repr_u, 'txt_repr', comp_t, base_t)
|
|
|
|
|
|
|
|
# Differ: ? ?
|
|
#
|
|
def t_fakelist_repr(fn):
    """Write one hand-formatted list literal "[ts, u'user', <repr of text>]" per line to fn."""
    with open(fn, 'wb') as out:
        for msg in stream_msgs():
            line = u'[{0}, u\'{1}\', {2}]\n'.format(
                msg.ts, msg.user, repr(msg.txt))
            out.write(line.encode('utf-8'))
|
|
|
|
run(t_fakelist_repr, 'lst_repr_f', comp_t, base_t)
|
|
|
|
|
|
|
|
# Differ: 1.07 0.83
|
|
#
|
|
def t_list_repr(fn):
    """Write repr([ts, user, text]) to fn, one list per line."""
    with open(fn, 'wb') as out:
        for msg in stream_msgs():
            record = [msg.ts, msg.user, msg.txt]
            line = u'{0}\n'.format(repr(record))
            out.write(line.encode('utf-8'))
|
|
|
|
run(t_list_repr, 'lst_repr', comp_t, base_t)
|
|
|
|
|
|
|
|
# NG: 1.26 1.35
|
|
#
|
|
def t_uesc(fn):
    """Write 'ts user text' lines to fn, encoded with the unicode_escape codec.

    Each formatted message is encoded with unicode_escape and followed by
    a raw newline.
    """
    with open(fn, 'wb') as f:
        for msg in stream_msgs():
            escaped = u'{0} {1} {2}'.format(
                msg.ts, msg.user, msg.txt).encode('unicode_escape')
            # write the encoded bytes directly: on py3, passing them
            # through str.format embedded their b'...' repr in the output
            f.write(escaped + b'\n')
|
|
|
|
run(t_uesc, 'uesc', comp_t, base_t)
|
|
|
|
|
|
|
|
# Too slow + insecure: 3.15 2.09
|
|
#
|
|
def t_pickle2(fn):
    """Append every Message to fn as a separate protocol-2 pickle."""
    # NOTE(security): reading such a file back with pickle.load would
    # execute whatever the file dictates; never do that with untrusted data
    with open(fn, 'wb') as out:
        for msg in stream_msgs():
            pickle.dump(msg, out, 2)
|
|
|
|
run(t_pickle2, 'p2', comp_t, base_t)
|
|
|
|
|
|
|
|
# py[23] identical: 2.38 2.41
|
|
#
|
|
def t_json_str(fn):
    """Write json.dumps([ts, user, text]) to fn, one JSON array per line."""
    with open(fn, 'wb') as out:
        for msg in stream_msgs():
            record = json.dumps([msg.ts, msg.user, msg.txt])
            line = u'{0}\n'.format(record)
            out.write(line.encode('utf-8'))
|
|
|
|
run(t_json_str, 'json1', comp_t, base_t)
|
|
|
|
|
|
|
|
# py[23] different + 2slow: 5.5 5.6
|
|
#
|
|
def t_json_fh(fn):
    """Write [ts, user, text] to fn with json.dump, one JSON array per line.

    The file is opened in text mode because json.dump writes str objects.
    """
    with open(fn, 'w') as f:
        for msg in stream_msgs():
            json.dump([msg.ts, msg.user, msg.txt], f)
            # json.dump adds no terminator; without one the arrays run
            # together and the file cannot be read back line by line
            f.write('\n')
|
|
|
|
run(t_json_fh, 'json2', comp_t, base_t)
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
2.6.0.final.0 // Windows32 // Serialization
|
|
gen_txt_file 28.487s 0.000s 1.000 45024723 byte
|
|
stream_utf8 1.399s 0.000s 1.000 (1.399,1.399) 45024723 byte
|
|
stream_msgs 2.413s 1.014s 1.000 (2.413,1.399) 45024723 byte
|
|
chain_replace 7.011s 4.598s 1.000 (7.011,2.413) 63772299 byte
|
|
enumerate_replace 8.774s 4.176s 1.731 (2.413,4.598) 63772299 byte
|
|
foreach_dict_replace 8.316s 3.718s 1.541 (2.413,4.598) 63772299 byte
|
|
foreach_idx_replace 9.026s 4.428s 1.835 (2.413,4.598) 63772299 byte
|
|
enumerate_replaceif 8.295s 3.697s 1.532 (2.413,4.598) 63772299 byte
|
|
replaceif_dict 7.925s 3.327s 1.379 (2.413,4.598) 63772299 byte
|
|
replaceif_dict_loc 7.913s 3.315s 1.374 (2.413,4.598) 63772299 byte 1.37
|
|
condwrite_always_dict 13.482s 8.884s 3.682 (2.413,4.598) 63772299 byte
|
|
condwrite_ifneed_list 13.424s 8.826s 3.658 (2.413,4.598) 63772299 byte
|
|
condwrite_ifneed_dict 13.714s 9.116s 3.778 (2.413,4.598) 63772299 byte
|
|
msgtxt_repr 7.197s 2.599s 1.077 (2.413,4.598) 78927560 byte time size
|
|
fakelist_repr 7.248s 2.650s 1.098 (2.413,4.598) 86267592 byte 1.10, 1.35
|
|
list_repr 7.717s 3.119s 1.293 (2.413,4.598) 86267592 byte
|
|
uesc 8.259s 3.661s 1.517 (2.413,4.598) 75184474 byte
|
|
pickle2 13.299s 8.701s 3.606 (2.413,4.598) 128844837 byte
|
|
json_str 17.263s 12.665s 5.249 (2.413,4.598) 84047968 byte
|
|
json_fh 15.216s 10.618s 4.400 (2.413,4.598) 82999392 byte
|
|
|
|
3.6.2.final.0 // Windows64 // Serialization
|
|
gen_txt_file 42.314s 0.000s 1.000 45031335 byte
|
|
stream_utf8 0.780s 0.000s 1.000 (0.780,0.780) 45024723 byte
|
|
stream_msgs 1.920s 1.141s 1.000 (1.920,0.780) 45024723 byte
|
|
chain_replace 5.521s 3.600s 1.000 (5.521,1.920) 63772299 byte
|
|
enumerate_replace 6.637s 3.037s 1.581 (1.920,3.600) 63772299 byte
|
|
foreach_dict_replace 6.088s 2.487s 1.295 (1.920,3.600) 63772299 byte
|
|
foreach_idx_replace 7.056s 3.455s 1.799 (1.920,3.600) 63772299 byte
|
|
enumerate_replaceif 6.056s 2.455s 1.279 (1.920,3.600) 63772299 byte
|
|
replaceif_dict 5.555s 1.954s 1.018 (1.920,3.600) 63772299 byte
|
|
replaceif_dict_loc 5.599s 1.999s 1.041 (1.920,3.600) 63772299 byte 1.04
|
|
condwrite_always_dict 9.801s 6.201s 3.229 (1.920,3.600) 63772299 byte
|
|
condwrite_ifneed_list 9.608s 6.008s 3.128 (1.920,3.600) 63772299 byte
|
|
condwrite_ifneed_dict 9.960s 6.359s 3.312 (1.920,3.600) 63772299 byte
|
|
msgtxt_repr 4.681s 1.080s 0.563 (1.920,3.600) 65040655 byte time size
|
|
fakelist_repr 4.778s 1.177s 0.613 (1.920,3.600) 72380687 byte 0.61, 1.13
|
|
list_repr 5.337s 1.736s 0.904 (1.920,3.600) 71332111 byte
|
|
uesc 6.562s 2.962s 1.542 (1.920,3.600) 84223875 byte
|
|
pickle2 9.625s 6.025s 3.138 (1.920,3.600) 138282195 byte
|
|
json_str 9.394s 5.793s 3.017 (1.920,3.600) 84047968 byte
|
|
json_fh 19.304s 15.704s 8.178 (1.920,3.600) 82999392 byte
|
|
|
|
2.6.6.final.0 // Linux64 // Serialization
|
|
gen_txt_file 20.595s 0.000s 1.000 45013059 byte
|
|
stream_utf8 1.106s 0.000s 1.000 (1.106,1.106) 45013059 byte
|
|
stream_msgs 1.894s 0.787s 1.000 (1.894,1.106) 45013059 byte
|
|
chain_replace 4.846s 2.952s 1.000 (4.846,1.894) 63759315 byte
|
|
enumerate_replace 6.075s 3.123s 1.649 (1.894,2.952) 63759315 byte
|
|
foreach_dict_replace 5.888s 2.935s 1.550 (1.894,2.952) 63759315 byte
|
|
foreach_idx_replace 6.357s 3.404s 1.797 (1.894,2.952) 63759315 byte
|
|
enumerate_replaceif 5.744s 2.791s 1.474 (1.894,2.952) 63759315 byte
|
|
replaceif_dict 5.425s 2.472s 1.305 (1.894,2.952) 63759315 byte
|
|
replaceif_dict_loc 5.388s 2.435s 1.286 (1.894,2.952) 63759315 byte 1.29
|
|
condwrite_always_dict 9.718s 6.766s 3.572 (1.894,2.952) 63759315 byte
|
|
condwrite_ifneed_list 9.600s 6.647s 3.510 (1.894,2.952) 63759315 byte
|
|
condwrite_ifneed_dict 10.032s 7.080s 3.738 (1.894,2.952) 63759315 byte
|
|
msgtxt_repr 4.864s 1.912s 1.009 (1.894,2.952) 78917824 byte
|
|
fakelist_repr 4.903s 1.950s 1.030 (1.894,2.952) 86257856 byte 1.03
|
|
list_repr 5.204s 2.252s 1.189 (1.894,2.952) 86257856 byte
|
|
uesc 5.611s 2.658s 1.404 (1.894,2.952) 75173731 byte
|
|
pickle2 9.186s 6.234s 3.291 (1.894,2.952) 128833185 byte
|
|
json_str 12.212s 9.260s 4.889 (1.894,2.952) 84038028 byte
|
|
json_fh 9.866s 6.914s 3.650 (1.894,2.952) 82989452 byte
|
|
|
|
2.7.13.final.0 // Linux64 // Deserialization
|
|
stream_msgs 1.448s 0.609s 1.000 (1.448,0.838) 45004225 byte
|
|
dser_dummy 5.216s 3.769s 2.603 (1.448,1.448) 45004225 byte
|
|
d_split_ast_eval 13.481s 8.264s 1.000 (13.481,5.216) 66897095 byte
|
|
d_split_eval 13.723s 5.459s 1.046 (5.216,8.264) 66897095 byte
|
|
d_lst_eval 13.477s 5.212s 0.999 (5.216,8.264) 86246968 byte
|
|
d_lst_ast_eval 15.563s 7.298s 1.399 (5.216,8.264) 86246968 byte
|
|
d_split_repr_ast_e 11.164s 2.900s 1.000 (11.164,8.264) 78906936 byte
|
|
d_split_repr_eval 11.686s 3.421s 1.000 (11.686,8.264) 78906936 byte
|
|
|
|
2.7.13.final.0 // Linux64 // Serialization
|
|
gen lines 15.888s
|
|
stream_utf8 0.817s 0.000s 1.000 (0.817,0.817) 45012467 byte
|
|
stream_msgs 1.409s 0.592s 1.000 (1.409,0.817) 45012467 byte
|
|
chain_replace 3.641s 2.232s 1.000 (3.641,1.409) 63760646 byte
|
|
enumerate_replace 4.327s 2.095s 1.487 (1.409,2.232) 63760646 byte
|
|
foreach_dict_replace 4.213s 1.981s 1.406 (1.409,2.232) 63760646 byte
|
|
foreach_idx_replace 4.638s 2.406s 1.707 (1.409,2.232) 63760646 byte
|
|
enumerate_replaceif 4.021s 1.789s 1.269 (1.409,2.232) 63760646 byte
|
|
replaceif_dict x.xxxs 1.614s 1.145 (1.409,2.232) 63760646 byte
|
|
replaceif_dict_loc 3.820s 1.588s 1.127 (1.409,2.232) 63760646 byte 1.13
|
|
condwrite_always_dict 6.726s 4.494s 3.189 (1.409,2.232) 63760646 byte
|
|
condwrite_ifneed_list 6.541s 4.309s 3.058 (1.409,2.232) 63760646 byte
|
|
condwrite_ifneed_dict 6.999s 4.767s 3.382 (1.409,2.232) 63760646 byte
|
|
msgtxt_repr x.xxxs 1.448s 1.028 (1.409,2.232) 78921907 byte
|
|
fakelist_repr x.xxxs 1.449s 1.028 (1.409,2.232) 86261939 byte 1.03
|
|
list_repr x.xxxs 1.658s 1.177 (1.409,2.232) 86261939 byte
|
|
uesc 4.005s 1.773s 1.258 (1.409,2.232) 75177292 byte
|
|
pickle2 6.674s 4.442s 3.152 (1.409,2.232) 128832607 byte
|
|
json_str 5.591s 3.359s 2.383 (1.409,2.232) 84040832 byte
|
|
json_fh 9.970s 7.738s 5.490 (1.409,2.232) 82992256 byte
|
|
|
|
3.5.3.final.0 // Linux64 // Deserialization of py3 data
|
|
stream_msgs 1.290s 0.731s 1.000 (1.290,0.559) 45021263 byte
|
|
dser_dummy 2.868s 1.578s 1.224 (1.290,1.290) 45021263 byte
|
|
d_split_ast_eval 8.752s 5.884s 1.000 (8.752,2.868) 66914436 byte
|
|
d_split_eval 10.635s 4.751s 1.657 (2.868,5.884) 66914436 byte
|
|
d_lst_eval 13.332s 7.448s 2.597 (2.868,5.884) 72378292 byte
|
|
d_lst_ast_eval 12.996s 7.112s 2.480 (2.868,5.884) 72378292 byte
|
|
d_split_repr_ast_e 8.567s 2.683s 1.000 (8.567,5.884) 65038260 byte
|
|
d_split_repr_eval 10.520s 4.636s 1.000 (10.520,5.884) 65038260 byte
|
|
|
|
3.5.3.final.0 // Linux64 // Deserialization of py2 data
|
|
stream_msgs 1.263s 0.717s 1.000 (1.263,0.547) 45004225 byte
|
|
dser_dummy 2.849s 1.586s 1.255 (1.263,1.263) 45004225 byte
|
|
d_split_ast_eval 8.709s 5.860s 1.000 (8.709,2.849) 66897095 byte
|
|
d_split_eval 10.544s 4.684s 1.644 (2.849,5.860) 66897095 byte
|
|
d_lst_eval 11.550s 5.690s 1.997 (2.849,5.860) 86246968 byte
|
|
d_lst_ast_eval 11.752s 5.892s 2.068 (2.849,5.860) 86246968 byte
|
|
d_split_repr_ast_e 7.190s 1.331s 1.000 (7.190,5.860) 78906936 byte
|
|
d_split_repr_eval 8.862s 3.002s 1.000 (8.862,5.860) 78906936 byte
|
|
|
|
3.5.3.final.0 // Linux64 // Serialization
|
|
gen lines 25.908s
|
|
stream_utf8 0.551s 0.000s 1.000 (0.551,0.551) 45012467 byte
|
|
stream_msgs 1.298s 0.747s 1.000 (1.298,0.551) 45012467 byte
|
|
chain_replace 3.204s 1.906s 1.000 (3.204,1.298) 63760646 byte
|
|
enumerate_replace 3.727s 1.821s 1.403 (1.298,1.906) 63760646 byte
|
|
foreach_dict_replace 3.488s 1.582s 1.219 (1.298,1.906) 63760646 byte
|
|
foreach_idx_replace 4.098s 2.192s 1.689 (1.298,1.906) 63760646 byte
|
|
enumerate_replaceif 3.330s 1.424s 1.097 (1.298,1.906) 63760646 byte
|
|
replaceif_dict x.xxxs 1.104s 0.864 (1.298,1.906) 63760646 byte
|
|
replaceif_dict_loc x.xxxs 1.085s 0.849 (1.298,1.906) 63760646 byte 0.85
|
|
condwrite_always_dict 6.041s 4.136s 3.186 (1.298,1.906) 63760646 byte
|
|
condwrite_ifneed_list 5.556s 3.651s 2.813 (1.298,1.906) 63760646 byte
|
|
condwrite_ifneed_dict 5.783s 3.877s 2.987 (1.298,1.906) 63760646 byte
|
|
msgtxt_repr x.xxxs 0.710s 0.556 (1.298,1.906) 65030226 byte
|
|
fakelist_repr x.xxxs 0.760s 0.595 (1.298,1.906) 65030226 byte 0.60
|
|
list_repr x.xxxs 1.051s 0.823 (1.298,1.906) 71321682 byte
|
|
uesc 3.656s 1.751s 1.349 (1.298,1.906) 84220410 byte
|
|
pickle2 4.623s 2.717s 2.093 (1.298,1.906) 138269939 byte
|
|
json_str 5.036s 3.130s 2.411 (1.298,1.906) 84040832 byte
|
|
json_fh 9.213s 7.307s 5.630 (1.298,1.906) 82992256 byte
|
|
|
|
|
|
|
|
# check which serializations are identical across python versions
|
|
{ { find -type f | while read fn; do [[ $(head -n 3 "$fn" | wc -c) -gt 300 ]] && { sha256sum "$fn"; continue; }; head -n 1 "$fn" | grep -qE '[^a-zA-Z][a-zA-Z]{8}[^a-zA-Z]' || { sha256sum "$fn"; continue; }; printf '%s %s\n' "$(sed -r 's/([^a-zA-Z])[a-zA-Z]{8}([^a-zA-Z])/\1\2/' < "$fn" | sha256sum)" "$fn"; done; sleep 1; echo; } | tee /dev/stderr; } | sort
|
|
|
|
|
|
|
|
## TEST
|
|
with open('/dev/shm/py2.repr', 'rb') as f: eval(f.read().decode('utf-8'))
|
|
with open('/dev/shm/py3.repr', 'rb') as f: eval(f.read().decode('utf-8'))
|
|
with open('/dev/shm/py2.repr', 'rb') as f: __import__('json').dumps(eval(f.read().decode('utf-8')))
|
|
with open('/dev/shm/py3.repr', 'rb') as f: __import__('json').dumps(eval(f.read().decode('utf-8')))
|
|
|
|
## RESULT
|
|
Python 2.7.13 (default, Nov 24 2017, 17:33:09) Linux
|
|
Python 2.6 (r26:66721, Oct 2 2008, 11:35:03) Windows
|
|
[5, u'eyFEfvUb', u'\u591agt\u6d41LD GlONE\'r/u\u5b87FZX\u3057A\\iz iKhz ep"pOzwvA \\ah']
|
|
[5, 'RHrVSKcB', '\xe5\xa4\x9agt\xe6\xb5\x81LD GlONE\'r/u\xe5\xae\x87FZX\xe3\x81\x97A\\iz iKhz ep"pOzwvA \\ah']
|
|
'[5, "eyFEfvUb", "\\u591agt\\u6d41LD GlONE\'r/u\\u5b87FZX\\u3057A\\\\iz iKhz ep\\"pOzwvA \\\\ah"]'
|
|
'[5, "RHrVSKcB", "\\u591agt\\u6d41LD GlONE\'r/u\\u5b87FZX\\u3057A\\\\iz iKhz ep\\"pOzwvA \\\\ah"]'
|
|
|
|
## RESULT
|
|
Python 3.5.3 (default, Jan 19 2017, 14:11:04) Linux
|
|
Python 3.6.2 (v3.6.2:5fd33b5, Jul 8 2017, 04:57:36) Windows
|
|
[5, 'eyFEfvUb', '多gt流LD GlONE\'r/u宇FZXしA\\iz iKhz ep"pOzwvA \\ah']
|
|
[5, 'RHrVSKcB', '多gt流LD GlONE\'r/u宇FZXしA\\iz iKhz ep"pOzwvA \\ah']
|
|
'[5, "eyFEfvUb", "\\u591agt\\u6d41LD GlONE\'r/u\\u5b87FZX\\u3057A\\\\iz iKhz ep\\"pOzwvA \\\\ah"]'
|
|
'[5, "RHrVSKcB", "\\u591agt\\u6d41LD GlONE\'r/u\\u5b87FZX\\u3057A\\\\iz iKhz ep\\"pOzwvA \\\\ah"]'
|
|
|
|
|
|
|
|
## TEST
|
|
with open('/dev/shm/py2.repr', 'rb') as f: v2=eval(f.read().decode('utf-8'))[2]
|
|
with open('/dev/shm/py3.repr', 'rb') as f: v3=eval(f.read().decode('utf-8'))[2]
|
|
if v2==v3: print('eval(py2repr) == eval(py3repr)')
|
|
|
|
## py2 FAIL, fix:
|
|
if isinstance(v3,str): v3=v3.decode('utf-8')
|
|
|
|
## py3 SUCCESS
|
|
|
|
|
|
|
|
## TEST
|
|
with open('/dev/shm/py2.repr', 'rb') as f: v2=__import__('ast').literal_eval(f.read().decode('utf-8'))[2]
|
|
with open('/dev/shm/py3.repr', 'rb') as f: v3=__import__('ast').literal_eval(f.read().decode('utf-8'))[2]
|
|
if v2==v3: print('eval(py2repr) == eval(py3repr)')
|
|
|
|
# same results as with native eval
|
|
|
|
|
|
|
|
currently:
only d_split_ast_eval and d_split_eval succeed when py2 deserializes py3 data;
all deserializations succeed when py3 deserializes py2 data
|
|
"""
|