r0c/test/prototyping/serialization.py

#!/usr/bin/env python3
# coding: utf-8
from __future__ import print_function


import os
import sys
import platform
import random
import struct
import time
import json
try:
	import cPickle as pickle  # py2
except:
	import pickle  # py3


ITERATIONS = 2
#ITERATIONS = 1


class Message(object):
	"""One chat message: a timestamp, the author's name and the message body."""

	def __init__(self, ts, user, txt):
		# ts: int timestamp, user: str username, txt: str text
		self.ts, self.user, self.txt = ts, user, txt


def result(desc, sec, sec2, mul, comp_t, base_t, fn=None):
	"""Print one aligned row of benchmark results.

	desc is the row label, sec/sec2 the absolute and relative durations,
	mul the multiplier, comp_t/base_t the reference times shown in
	parentheses. When fn is given its size on disk is printed, otherwise
	an 'x' placeholder is shown.
	"""
	if fn:
		size = os.path.getsize(fn)
	else:
		size = 'x'

	row_fmt = u'{0:24} {1:8.3f}s  {2:8.3f}s  {3:8.3f} ({4:.3f},{5:.3f})  {6:9} byte'
	row = row_fmt.format(desc, sec, sec2, mul, comp_t, base_t, size)
	print(row)


""" run a test function, compare time against comp_t after subtracting base_t """
def run(func, write_to, comp_t=None, base_t=None, iterations=ITERATIONS):
	"""Time func(write_to) repeatedly and report the fastest run.

	func       -- callable taking the output path; its name minus the
	              first two characters is used as the row label
	write_to   -- path passed to func and shown (with its size) by result()
	comp_t     -- reference duration; when given, the printed multiplier
	              is (elapsed - base) / comp_t, otherwise it is 1
	base_t     -- baseline subtracted from the elapsed time; when omitted
	              the relative time is reported as 0
	iterations -- number of timed runs

	Returns [label, write_to, fastest_seconds].
	"""
	mtd = 99999999
	desc = func.__name__[2:]
	is_windows = platform.system() == 'Windows'
	if not is_windows:
		# reserve a line which the ANSI cursor-up below will overwrite
		print()

	best = []
	for iteration in range(iterations):
		t0 = time.time()
		func(write_to)
		td = time.time() - t0

		base_tv = base_t or td
		comp_tv = comp_t or td
		rel_tv = td - base_tv
		mul = rel_tv / comp_tv if comp_t else 1
		if mtd > td:
			mtd = td
			best = [desc, td, rel_tv, mul, comp_tv, base_tv, write_to]
			if not is_windows:
				# move the cursor up one line and redraw the row in place
				print('\033[A', end='')
				result(*best)

	# no cursor-up on Windows, so print only the final best row;
	# skip it when nothing ran (iterations <= 0) instead of crashing
	if is_windows and best:
		result(*best)

	return [ desc, write_to, mtd ]


import struct
# every 7-bit character except NUL, with CR/LF shown as visible escapes,
# followed by a handful of kanji/kana
all_chars = b''.join([struct.pack('B', code) for code in range(1, 128)])
all_chars = all_chars.decode('utf-8')
all_chars = all_chars.replace('\r', '\\r').replace('\n', '\\n')
all_chars += u'宇多田ヒカル桜流し'

# alphabet that the random sentences are drawn from
letters = u'宇多田ヒカル桜流しABCDEFGHIJKLMNOPQRSTUVWXYZ\\\'\'\'"/abcdefghijklmnopqrstuvwxyz        '
some_chars = letters

def gen_sentence():
	"""Return one random line of 4 to 64 characters drawn from some_chars.

	Leading and trailing whitespace is stripped. If nothing is left after
	stripping, u'a' is returned so the result is never empty.
	"""
	retlen = random.randint(4, 64)
	ret = u''.join([random.choice(some_chars) for _ in range(retlen)])
	# the fallback has to be applied after stripping: the unstripped
	# string always has at least 4 characters, so checking it first
	# could never trigger and an all-space sentence came back empty
	return ret.strip() or u'a'


# twelve random 8-letter usernames, regenerated on every run
users = []
letters = u'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
for _ in range(12):
	users.append(u''.join([random.choice(letters) for _ in range(8)]))


def stream_txt():
	"""Yield each line of the file 'txt' decoded as UTF-8, trailing whitespace removed."""
	with open('txt', 'rb') as src:
		for raw in src:
			line = raw.decode('utf-8')
			yield line.rstrip()


def stream_msgs_plain(dontcare=None):
	"""Yield one Message per line of 'txt'.

	The line number becomes the timestamp and the author cycles through
	the users list in order. The argument is ignored.
	"""
	who = 0
	with open('txt', 'rb') as src:
		for seq, raw in enumerate(src):
			yield Message(seq, users[who], raw.decode('utf-8').rstrip())
			# advance to the next user, wrapping at the end of the list
			who = (who + 1) % len(users)


def stream_msg_newlines(dontcare=None):
	"""Like stream_msgs_plain, but some messages get a line break in the middle.

	Within every run of 32 lines, line 15 gets a carriage return and
	line 31 gets a newline inserted at the midpoint of its text.
	The argument is ignored.
	"""
	who = 0
	with open('txt', 'rb') as src:
		for seq, raw in enumerate(src):
			body = raw.decode('utf-8').rstrip()
			slot = seq % 32
			if slot in (15, 31):
				half = len(body) // 2
				sep = u'\n' if slot == 31 else u'\r'
				body = body[:half] + sep + body[half:]

			yield Message(seq, users[who], body)
			# advance to the next user, wrapping at the end of the list
			who = (who + 1) % len(users)


stream_msgs = stream_msgs_plain


def t_gen_txt_file(dontcare):
	"""Generate the corpus: 1048576 random sentences, one per line, in 'txt'.

	The argument is ignored; the output path is always 'txt'.
	Progress is printed every 8192 lines.
	"""
	try:
		memes = xrange  # py2: lazy range
	except NameError:
		memes = range  # py3: xrange does not exist

	total = 1048576
	with open('txt', 'wb') as f:
		for n in memes(total):
			if n % 8192 == 0:
				print('{0}  {1:.2f}%'.format(n, n*100.0/total))
			f.write(u'{0}\n'.format(gen_sentence()).encode('utf-8'))

if not os.path.isfile('txt'):
	run(t_gen_txt_file, 'txt')


# interpreter / platform banner used in the section headers
py_ver = '.'.join(map(str, sys.version_info))
host_os = platform.system()
bitness = 8 * struct.calcsize('P')  # pointer size in bits
print('\n\n{0} // {1}{2} // Deserialization'.format(py_ver, host_os, bitness))


### takeaways:
#
# enumerate(list)  is slower than looking up each item in a dict
#
# chaining .replace beats most alternatives
#
# checking whether a string contains a character before trying to replace it saves surprisingly little time
#
# iterating over characters in source and conditionally writing ch or \ch is ~30% the speed of chained .replace
#
# loading global variables into a method before repeatedly using it saves a tiny amount of time
#


def t_stream_utf8(fn):
	"""Baseline: iterate over every decoded line from stream_txt() and discard it. fn is unused."""
	for ln in stream_txt():
		pass

td_utf8 = run(t_stream_utf8, 'txt')[2]
base_t = td_utf8
comp_t = td_utf8


def t_stream_msgs(fn):
	"""Baseline: iterate over every Message from stream_msgs() and discard it. fn is unused."""
	for msg in stream_msgs():
		pass

td_msgs = run(t_stream_msgs, 'txt', None, comp_t)[2]
base_t = td_msgs
comp_t = td_msgs


if os.path.isfile('lst_repr_f'):

	def verify_deserialization(deserializer, fn):
		"""Compare the messages produced by deserializer(fn) against stream_msgs().

		Timestamps and texts must be equal; usernames are compared by
		length only (the users list is randomly generated on every run).
		Prints a diagnostic and returns False at the first mismatch,
		returns True otherwise.

		NOTE: zip() stops at the shorter stream, so a deserializer that
		yields too few messages is not reported.
		"""
		for want, got in zip(stream_msgs(), deserializer(fn)):
			if want.ts == got.ts \
			and want.txt == got.txt \
			and len(want.user) == len(got.user):
				continue

			report = '# FAIL\n# [{0}] [{1}] [{2}]\n# [{3}] [{4}] [{5}]  {6} {7} {8} {9}\n'
			print(report.format(
				want.ts, repr(want.txt), want.user,
				got.ts, repr(got.txt), got.user,
				want.ts == got.ts,
				len(want.user) == len(got.user),
				repr(want.txt) == repr(got.txt),
				want.txt == got.txt))
			return False

		return True

	def t_dser_dummy(fn):
		"""Baseline for the deserializers: verify stream_msgs against itself, so only the comparison overhead is timed. fn is unused."""
		# stream_msgs ignores its argument, so 'x' is just a placeholder
		verify_deserialization(stream_msgs, 'x')

	td_dser_dummy = run(t_dser_dummy, 'txt', comp_t, base_t)[2]
	base_t = td_dser_dummy
	comp_t = td_dser_dummy


	for redo in range(2):

		def t_d_split_ast_eval(fn):
			"""Deserialize `ts user 'txt'` lines: split on the first two spaces, parse txt with ast.literal_eval."""
			import ast
			def subroutine(fn):
				# generator: one Message per line of fn
				with open(fn, 'rb') as f:
					for ln in f:
						# maxsplit=2 keeps spaces inside the quoted text intact
						ts, user, txt = ln.decode('utf-8').rstrip().split(' ', 2)
						yield Message(int(ts), user, ast.literal_eval(txt))
			verify_deserialization(subroutine, fn)
		td_d_split_ast_eval = run(t_d_split_ast_eval, 's_esc3c', None, comp_t)[2]

		if redo == 0:
			comp_t = td_d_split_ast_eval - base_t


		def t_d_split_eval(fn):
			"""Deserialize `ts user 'txt'` lines: split on the first two spaces, parse txt with eval."""
			def subroutine(fn):
				# generator: one Message per line of fn
				with open(fn, 'rb') as f:
					for ln in f:
						ts, user, txt = ln.decode('utf-8').rstrip().split(' ', 2)
						# NOTE(review): eval() executes whatever the file contains;
						# assumes fn was written by this script -- never use on untrusted data
						yield Message(int(ts), user, eval(txt))
			verify_deserialization(subroutine, fn)
		run(t_d_split_eval, 's_esc3c', comp_t, base_t)[2]


		def t_d_lst_eval(fn):
			"""Deserialize lines that each hold a list literal [ts, user, txt] by eval-ing the whole line."""
			def subroutine(fn):
				# generator: one Message per line of fn
				with open(fn, 'rb') as f:
					for ln in f:
						# NOTE(review): eval() executes whatever the file contains;
						# assumes fn was written by this script -- never use on untrusted data
						yield Message(*eval(ln.decode('utf-8').rstrip()))
			verify_deserialization(subroutine, fn)
		run(t_d_lst_eval, 'lst_repr_f', comp_t, base_t)[2]


		def t_d_lst_ast_eval(fn):
			"""Deserialize lines that each hold a list literal [ts, user, txt] with ast.literal_eval."""
			import ast
			def subroutine(fn):
				# generator: one Message per line of fn
				with open(fn, 'rb') as f:
					for ln in f:
						yield Message(*ast.literal_eval(ln.decode('utf-8').rstrip()))
			verify_deserialization(subroutine, fn)
		run(t_d_lst_ast_eval, 'lst_repr_f', comp_t, base_t)[2]


		def t_d_split_repr_ast_e(fn):
			"""Deserialize `ts user repr(txt)` lines: split on the first two spaces, parse txt with ast.literal_eval."""
			import ast
			def subroutine(fn):
				# generator: one Message per line of fn
				with open(fn, 'rb') as f:
					for ln in f:
						# maxsplit=2 keeps spaces inside the quoted text intact
						ts, user, txt = ln.decode('utf-8').rstrip().split(' ', 2)
						yield Message(int(ts), user, ast.literal_eval(txt))
			verify_deserialization(subroutine, fn)
		run(t_d_split_repr_ast_e, 'txt_repr', comp_t, base_t)[2]


		def t_d_split_repr_eval(fn):
			"""Deserialize `ts user repr(txt)` lines: split on the first two spaces, parse txt with eval."""
			def subroutine(fn):
				# generator: one Message per line of fn
				with open(fn, 'rb') as f:
					for ln in f:
						ts, user, txt = ln.decode('utf-8').rstrip().split(' ', 2)
						# NOTE(review): eval() executes whatever the file contains;
						# assumes fn was written by this script -- never use on untrusted data
						yield Message(int(ts), user, eval(txt))
			verify_deserialization(subroutine, fn)
		run(t_d_split_repr_eval, 'txt_repr', comp_t, base_t)[2]


		print()

	sys.exit(0)


print('\n\n{0} // {1}{2} // Serialization'.format(py_ver, host_os, bitness))

# characters that need escaping (backslash, single quote, CR, LF) ...
r_from = u'\\\'\r\n'
# ... their escaped forms, in the same order ...
r_to = [ u'\\\\', u'\\\'', u'\\r', u'\\n' ]
# ... and the same pairing as a lookup table
r_map = dict(zip(r_from, r_to))


# py[23] identical:  1.00  1.00
#
def t_chain_replace(fn):
	"""Write each message to fn as `ts user u'txt'`, escaping txt with four chained str.replace calls.

	Backslash is replaced first so the backslashes added by the later
	replacements are not escaped a second time.
	"""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			f.write(u'{0} {1} u\'{2}\'\n'.format(
				msg.ts, msg.user, msg.txt.\
				replace(u'\\', u'\\\\').\
				replace(u'\'', u'\\\'').\
				replace(u'\r', u'\\r').\
				replace(u'\n', u'\\n')).\
				encode('utf-8'))

td_chain = run(t_chain_replace, 's_esc1', None, comp_t)[2]
comp_t = td_chain - base_t


def t_chain_replace_hex(fn):
	"""Same as the chained-replace serializer, but the timestamp is written in lowercase hex via the {0:x} format spec."""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			f.write(u'{0:x} {1} u\'{2}\'\n'.format(
				msg.ts, msg.user, msg.txt.\
				replace(u'\\', u'\\\\').\
				replace(u'\'', u'\\\'').\
				replace(u'\r', u'\\r').\
				replace(u'\n', u'\\n')).\
				encode('utf-8'))

run(t_chain_replace_hex, 's_esc1_hex', comp_t, base_t)[2]


def t_chain_replace_hexjoin(fn):
	"""Write each message to fn as `hexts user txt`, built with str.join instead of str.format.

	txt is escaped with four chained str.replace calls and is written
	without surrounding quotes. Each record is terminated by a newline;
	previously no terminator was written, so all records ran together
	on a single line.
	"""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			# hex() yields '0x...'; [2:] drops that prefix
			f.write((u' '.join([hex(msg.ts)[2:], msg.user, msg.txt.\
				replace(u'\\', u'\\\\').\
				replace(u'\'', u'\\\'').\
				replace(u'\r', u'\\r').\
				replace(u'\n', u'\\n')]) + u'\n').\
				encode('utf-8'))

run(t_chain_replace_hexjoin, 's_esc1_hexj', comp_t, base_t)[2]


def t_plain_fmt(fn):
	"""Write each message to fn as `ts user txt` with no escaping, using str.format."""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			f.write(u'{0} {1} {2}\n'.format(
				msg.ts, msg.user, msg.txt).\
				encode('utf-8'))

run(t_plain_fmt, 's_plain_fmt', comp_t, base_t)[2]


def t_plain_hex(fn):
	"""Write each message to fn as `hexts user txt` with no escaping; hex conversion via the {0:x} format spec."""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			f.write(u'{0:x} {1} {2}\n'.format(
				msg.ts, msg.user, msg.txt).\
				encode('utf-8'))

run(t_plain_hex, 's_plain_hex', comp_t, base_t)[2]


def t_plain_hexjoin(fn):
	"""Write each message to fn as `hexts user txt` with no escaping, built with hex() and str.join."""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			# hex() yields '0x...'; [2:] drops that prefix
			f.write((u' '.join(
				[hex(msg.ts)[2:], msg.user, msg.txt]\
				) + u'\n').encode('utf-8'))

run(t_plain_hexjoin, 's_plain_hexj', comp_t, base_t)[2]


def t_plain_join(fn):
	"""Write each message to fn as `ts user txt` with no escaping, built with str() and str.join."""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			# the decimal timestamp is written in full; the [2:] slice that
			# used to be here was copied from the hex() variant (where it
			# strips '0x') and silently dropped the first two digits
			f.write((u' '.join(
				[str(msg.ts), msg.user, msg.txt]\
				) + u'\n').encode('utf-8'))

run(t_plain_join, 's_plain_join', comp_t, base_t)[2]


# py[23] identical:  1.49  1.40
#
def t_enumerate_replace(fn):
	"""Escaping variant: loop over enumerate(r_from) and look the replacement up in the r_to list by index."""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			txt = msg.txt
			# r_from and r_to are parallel sequences
			for n, bad in enumerate(r_from):
				txt = txt.replace(bad, r_to[n])
			f.write(u'{0} {1} u\'{2}\'\n'.format(
				msg.ts, msg.user, txt).\
				encode('utf-8'))

run(t_enumerate_replace, 's_esc2a', comp_t, base_t)


# py[23] identical:  1.41  1.22
#
def t_foreach_dict_replace(fn):
	"""Escaping variant: loop over r_from and look the replacement up in the r_map dict."""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			txt = msg.txt
			for bad in r_from:
				txt = txt.replace(bad, r_map[bad])
			f.write(u'{0} {1} u\'{2}\'\n'.format(
				msg.ts, msg.user, txt).\
				encode('utf-8'))

run(t_foreach_dict_replace, 's_esc2b', comp_t, base_t)


# py[23] identical:  1.71  1.69
#
def t_foreach_idx_replace(fn):
	"""Escaping variant: loop over r_from and find the replacement via r_from.index() into the r_to list."""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			txt = msg.txt
			for bad in r_from:
				# index() searches r_from again on every lookup
				txt = txt.replace(bad, r_to[r_from.index(bad)])
			f.write(u'{0} {1} u\'{2}\'\n'.format(
				msg.ts, msg.user, txt).\
				encode('utf-8'))

run(t_foreach_idx_replace, 's_esc2c', comp_t, base_t)


# py[23] identical:  1.27  1.10
#
def t_enumerate_replaceif(fn):
	"""Escaping variant: like the enumerate loop, but str.replace is only called when the character occurs in the text."""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			txt = msg.txt
			for n, bad in enumerate(r_from):
				# membership test first, to skip needless replace calls
				if bad in txt:
					txt = txt.replace(bad, r_to[n])
			f.write(u'{0} {1} u\'{2}\'\n'.format(
				msg.ts, msg.user, txt).\
				encode('utf-8'))

run(t_enumerate_replaceif, 's_esc3', comp_t, base_t)


# py[23] identical:  1.13  0.91
#
def t_replaceif_dict(fn):
	"""Escaping variant: replace only characters present in the text, replacements looked up in the r_map dict."""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			txt = msg.txt
			for bad in r_from:
				# membership test first, to skip needless replace calls
				if bad in txt:
					txt = txt.replace(bad, r_map[bad])
			f.write(u'{0} {1} u\'{2}\'\n'.format(
				msg.ts, msg.user, txt).\
				encode('utf-8'))

run(t_replaceif_dict, 's_esc3b', comp_t, base_t)


# py[23] identical:  1.13  0.89
#
def t_replaceif_dict_loc(fn):
	"""Escaping variant: same as the replace-if-present dict loop, with the global tables first bound to local names."""
	with open(fn, 'wb') as f:
		# local aliases, so the loop does not look up globals each time
		lr_from = r_from
		lr_map = r_map
		for msg in stream_msgs():
			txt = msg.txt
			for bad in lr_from:
				if bad in txt:
					txt = txt.replace(bad, lr_map[bad])
			f.write(u'{0} {1} u\'{2}\'\n'.format(
				msg.ts, msg.user, txt).\
				encode('utf-8'))

run(t_replaceif_dict_loc, 's_esc3c', comp_t, base_t)


# py[23] identical:  3.19  3.19
#
def t_condwrite_always_dict(fn):
	"""Escaping variant: rebuild every text character by character, substituting from r_map where needed."""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			txt = u''
			for ch in msg.txt:
				# membership is tested against the r_from string, the
				# replacement itself comes from the r_map dict
				if ch in r_from:
					txt += r_map[ch]
				else:
					txt += ch
			f.write(u'{0} {1} u\'{2}\'\n'.format(
				msg.ts, msg.user, txt).\
				encode('utf-8'))

run(t_condwrite_always_dict, 's_esc4', comp_t, base_t)


# py[23] identical:  3.06  2.81
#
def t_condwrite_ifneed_list(fn):
	"""Escaping variant: rebuild the text character by character, but only if it contains something to escape.

	Per-character membership is tested against the r_from string.
	"""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			txt = msg.txt
			for bad in r_from:
				if bad in msg.txt:
					# at least one character needs escaping: rebuild the
					# whole text once, then stop scanning r_from
					txt = u''
					for ch in msg.txt:
						if ch in r_from:
							txt += r_map[ch]
						else:
							txt += ch
					break
			f.write(u'{0} {1} u\'{2}\'\n'.format(
				msg.ts, msg.user, txt).\
				encode('utf-8'))

run(t_condwrite_ifneed_list, 's_esc5', comp_t, base_t)


# py[23] identical:  3.38  2.99
#
def t_condwrite_ifneed_dict(fn):
	"""Escaping variant: rebuild the text character by character, but only if it contains something to escape.

	Per-character membership is tested against the r_map dict.
	"""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			txt = msg.txt
			for bad in r_from:
				if bad in msg.txt:
					# at least one character needs escaping: rebuild the
					# whole text once, then stop scanning r_from
					txt = u''
					for ch in msg.txt:
						if ch in r_map:
							txt += r_map[ch]
						else:
							txt += ch
					break
			f.write(u'{0} {1} u\'{2}\'\n'.format(
				msg.ts, msg.user, txt).\
				encode('utf-8'))

run(t_condwrite_ifneed_dict, 's_esc5b', comp_t, base_t)


# Differ:  0.92  0.57
#
def t_msgtxt_repr(fn):
	"""Write each message to fn as `ts user repr(txt)`, leaving all escaping and quoting to repr()."""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			f.write(u'{0} {1} {2}\n'.format(
				msg.ts, msg.user, repr(msg.txt)).\
				encode('utf-8'))

run(t_msgtxt_repr, 'txt_repr', comp_t, base_t)


# Differ:  0.92  0.57
#
def t_msgtxt_repr_u(fn):
	"""Write each message to fn as `ts user u<repr(txt)>`, always with exactly one u prefix on the literal."""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			# strip any leading 'u' from the repr, then add one back
			# explicitly so the prefix is present regardless of what
			# repr() produced
			f.write(u'{0} {1} u{2}\n'.format(
				msg.ts, msg.user, repr(msg.txt).lstrip('u')).\
				encode('utf-8'))

run(t_msgtxt_repr_u, 'txt_repr', comp_t, base_t)


# Differ:  ?  ?
#
def t_fakelist_repr(fn):
	"""Write each message to fn as a hand-assembled list literal `[ts, u'user', repr(txt)]`.

	Only the text goes through repr(); the username is inserted between
	quotes as-is.
	"""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			f.write(u'[{0}, u\'{1}\', {2}]\n'.format(
				msg.ts, msg.user, repr(msg.txt)).\
				encode('utf-8'))

run(t_fakelist_repr, 'lst_repr_f', comp_t, base_t)


# Differ:  1.07  0.83
#
def t_list_repr(fn):
	"""Write each message to fn as repr([ts, user, txt]), one list literal per line."""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			f.write(u'{0}\n'.format(
				repr([msg.ts, msg.user, msg.txt])).\
				encode('utf-8'))

run(t_list_repr, 'lst_repr', comp_t, base_t)


# NG:  1.26  1.35
#
def t_uesc(fn):
	"""Write each message to fn as `ts user txt`, with the whole line run through the unicode_escape codec.

	The codec already returns a byte string, so it is written directly
	with a newline appended. Formatting that byte string into a text
	template, as was done before, embeds its b'...' repr on Python 3.
	"""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			f.write(u'{0} {1} {2}'.format(
				msg.ts, msg.user, msg.txt).\
				encode('unicode_escape') + b'\n')

run(t_uesc, 'uesc', comp_t, base_t)


# Too slow + insecure:  3.15  2.09
#
def t_pickle2(fn):
	"""Write each Message object to fn as a separate pickle, protocol 2."""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			# NOTE(review): unpickling executes code from the file, so this
			# format must never be loaded from untrusted sources
			pickle.dump(msg, f, 2)

run(t_pickle2, 'p2', comp_t, base_t)


# py[23] identical:  2.38  2.41
#
def t_json_str(fn):
	"""Write each message to fn as a JSON array [ts, user, txt], one per line, via json.dumps."""
	with open(fn, 'wb') as f:
		for msg in stream_msgs():
			f.write(u'{0}\n'.format(json.dumps([msg.ts, msg.user, msg.txt])).encode('utf-8'))

run(t_json_str, 'json1', comp_t, base_t)


# py[23] different + 2slow:  5.5  5.6
#
def t_json_fh(fn):
	"""Write each message to fn as a JSON array [ts, user, txt], one per line, via json.dump on the file handle.

	The file is opened in text mode because json.dump writes text.
	"""
	with open(fn, 'w') as f:
		for msg in stream_msgs():
			json.dump([msg.ts, msg.user, msg.txt], f)
			# json.dump writes no record separator; without this the
			# arrays are concatenated on one line and cannot be read back
			f.write('\n')

run(t_json_fh, 'json2', comp_t, base_t)


"""
2.6.0.final.0 // Windows32 // Serialization
gen_txt_file               28.487s     0.000s     1.000                 45024723 byte
stream_utf8                 1.399s     0.000s     1.000 (1.399,1.399)   45024723 byte
stream_msgs                 2.413s     1.014s     1.000 (2.413,1.399)   45024723 byte
chain_replace               7.011s     4.598s     1.000 (7.011,2.413)   63772299 byte
enumerate_replace           8.774s     4.176s     1.731 (2.413,4.598)   63772299 byte
foreach_dict_replace        8.316s     3.718s     1.541 (2.413,4.598)   63772299 byte
foreach_idx_replace         9.026s     4.428s     1.835 (2.413,4.598)   63772299 byte
enumerate_replaceif         8.295s     3.697s     1.532 (2.413,4.598)   63772299 byte
replaceif_dict              7.925s     3.327s     1.379 (2.413,4.598)   63772299 byte
replaceif_dict_loc          7.913s     3.315s     1.374 (2.413,4.598)   63772299 byte  1.37
condwrite_always_dict      13.482s     8.884s     3.682 (2.413,4.598)   63772299 byte
condwrite_ifneed_list      13.424s     8.826s     3.658 (2.413,4.598)   63772299 byte
condwrite_ifneed_dict      13.714s     9.116s     3.778 (2.413,4.598)   63772299 byte
msgtxt_repr                 7.197s     2.599s     1.077 (2.413,4.598)   78927560 byte  time  size
fakelist_repr               7.248s     2.650s     1.098 (2.413,4.598)   86267592 byte  1.10, 1.35
list_repr                   7.717s     3.119s     1.293 (2.413,4.598)   86267592 byte
uesc                        8.259s     3.661s     1.517 (2.413,4.598)   75184474 byte
pickle2                    13.299s     8.701s     3.606 (2.413,4.598)  128844837 byte
json_str                   17.263s    12.665s     5.249 (2.413,4.598)   84047968 byte
json_fh                    15.216s    10.618s     4.400 (2.413,4.598)   82999392 byte

3.6.2.final.0 // Windows64 // Serialization
gen_txt_file               42.314s     0.000s     1.000                 45031335 byte
stream_utf8                 0.780s     0.000s     1.000 (0.780,0.780)   45024723 byte
stream_msgs                 1.920s     1.141s     1.000 (1.920,0.780)   45024723 byte
chain_replace               5.521s     3.600s     1.000 (5.521,1.920)   63772299 byte
enumerate_replace           6.637s     3.037s     1.581 (1.920,3.600)   63772299 byte
foreach_dict_replace        6.088s     2.487s     1.295 (1.920,3.600)   63772299 byte
foreach_idx_replace         7.056s     3.455s     1.799 (1.920,3.600)   63772299 byte
enumerate_replaceif         6.056s     2.455s     1.279 (1.920,3.600)   63772299 byte
replaceif_dict              5.555s     1.954s     1.018 (1.920,3.600)   63772299 byte
replaceif_dict_loc          5.599s     1.999s     1.041 (1.920,3.600)   63772299 byte  1.04
condwrite_always_dict       9.801s     6.201s     3.229 (1.920,3.600)   63772299 byte
condwrite_ifneed_list       9.608s     6.008s     3.128 (1.920,3.600)   63772299 byte
condwrite_ifneed_dict       9.960s     6.359s     3.312 (1.920,3.600)   63772299 byte
msgtxt_repr                 4.681s     1.080s     0.563 (1.920,3.600)   65040655 byte  time  size
fakelist_repr               4.778s     1.177s     0.613 (1.920,3.600)   72380687 byte  0.61, 1.13
list_repr                   5.337s     1.736s     0.904 (1.920,3.600)   71332111 byte
uesc                        6.562s     2.962s     1.542 (1.920,3.600)   84223875 byte
pickle2                     9.625s     6.025s     3.138 (1.920,3.600)  138282195 byte
json_str                    9.394s     5.793s     3.017 (1.920,3.600)   84047968 byte
json_fh                    19.304s    15.704s     8.178 (1.920,3.600)   82999392 byte

2.6.6.final.0 // Linux64 // Serialization
gen_txt_file               20.595s     0.000s     1.000                 45013059 byte
stream_utf8                 1.106s     0.000s     1.000 (1.106,1.106)   45013059 byte
stream_msgs                 1.894s     0.787s     1.000 (1.894,1.106)   45013059 byte
chain_replace               4.846s     2.952s     1.000 (4.846,1.894)   63759315 byte
enumerate_replace           6.075s     3.123s     1.649 (1.894,2.952)   63759315 byte
foreach_dict_replace        5.888s     2.935s     1.550 (1.894,2.952)   63759315 byte
foreach_idx_replace         6.357s     3.404s     1.797 (1.894,2.952)   63759315 byte
enumerate_replaceif         5.744s     2.791s     1.474 (1.894,2.952)   63759315 byte
replaceif_dict              5.425s     2.472s     1.305 (1.894,2.952)   63759315 byte
replaceif_dict_loc          5.388s     2.435s     1.286 (1.894,2.952)   63759315 byte  1.29
condwrite_always_dict       9.718s     6.766s     3.572 (1.894,2.952)   63759315 byte
condwrite_ifneed_list       9.600s     6.647s     3.510 (1.894,2.952)   63759315 byte
condwrite_ifneed_dict      10.032s     7.080s     3.738 (1.894,2.952)   63759315 byte
msgtxt_repr                 4.864s     1.912s     1.009 (1.894,2.952)   78917824 byte
fakelist_repr               4.903s     1.950s     1.030 (1.894,2.952)   86257856 byte  1.03
list_repr                   5.204s     2.252s     1.189 (1.894,2.952)   86257856 byte
uesc                        5.611s     2.658s     1.404 (1.894,2.952)   75173731 byte
pickle2                     9.186s     6.234s     3.291 (1.894,2.952)  128833185 byte
json_str                   12.212s     9.260s     4.889 (1.894,2.952)   84038028 byte
json_fh                     9.866s     6.914s     3.650 (1.894,2.952)   82989452 byte

2.7.13.final.0 // Linux64 // Deserialization
stream_msgs                 1.448s     0.609s     1.000 (1.448,0.838)   45004225 byte
dser_dummy                  5.216s     3.769s     2.603 (1.448,1.448)   45004225 byte
d_split_ast_eval           13.481s     8.264s     1.000 (13.481,5.216)   66897095 byte
d_split_eval               13.723s     5.459s     1.046 (5.216,8.264)   66897095 byte
d_lst_eval                 13.477s     5.212s     0.999 (5.216,8.264)   86246968 byte
d_lst_ast_eval             15.563s     7.298s     1.399 (5.216,8.264)   86246968 byte
d_split_repr_ast_e         11.164s     2.900s     1.000 (11.164,8.264)   78906936 byte
d_split_repr_eval          11.686s     3.421s     1.000 (11.686,8.264)   78906936 byte

2.7.13.final.0 // Linux64 // Serialization
gen lines                  15.888s
stream_utf8                 0.817s     0.000s     1.000 (0.817,0.817)   45012467 byte
stream_msgs                 1.409s     0.592s     1.000 (1.409,0.817)   45012467 byte
chain_replace               3.641s     2.232s     1.000 (3.641,1.409)   63760646 byte
enumerate_replace           4.327s     2.095s     1.487 (1.409,2.232)   63760646 byte
foreach_dict_replace        4.213s     1.981s     1.406 (1.409,2.232)   63760646 byte
foreach_idx_replace         4.638s     2.406s     1.707 (1.409,2.232)   63760646 byte
enumerate_replaceif         4.021s     1.789s     1.269 (1.409,2.232)   63760646 byte
replaceif_dict              x.xxxs     1.614s     1.145 (1.409,2.232)   63760646 byte
replaceif_dict_loc          3.820s     1.588s     1.127 (1.409,2.232)   63760646 byte  1.13
condwrite_always_dict       6.726s     4.494s     3.189 (1.409,2.232)   63760646 byte
condwrite_ifneed_list       6.541s     4.309s     3.058 (1.409,2.232)   63760646 byte
condwrite_ifneed_dict       6.999s     4.767s     3.382 (1.409,2.232)   63760646 byte
msgtxt_repr                 x.xxxs     1.448s     1.028 (1.409,2.232)   78921907 byte
fakelist_repr               x.xxxs     1.449s     1.028 (1.409,2.232)   86261939 byte  1.03
list_repr                   x.xxxs     1.658s     1.177 (1.409,2.232)   86261939 byte
uesc                        4.005s     1.773s     1.258 (1.409,2.232)   75177292 byte
pickle2                     6.674s     4.442s     3.152 (1.409,2.232)  128832607 byte
json_str                    5.591s     3.359s     2.383 (1.409,2.232)   84040832 byte
json_fh                     9.970s     7.738s     5.490 (1.409,2.232)   82992256 byte

3.5.3.final.0 // Linux64 // Deserialization of py3 data
stream_msgs                 1.290s     0.731s     1.000 (1.290,0.559)   45021263 byte
dser_dummy                  2.868s     1.578s     1.224 (1.290,1.290)   45021263 byte
d_split_ast_eval            8.752s     5.884s     1.000 (8.752,2.868)   66914436 byte
d_split_eval               10.635s     4.751s     1.657 (2.868,5.884)   66914436 byte
d_lst_eval                 13.332s     7.448s     2.597 (2.868,5.884)   72378292 byte
d_lst_ast_eval             12.996s     7.112s     2.480 (2.868,5.884)   72378292 byte
d_split_repr_ast_e          8.567s     2.683s     1.000 (8.567,5.884)   65038260 byte
d_split_repr_eval          10.520s     4.636s     1.000 (10.520,5.884)   65038260 byte

3.5.3.final.0 // Linux64 // Deserialization of py2 data
stream_msgs                 1.263s     0.717s     1.000 (1.263,0.547)   45004225 byte
dser_dummy                  2.849s     1.586s     1.255 (1.263,1.263)   45004225 byte
d_split_ast_eval            8.709s     5.860s     1.000 (8.709,2.849)   66897095 byte
d_split_eval               10.544s     4.684s     1.644 (2.849,5.860)   66897095 byte
d_lst_eval                 11.550s     5.690s     1.997 (2.849,5.860)   86246968 byte
d_lst_ast_eval             11.752s     5.892s     2.068 (2.849,5.860)   86246968 byte
d_split_repr_ast_e          7.190s     1.331s     1.000 (7.190,5.860)   78906936 byte
d_split_repr_eval           8.862s     3.002s     1.000 (8.862,5.860)   78906936 byte

3.5.3.final.0 // Linux64 // Serialization
gen lines                  25.908s
stream_utf8                 0.551s     0.000s     1.000 (0.551,0.551)   45012467 byte
stream_msgs                 1.298s     0.747s     1.000 (1.298,0.551)   45012467 byte
chain_replace               3.204s     1.906s     1.000 (3.204,1.298)   63760646 byte
enumerate_replace           3.727s     1.821s     1.403 (1.298,1.906)   63760646 byte
foreach_dict_replace        3.488s     1.582s     1.219 (1.298,1.906)   63760646 byte
foreach_idx_replace         4.098s     2.192s     1.689 (1.298,1.906)   63760646 byte
enumerate_replaceif         3.330s     1.424s     1.097 (1.298,1.906)   63760646 byte
replaceif_dict              x.xxxs     1.104s     0.864 (1.298,1.906)   63760646 byte
replaceif_dict_loc          x.xxxs     1.085s     0.849 (1.298,1.906)   63760646 byte  0.85
condwrite_always_dict       6.041s     4.136s     3.186 (1.298,1.906)   63760646 byte
condwrite_ifneed_list       5.556s     3.651s     2.813 (1.298,1.906)   63760646 byte
condwrite_ifneed_dict       5.783s     3.877s     2.987 (1.298,1.906)   63760646 byte
msgtxt_repr                 x.xxxs     0.710s     0.556 (1.298,1.906)   65030226 byte
fakelist_repr               x.xxxs     0.760s     0.595 (1.298,1.906)   65030226 byte  0.60
list_repr                   x.xxxs     1.051s     0.823 (1.298,1.906)   71321682 byte
uesc                        3.656s     1.751s     1.349 (1.298,1.906)   84220410 byte
pickle2                     4.623s     2.717s     2.093 (1.298,1.906)  138269939 byte
json_str                    5.036s     3.130s     2.411 (1.298,1.906)   84040832 byte
json_fh                     9.213s     7.307s     5.630 (1.298,1.906)   82992256 byte


# check which serializations are identical across python versions
{ { find -type f | while read fn; do [[ $(head -n 3 "$fn" | wc -c) -gt 300 ]] && { sha256sum "$fn"; continue; }; head -n 1 "$fn" | grep -qE '[^a-zA-Z][a-zA-Z]{8}[^a-zA-Z]' || { sha256sum "$fn"; continue; }; printf '%s %s\n' "$(sed -r 's/([^a-zA-Z])[a-zA-Z]{8}([^a-zA-Z])/\1\2/' < "$fn" | sha256sum)" "$fn"; done; sleep 1; echo; } | tee /dev/stderr; } | sort


## TEST
with open('/dev/shm/py2.repr', 'rb') as f: eval(f.read().decode('utf-8'))
with open('/dev/shm/py3.repr', 'rb') as f: eval(f.read().decode('utf-8'))
with open('/dev/shm/py2.repr', 'rb') as f: __import__('json').dumps(eval(f.read().decode('utf-8')))
with open('/dev/shm/py3.repr', 'rb') as f: __import__('json').dumps(eval(f.read().decode('utf-8')))

## RESULT
Python 2.7.13 (default, Nov 24 2017, 17:33:09) Linux
Python 2.6 (r26:66721, Oct  2 2008, 11:35:03) Windows
[5, u'eyFEfvUb', u'\u591agt\u6d41LD GlONE\'r/u\u5b87FZX\u3057A\\iz  iKhz  ep"pOzwvA \\ah']
[5, 'RHrVSKcB', '\xe5\xa4\x9agt\xe6\xb5\x81LD GlONE\'r/u\xe5\xae\x87FZX\xe3\x81\x97A\\iz  iKhz  ep"pOzwvA \\ah']
'[5, "eyFEfvUb", "\\u591agt\\u6d41LD GlONE\'r/u\\u5b87FZX\\u3057A\\\\iz  iKhz  ep\\"pOzwvA \\\\ah"]'
'[5, "RHrVSKcB", "\\u591agt\\u6d41LD GlONE\'r/u\\u5b87FZX\\u3057A\\\\iz  iKhz  ep\\"pOzwvA \\\\ah"]'

## RESULT
Python 3.5.3 (default, Jan 19 2017, 14:11:04) Linux
Python 3.6.2 (v3.6.2:5fd33b5, Jul  8 2017, 04:57:36) Windows
[5, 'eyFEfvUb', '多gt流LD GlONE\'r/u宇FZXしA\\iz  iKhz  ep"pOzwvA \\ah']
[5, 'RHrVSKcB', '多gt流LD GlONE\'r/u宇FZXしA\\iz  iKhz  ep"pOzwvA \\ah']
'[5, "eyFEfvUb", "\\u591agt\\u6d41LD GlONE\'r/u\\u5b87FZX\\u3057A\\\\iz  iKhz  ep\\"pOzwvA \\\\ah"]'
'[5, "RHrVSKcB", "\\u591agt\\u6d41LD GlONE\'r/u\\u5b87FZX\\u3057A\\\\iz  iKhz  ep\\"pOzwvA \\\\ah"]'


## TEST
with open('/dev/shm/py2.repr', 'rb') as f: v2=eval(f.read().decode('utf-8'))[2]
with open('/dev/shm/py3.repr', 'rb') as f: v3=eval(f.read().decode('utf-8'))[2]
if v2==v3: print('eval(py2repr) == eval(py3repr)')

## py2 FAIL, fix:
if isinstance(v3,str): v3=v3.decode('utf-8')

## py3 SUCCESS


## TEST
with open('/dev/shm/py2.repr', 'rb') as f: v2=__import__('ast').literal_eval(f.read().decode('utf-8'))[2]
with open('/dev/shm/py3.repr', 'rb') as f: v3=__import__('ast').literal_eval(f.read().decode('utf-8'))[2]
if v2==v3: print('eval(py2repr) == eval(py3repr)')

# same results as with native eval


currently,
only d_split_ast_eval and d_split_eval succeed with py2 on py3 data
all deserializations succeed with py3 on py2 data
"""