# Patch summary (commentary only; patch tools skip text before the first
# "diff --git" line).
#
# Lib/test/test_marshal.py (new file, 41 lines)
#   * Round-trips n and -n through marshal.dumps()/marshal.loads(), starting
#     at n = sys.maxint and halving (n >> 1) until n reaches 0; any mismatch
#     raises TestFailed.
#   * Hand-builds marshal strings as 'I' followed by 8 little-endian bytes
#     (to_little_endian_string) and checks that marshal.loads() returns the
#     original value.  Start values are maxint64, minint64, -maxint64 and
#     -(minint64 >> 1); each is shifted right by one until it reaches 0.
#     -1 is special-cased because -1 >> 1 is still -1.
#     NOTE(review): 'I' is presumably the TYPE_INT64 type code -- confirm
#     against Python/marshal.c; the definition is not part of this patch.
#
# Misc/NEWS
#   * Adds a "Library" entry describing the 8-byte-int reading change for
#     pickle, cPickle and marshal.
#
# Python/marshal.c
#   * r_long64() now returns a PyObject * instead of a long.  It reads two
#     r_long() values (lo4, then hi4).  With SIZEOF_LONG > 4 they are
#     combined into one long and returned through PyInt_FromLong().
#     Otherwise they are copied into an 8-byte buffer in host byte order
#     (detected at run time through `one`) and converted with
#     _PyLong_FromByteArray(buf, 8, is_little_endian, 1).
#     The old "Warning: un-marshal 64-bit int in 32-bit mode" message to
#     stderr is removed.
#   * r_object() returns r_long64(p) directly for TYPE_INT64.
#   NOTE(review): the final argument 1 to _PyLong_FromByteArray presumably
#     selects signed interpretation -- confirm against its declaration.
#   NOTE(review): if r_long() can return a negative value for hi4, then
#     `hi4 << 32` left-shifts a negative signed long, which C leaves
#     undefined; consider shifting as unsigned long.  Not changed here.
#   NOTE(review): indentation inside the hunks is shown conventionally
#     (4 spaces for Python, tabs for C); verify against the original commit
#     before applying.
#
diff --git a/Lib/test/test_marshal.py b/Lib/test/test_marshal.py
new file mode 100644
index 00000000000..38b3cd4698c
--- /dev/null
+++ b/Lib/test/test_marshal.py
@@ -0,0 +1,41 @@
+from test_support import TestFailed
+import marshal
+import sys
+
+# XXX Much more needed here.
+
+# Test the full range of Python ints.
+n = sys.maxint
+while n:
+    for expected in (-n, n):
+        s = marshal.dumps(expected)
+        got = marshal.loads(s)
+        if expected != got:
+            raise TestFailed("for int %d, marshal string is %r, loaded "
+                             "back as %d" % (expected, s, got))
+    n = n >> 1
+
+# Simulate int marshaling on a 64-bit box. This is most interesting if
+# we're running the test on a 32-bit box, of course.
+
+def to_little_endian_string(value, nbytes):
+    bytes = []
+    for i in range(nbytes):
+        bytes.append(chr(value & 0xff))
+        value >>= 8
+    return ''.join(bytes)
+
+maxint64 = (1L << 63) - 1
+minint64 = -maxint64-1
+
+for base in maxint64, minint64, -maxint64, -(minint64 >> 1):
+    while base:
+        s = 'I' + to_little_endian_string(base, 8)
+        got = marshal.loads(s)
+        if base != got:
+            raise TestFailed("for int %d, simulated marshal string is %r, "
+                             "loaded back as %d" % (base, s, got))
+        if base == -1: # a fixed-point for shifting right 1
+            base = 0
+        else:
+            base >>= 1
diff --git a/Misc/NEWS b/Misc/NEWS
index bf8d1fff532..35737acc115 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -8,6 +8,11 @@ Core
 
 Library
 
++ pickle, cPickle and marshal on 32-bit platforms can now correctly read
+  dumps containing ints written on platforms where Python ints are 8 bytes.
+  When read on a box where Python ints are 4 bytes, such values are
+  converted to Python longs.
+
 Tools
 
 Build
diff --git a/Python/marshal.c b/Python/marshal.c
index 5ef11ef2263..029f2b996df 100644
--- a/Python/marshal.c
+++ b/Python/marshal.c
@@ -342,23 +342,35 @@ r_long(RFILE *p)
 	return x;
 }
 
-static long
+/* r_long64 deals with the TYPE_INT64 code. On a machine with
+   sizeof(long) > 4, it returns a Python int object, else a Python long
+   object. Note that w_long64 writes out TYPE_INT if 32 bits is enough,
+   so there's no inefficiency here in returning a PyLong on 32-bit boxes
+   for everything written via TYPE_INT64 (i.e., if an int is written via
+   TYPE_INT64, it *needs* more than 32 bits).
+*/
+static PyObject *
 r_long64(RFILE *p)
 {
-	register long x;
-	x = r_long(p);
+	long lo4 = r_long(p);
+	long hi4 = r_long(p);
 #if SIZEOF_LONG > 4
-	x = (x & 0xFFFFFFFFL) | (r_long(p) << 32);
+	long x = (hi4 << 32) | (lo4 & 0xFFFFFFFFL);
+	return PyInt_FromLong(x);
 #else
-	if (r_long(p) != 0) {
-		PyObject *f = PySys_GetObject("stderr");
-		if (f != NULL)
-			(void) PyFile_WriteString(
-			    "Warning: un-marshal 64-bit int in 32-bit mode\n",
-			    f);
+	unsigned char buf[8];
+	int one = 1;
+	int is_little_endian = (int)*(char*)&one;
+	if (is_little_endian) {
+		memcpy(buf, &lo4, 4);
+		memcpy(buf+4, &hi4, 4);
 	}
+	else {
+		memcpy(buf, &hi4, 4);
+		memcpy(buf+4, &lo4, 4);
+	}
+	return _PyLong_FromByteArray(buf, 8, is_little_endian, 1);
 #endif
-	return x;
 }
 
 static PyObject *
@@ -394,7 +406,7 @@ r_object(RFILE *p)
 		return PyInt_FromLong(r_long(p));
 
 	case TYPE_INT64:
-		return PyInt_FromLong(r_long64(p));
+		return r_long64(p);
 
 	case TYPE_LONG:
 		{