Get rid of the superstitious "~" in dict hashing's "i = (~hash) & mask".
The comment following used to say:
/* We use ~hash instead of hash, as degenerate hash functions, such
as for ints <sigh>, can have lots of leading zeros. It's not
really a performance risk, but better safe than sorry.
12-Dec-00 tim: so ~hash produces lots of leading ones instead --
what's the gain? */
That is, there was never a good reason for doing it. And to the contrary,
as explained on Python-Dev last December, it tended to make the *sum*
(i + incr) & mask (which is the first table index examined in case of
collision) the same "too often" across distinct hashes.
Changing to the simpler "i = hash & mask" reduced the number of string-dict
collisions (== number of times we go around the lookup for-loop) from about
6 million to 5 million during a full run of the test suite (these are
approximate because the test suite does some random stuff from run to run).
The number of collisions in non-string dicts also decreased, but not as
dramatically.
Note that this may, for a given dict, change the order (wrt previous
releases) of entries exposed by .keys(), .values() and .items(). A number
of std tests suffered bogus failures as a result. For dicts keyed by
small ints, or (less so) by characters, the order is much more likely to be
in increasing order of key now; e.g.,
>>> d = {}
>>> for i in range(10):
... d[i] = i
...
>>> d
{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9}
>>>
Unfortunately, people may latch on to that in small examples and draw a
bogus conclusion.
test_support.py
Moved test_extcall's sortdict() into test_support, made it stronger,
and imported sortdict into other std tests that needed it.
test_unicode.py
Excluded cp875 from the "roundtrip over range(128)" test, because
cp875 doesn't have a well-defined inverse for unicode("?", "cp875").
See Python-Dev for excruciating details.
Cookie.py
Changed various output functions to sort dicts before building
strings from them.
test_extcall
Fiddled the expected-result file. This remains sensitive to native
dict ordering, because, e.g., if there are multiple errors in a
keyword-arg dict (and test_extcall sets up many cases like that), the
specific error Python complains about first depends on native dict
ordering.
2001-05-13 00:19:31 +00:00
|
|
|
from test_support import verify, verbose, TestFailed, sortdict
|
2000-03-28 23:51:17 +00:00
|
|
|
from UserList import UserList
|
|
|
|
|
|
|
|
def f(*a, **k):
|
2001-01-21 18:52:02 +00:00
|
|
|
print a, sortdict(k)
|
2000-03-28 23:51:17 +00:00
|
|
|
|
|
|
|
def g(x, *y, **z):
|
2001-01-21 18:52:02 +00:00
|
|
|
print x, y, sortdict(z)
|
2000-03-28 23:51:17 +00:00
|
|
|
|
|
|
|
def h(j=1, a=2, h=3):
|
|
|
|
print j, a, h
|
|
|
|
|
|
|
|
f()
|
|
|
|
f(1)
|
|
|
|
f(1, 2)
|
|
|
|
f(1, 2, 3)
|
|
|
|
|
|
|
|
f(1, 2, 3, *(4, 5))
|
|
|
|
f(1, 2, 3, *[4, 5])
|
2000-03-28 23:53:22 +00:00
|
|
|
f(1, 2, 3, *UserList([4, 5]))
|
2000-03-28 23:51:17 +00:00
|
|
|
f(1, 2, 3, **{'a':4, 'b':5})
|
|
|
|
f(1, 2, 3, *(4, 5), **{'a':6, 'b':7})
|
|
|
|
f(1, 2, 3, x=4, y=5, *(6, 7), **{'a':8, 'b':9})
|
|
|
|
|
|
|
|
try:
|
|
|
|
g()
|
|
|
|
except TypeError, err:
|
|
|
|
print "TypeError:", err
|
|
|
|
else:
|
|
|
|
print "should raise TypeError: not enough arguments; expected 1, got 0"
|
2000-10-23 17:22:08 +00:00
|
|
|
|
2000-03-28 23:51:17 +00:00
|
|
|
try:
|
|
|
|
g(*())
|
|
|
|
except TypeError, err:
|
|
|
|
print "TypeError:", err
|
|
|
|
else:
|
|
|
|
print "should raise TypeError: not enough arguments; expected 1, got 0"
|
2000-10-23 17:22:08 +00:00
|
|
|
|
2000-03-28 23:51:17 +00:00
|
|
|
try:
|
|
|
|
g(*(), **{})
|
|
|
|
except TypeError, err:
|
|
|
|
print "TypeError:", err
|
|
|
|
else:
|
|
|
|
print "should raise TypeError: not enough arguments; expected 1, got 0"
|
2000-10-23 17:22:08 +00:00
|
|
|
|
2000-03-28 23:51:17 +00:00
|
|
|
g(1)
|
|
|
|
g(1, 2)
|
|
|
|
g(1, 2, 3)
|
|
|
|
g(1, 2, 3, *(4, 5))
|
2000-03-30 23:55:31 +00:00
|
|
|
class Nothing: pass
|
|
|
|
try:
|
|
|
|
g(*Nothing())
|
2001-05-05 03:56:37 +00:00
|
|
|
except TypeError, attr:
|
2000-04-10 13:37:14 +00:00
|
|
|
pass
|
2000-03-30 23:55:31 +00:00
|
|
|
else:
|
2001-05-05 03:56:37 +00:00
|
|
|
print "should raise TypeError"
|
2000-03-30 23:55:31 +00:00
|
|
|
|
|
|
|
class Nothing:
|
|
|
|
def __len__(self):
|
|
|
|
return 5
|
|
|
|
try:
|
|
|
|
g(*Nothing())
|
2001-05-05 03:56:37 +00:00
|
|
|
except TypeError, attr:
|
2000-04-10 13:37:14 +00:00
|
|
|
pass
|
2000-03-30 23:55:31 +00:00
|
|
|
else:
|
2001-05-05 03:56:37 +00:00
|
|
|
print "should raise TypeError"
|
2000-10-23 17:22:08 +00:00
|
|
|
|
2000-03-30 23:55:31 +00:00
|
|
|
class Nothing:
|
|
|
|
def __len__(self):
|
|
|
|
return 5
|
|
|
|
def __getitem__(self, i):
|
|
|
|
if i < 3:
|
|
|
|
return i
|
|
|
|
else:
|
|
|
|
raise IndexError, i
|
|
|
|
g(*Nothing())
|
|
|
|
|
|
|
|
# make sure the function call doesn't stomp on the dictionary?
|
|
|
|
d = {'a': 1, 'b': 2, 'c': 3}
|
|
|
|
d2 = d.copy()
|
2001-01-17 19:11:13 +00:00
|
|
|
verify(d == d2)
|
2000-03-30 23:55:31 +00:00
|
|
|
g(1, d=4, **d)
|
2001-01-21 18:52:02 +00:00
|
|
|
print sortdict(d)
|
|
|
|
print sortdict(d2)
|
2001-01-17 19:11:13 +00:00
|
|
|
verify(d == d2, "function call modified dictionary")
|
2000-03-30 23:55:31 +00:00
|
|
|
|
|
|
|
# what about willful misconduct?
|
|
|
|
def saboteur(**kw):
|
2000-07-15 00:42:09 +00:00
|
|
|
kw['x'] = locals() # yields a cyclic kw
|
|
|
|
return kw
|
2000-03-30 23:55:31 +00:00
|
|
|
d = {}
|
2000-07-15 00:42:09 +00:00
|
|
|
kw = saboteur(a=1, **d)
|
2001-01-17 19:11:13 +00:00
|
|
|
verify(d == {})
|
2000-07-15 00:42:09 +00:00
|
|
|
# break the cycle
|
|
|
|
del kw['x']
|
2000-10-23 17:22:08 +00:00
|
|
|
|
2000-03-28 23:51:17 +00:00
|
|
|
try:
|
|
|
|
g(1, 2, 3, **{'x':4, 'y':5})
|
|
|
|
except TypeError, err:
|
|
|
|
print err
|
|
|
|
else:
|
|
|
|
print "should raise TypeError: keyword parameter redefined"
|
2000-10-23 17:22:08 +00:00
|
|
|
|
2000-03-28 23:51:17 +00:00
|
|
|
try:
|
|
|
|
g(1, 2, 3, a=4, b=5, *(6, 7), **{'a':8, 'b':9})
|
|
|
|
except TypeError, err:
|
|
|
|
print err
|
|
|
|
else:
|
|
|
|
print "should raise TypeError: keyword parameter redefined"
|
|
|
|
|
|
|
|
try:
|
|
|
|
f(**{1:2})
|
|
|
|
except TypeError, err:
|
|
|
|
print err
|
|
|
|
else:
|
|
|
|
print "should raise TypeError: keywords must be strings"
|
|
|
|
|
|
|
|
try:
|
|
|
|
h(**{'e': 2})
|
|
|
|
except TypeError, err:
|
|
|
|
print err
|
|
|
|
else:
|
|
|
|
print "should raise TypeError: unexpected keyword argument: e"
|
|
|
|
|
|
|
|
try:
|
|
|
|
h(*h)
|
|
|
|
except TypeError, err:
|
|
|
|
print err
|
|
|
|
else:
|
|
|
|
print "should raise TypeError: * argument must be a tuple"
|
|
|
|
|
2001-04-11 13:53:35 +00:00
|
|
|
try:
|
|
|
|
dir(*h)
|
|
|
|
except TypeError, err:
|
|
|
|
print err
|
|
|
|
else:
|
|
|
|
print "should raise TypeError: * argument must be a tuple"
|
|
|
|
|
|
|
|
try:
|
|
|
|
None(*h)
|
|
|
|
except TypeError, err:
|
|
|
|
print err
|
|
|
|
else:
|
|
|
|
print "should raise TypeError: * argument must be a tuple"
|
|
|
|
|
2000-03-28 23:51:17 +00:00
|
|
|
try:
|
|
|
|
h(**h)
|
|
|
|
except TypeError, err:
|
|
|
|
print err
|
|
|
|
else:
|
|
|
|
print "should raise TypeError: ** argument must be a dictionary"
|
|
|
|
|
2001-04-11 13:53:35 +00:00
|
|
|
try:
|
|
|
|
dir(**h)
|
|
|
|
except TypeError, err:
|
|
|
|
print err
|
|
|
|
else:
|
|
|
|
print "should raise TypeError: ** argument must be a dictionary"
|
|
|
|
|
|
|
|
try:
|
|
|
|
None(**h)
|
|
|
|
except TypeError, err:
|
|
|
|
print err
|
|
|
|
else:
|
|
|
|
print "should raise TypeError: ** argument must be a dictionary"
|
|
|
|
|
|
|
|
try:
|
|
|
|
dir(b=1,**{'b':1})
|
|
|
|
except TypeError, err:
|
|
|
|
print err
|
|
|
|
else:
|
|
|
|
print "should raise TypeError: dir() got multiple values for keyword argument 'b'"
|
|
|
|
|
2000-03-28 23:51:17 +00:00
|
|
|
def f2(*a, **b):
|
|
|
|
return a, b
|
|
|
|
|
|
|
|
d = {}
|
|
|
|
for i in range(512):
|
|
|
|
key = 'k%d' % i
|
|
|
|
d[key] = i
|
|
|
|
a, b = f2(1, *(2, 3), **d)
|
|
|
|
print len(a), len(b), b == d
|
2000-10-30 17:15:20 +00:00
|
|
|
|
|
|
|
class Foo:
|
|
|
|
def method(self, arg1, arg2):
|
|
|
|
return arg1 + arg2
|
|
|
|
|
|
|
|
x = Foo()
|
|
|
|
print Foo.method(*(x, 1, 2))
|
|
|
|
print Foo.method(x, *(1, 2))
|
|
|
|
try:
|
|
|
|
print Foo.method(*(1, 2, 3))
|
|
|
|
except TypeError, err:
|
2001-08-24 19:11:57 +00:00
|
|
|
pass
|
|
|
|
else:
|
|
|
|
print 'expected a TypeError for unbound method call'
|
2000-10-30 17:15:20 +00:00
|
|
|
try:
|
|
|
|
print Foo.method(1, *(2, 3))
|
|
|
|
except TypeError, err:
|
2001-08-24 19:11:57 +00:00
|
|
|
pass
|
|
|
|
else:
|
|
|
|
print 'expected a TypeError for unbound method call'
|
2000-10-30 17:15:20 +00:00
|
|
|
|
2001-01-04 22:33:02 +00:00
|
|
|
# A PyCFunction that takes only positional parameters should allow an
|
|
|
|
# empty keyword dictionary to pass without a complaint, but raise a
|
|
|
|
# TypeError if the dictionary is non-empty.
|
|
|
|
id(1, **{})
|
|
|
|
try:
|
|
|
|
id(1, **{"foo": 1})
|
|
|
|
except TypeError:
|
|
|
|
pass
|
|
|
|
else:
|
|
|
|
raise TestFailed, 'expected TypeError; no exception raised'
|
2001-01-15 22:14:16 +00:00
|
|
|
|
|
|
|
a, b, d, e, v, k = 'A', 'B', 'D', 'E', 'V', 'K'
|
|
|
|
funcs = []
|
|
|
|
maxargs = {}
|
|
|
|
for args in ['', 'a', 'ab']:
|
|
|
|
for defargs in ['', 'd', 'de']:
|
|
|
|
for vararg in ['', 'v']:
|
|
|
|
for kwarg in ['', 'k']:
|
|
|
|
name = 'z' + args + defargs + vararg + kwarg
|
|
|
|
arglist = list(args) + map(
|
|
|
|
lambda x: '%s="%s"' % (x, x), defargs)
|
|
|
|
if vararg: arglist.append('*' + vararg)
|
|
|
|
if kwarg: arglist.append('**' + kwarg)
|
Get rid of the superstitious "~" in dict hashing's "i = (~hash) & mask".
The comment following used to say:
/* We use ~hash instead of hash, as degenerate hash functions, such
as for ints <sigh>, can have lots of leading zeros. It's not
really a performance risk, but better safe than sorry.
12-Dec-00 tim: so ~hash produces lots of leading ones instead --
what's the gain? */
That is, there was never a good reason for doing it. And to the contrary,
as explained on Python-Dev last December, it tended to make the *sum*
(i + incr) & mask (which is the first table index examined in case of
collision) the same "too often" across distinct hashes.
Changing to the simpler "i = hash & mask" reduced the number of string-dict
collisions (== number of times we go around the lookup for-loop) from about
6 million to 5 million during a full run of the test suite (these are
approximate because the test suite does some random stuff from run to run).
The number of collisions in non-string dicts also decreased, but not as
dramatically.
Note that this may, for a given dict, change the order (wrt previous
releases) of entries exposed by .keys(), .values() and .items(). A number
of std tests suffered bogus failures as a result. For dicts keyed by
small ints, or (less so) by characters, the order is much more likely to be
in increasing order of key now; e.g.,
>>> d = {}
>>> for i in range(10):
... d[i] = i
...
>>> d
{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9}
>>>
Unfortunately, people may latch on to that in small examples and draw a
bogus conclusion.
test_support.py
Moved test_extcall's sortdict() into test_support, made it stronger,
and imported sortdict into other std tests that needed it.
test_unicode.py
Excluded cp875 from the "roundtrip over range(128)" test, because
cp875 doesn't have a well-defined inverse for unicode("?", "cp875").
See Python-Dev for excruciating details.
Cookie.py
Changed various output functions to sort dicts before building
strings from them.
test_extcall
Fiddled the expected-result file. This remains sensitive to native
dict ordering, because, e.g., if there are multiple errors in a
keyword-arg dict (and test_extcall sets up many cases like that), the
specific error Python complains about first depends on native dict
ordering.
2001-05-13 00:19:31 +00:00
|
|
|
decl = (('def %s(%s): print "ok %s", a, b, d, e, v, ' +
|
|
|
|
'type(k) is type ("") and k or sortdict(k)')
|
|
|
|
% (name, ', '.join(arglist), name))
|
2001-01-15 22:14:16 +00:00
|
|
|
exec(decl)
|
|
|
|
func = eval(name)
|
|
|
|
funcs.append(func)
|
|
|
|
maxargs[func] = len(args + defargs)
|
|
|
|
|
|
|
|
for name in ['za', 'zade', 'zabk', 'zabdv', 'zabdevk']:
|
|
|
|
func = eval(name)
|
|
|
|
for args in [(), (1, 2), (1, 2, 3, 4, 5)]:
|
|
|
|
for kwargs in ['', 'a', 'd', 'ad', 'abde']:
|
|
|
|
kwdict = {}
|
|
|
|
for k in kwargs: kwdict[k] = k + k
|
2001-01-21 18:52:02 +00:00
|
|
|
print func.func_name, args, sortdict(kwdict), '->',
|
2001-01-15 22:14:16 +00:00
|
|
|
try: apply(func, args, kwdict)
|
|
|
|
except TypeError, err: print err
|