2010-03-08 22:17:58 +00:00
|
|
|
"""This script generates a Python codec module from a Windows Code Page.
|
|
|
|
|
|
|
|
It uses the function MultiByteToWideChar to generate a decoding table.
|
|
|
|
"""
|
|
|
|
|
|
|
|
import ctypes
|
|
|
|
from ctypes import wintypes
|
|
|
|
from gencodec import codegen
|
|
|
|
import unicodedata
|
|
|
|
|
|
|
|
def genwinmap(codepage):
    """Build the decoding table of a single-byte Windows code page.

    Each byte value from 0 to 255 is decoded on its own with the Win32
    function MultiByteToWideChar, and the resulting character is looked up
    in the Unicode character database.

    Args:
        codepage: Windows code page identifier, passed to
            MultiByteToWideChar as its first argument.

    Returns:
        A dict mapping every byte value (0-255) to a tuple
        ``(code_point, name)``.  ``name`` is the Unicode name of the decoded
        character; when the character has no Unicode name it is
        'CONTROL CHARACTER' for bytes 0-31 and 127, and '' for every other
        byte.

    Raises:
        ValueError: if MultiByteToWideChar does not convert a byte into
            exactly one wide character.  The original code used ``assert``
            for this check, which disappears under ``python -O``.
    """
    MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
    MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
                                    wintypes.LPCSTR, ctypes.c_int,
                                    wintypes.LPWSTR, ctypes.c_int]
    MultiByteToWideChar.restype = ctypes.c_int

    # Bytes whose decoded character usually has no Unicode name but should
    # still get a descriptive label in the generated table.
    control_bytes = frozenset(range(32)) | {127}

    enc2uni = {}
    for i in range(256):
        # Room for one converted character plus the untouched terminator.
        buf = ctypes.create_unicode_buffer(2)
        ret = MultiByteToWideChar(
            codepage, 0,
            bytes([i]), 1,
            buf, 2)
        if ret != 1:
            # Input validation must survive "python -O", so raise instead
            # of asserting.
            raise ValueError(
                'invalid code page %r: byte 0x%02X was not converted to '
                'exactly one character' % (codepage, i))
        # Internal sanity check: one character written, the rest still zero.
        assert buf[1] == '\x00'

        fallback = 'CONTROL CHARACTER' if i in control_bytes else ''
        name = unicodedata.name(buf[0], fallback)
        enc2uni[i] = (ord(buf[0]), name)

    return enc2uni
|
|
|
|
|
|
|
|
def genwincodec(codepage):
    """Print the source of a Python codec module for a Windows code page.

    The table returned by genwinmap() is passed to codegen() under the
    encoding name 'cp<codepage>'.  The beginning of the generated source,
    up to and including the first docstring-closing marker, is then
    replaced by a docstring that records the output of
    platform.win32_ver() and the command used, and the result is written
    to standard output.

    Args:
        codepage: Windows code page number (formatted with ``%d``).
    """
    import platform

    decoding_map = genwinmap(codepage)
    encodingname = 'cp%d' % codepage
    generated = codegen("", decoding_map, encodingname)

    # Build our own docstring to stand in for the first lines of the
    # generated source.
    windows_version = ' '.join(platform.win32_ver())
    header = '''\
"""Python Character Mapping Codec %s generated on Windows:
%s with the command:
python Tools/unicode/genwincodec.py %s
"""#"
''' % (encodingname, windows_version, codepage)

    # Keep only what follows the marker that ends the generated docstring.
    remainder = generated.split('"""#"', 1)[1]

    print(header + remainder)
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the numeric
    # Windows code page to generate a codec for.
    import sys

    requested_codepage = int(sys.argv[1])
    genwincodec(requested_codepage)
|