mirror of https://github.com/python/cpython.git
Removed the decoding_map from the codecs where this is possible.
Replaced the tis_620, cp1140 and koi8_u codecs with new ones based on custom mapping files.
This commit is contained in:
parent
921fa8595e
commit
3c72ded23d
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,11 +1,8 @@
|
|||
""" Python Character Mapping Codec for cp1140
|
||||
""" Python Character Mapping Codec generated from 'python-mappings/CP1140.TXT' with gencodec.py.
|
||||
|
||||
Written by Brian Quinlan(brian@sweetapp.com). NO WARRANTY.
|
||||
"""
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
import copy
|
||||
import cp037
|
||||
|
||||
### Codec APIs
|
||||
|
||||
|
@ -17,8 +14,8 @@ def encode(self,input,errors='strict'):
|
|||
|
||||
def decode(self,input,errors='strict'):
|
||||
|
||||
return codecs.charmap_decode(input,errors,decoding_map)
|
||||
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
|
||||
pass
|
||||
|
||||
|
@ -31,14 +28,525 @@ def getregentry():
|
|||
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = copy.copy(cp037.decoding_map)
|
||||
### Decoding Table
|
||||
|
||||
decoding_map.update({
|
||||
0x009f: 0x20ac # EURO SIGN
|
||||
})
|
||||
decoding_table = (
|
||||
u'\x00' # 0x00 -> NULL
|
||||
u'\x01' # 0x01 -> START OF HEADING
|
||||
u'\x02' # 0x02 -> START OF TEXT
|
||||
u'\x03' # 0x03 -> END OF TEXT
|
||||
u'\x9c' # 0x04 -> CONTROL
|
||||
u'\t' # 0x05 -> HORIZONTAL TABULATION
|
||||
u'\x86' # 0x06 -> CONTROL
|
||||
u'\x7f' # 0x07 -> DELETE
|
||||
u'\x97' # 0x08 -> CONTROL
|
||||
u'\x8d' # 0x09 -> CONTROL
|
||||
u'\x8e' # 0x0a -> CONTROL
|
||||
u'\x0b' # 0x0b -> VERTICAL TABULATION
|
||||
u'\x0c' # 0x0c -> FORM FEED
|
||||
u'\r' # 0x0d -> CARRIAGE RETURN
|
||||
u'\x0e' # 0x0e -> SHIFT OUT
|
||||
u'\x0f' # 0x0f -> SHIFT IN
|
||||
u'\x10' # 0x10 -> DATA LINK ESCAPE
|
||||
u'\x11' # 0x11 -> DEVICE CONTROL ONE
|
||||
u'\x12' # 0x12 -> DEVICE CONTROL TWO
|
||||
u'\x13' # 0x13 -> DEVICE CONTROL THREE
|
||||
u'\x9d' # 0x14 -> CONTROL
|
||||
u'\x85' # 0x15 -> CONTROL
|
||||
u'\x08' # 0x16 -> BACKSPACE
|
||||
u'\x87' # 0x17 -> CONTROL
|
||||
u'\x18' # 0x18 -> CANCEL
|
||||
u'\x19' # 0x19 -> END OF MEDIUM
|
||||
u'\x92' # 0x1a -> CONTROL
|
||||
u'\x8f' # 0x1b -> CONTROL
|
||||
u'\x1c' # 0x1c -> FILE SEPARATOR
|
||||
u'\x1d' # 0x1d -> GROUP SEPARATOR
|
||||
u'\x1e' # 0x1e -> RECORD SEPARATOR
|
||||
u'\x1f' # 0x1f -> UNIT SEPARATOR
|
||||
u'\x80' # 0x20 -> CONTROL
|
||||
u'\x81' # 0x21 -> CONTROL
|
||||
u'\x82' # 0x22 -> CONTROL
|
||||
u'\x83' # 0x23 -> CONTROL
|
||||
u'\x84' # 0x24 -> CONTROL
|
||||
u'\n' # 0x25 -> LINE FEED
|
||||
u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK
|
||||
u'\x1b' # 0x27 -> ESCAPE
|
||||
u'\x88' # 0x28 -> CONTROL
|
||||
u'\x89' # 0x29 -> CONTROL
|
||||
u'\x8a' # 0x2a -> CONTROL
|
||||
u'\x8b' # 0x2b -> CONTROL
|
||||
u'\x8c' # 0x2c -> CONTROL
|
||||
u'\x05' # 0x2d -> ENQUIRY
|
||||
u'\x06' # 0x2e -> ACKNOWLEDGE
|
||||
u'\x07' # 0x2f -> BELL
|
||||
u'\x90' # 0x30 -> CONTROL
|
||||
u'\x91' # 0x31 -> CONTROL
|
||||
u'\x16' # 0x32 -> SYNCHRONOUS IDLE
|
||||
u'\x93' # 0x33 -> CONTROL
|
||||
u'\x94' # 0x34 -> CONTROL
|
||||
u'\x95' # 0x35 -> CONTROL
|
||||
u'\x96' # 0x36 -> CONTROL
|
||||
u'\x04' # 0x37 -> END OF TRANSMISSION
|
||||
u'\x98' # 0x38 -> CONTROL
|
||||
u'\x99' # 0x39 -> CONTROL
|
||||
u'\x9a' # 0x3a -> CONTROL
|
||||
u'\x9b' # 0x3b -> CONTROL
|
||||
u'\x14' # 0x3c -> DEVICE CONTROL FOUR
|
||||
u'\x15' # 0x3d -> NEGATIVE ACKNOWLEDGE
|
||||
u'\x9e' # 0x3e -> CONTROL
|
||||
u'\x1a' # 0x3f -> SUBSTITUTE
|
||||
u' ' # 0x40 -> SPACE
|
||||
u'\xa0' # 0x41 -> NO-BREAK SPACE
|
||||
u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
|
||||
u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE
|
||||
u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE
|
||||
u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE
|
||||
u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
|
||||
u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA
|
||||
u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE
|
||||
u'\xa2' # 0x4a -> CENT SIGN
|
||||
u'.' # 0x4b -> FULL STOP
|
||||
u'<' # 0x4c -> LESS-THAN SIGN
|
||||
u'(' # 0x4d -> LEFT PARENTHESIS
|
||||
u'+' # 0x4e -> PLUS SIGN
|
||||
u'|' # 0x4f -> VERTICAL LINE
|
||||
u'&' # 0x50 -> AMPERSAND
|
||||
u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE
|
||||
u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
|
||||
u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE
|
||||
u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE
|
||||
u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
|
||||
u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE
|
||||
u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
|
||||
u'!' # 0x5a -> EXCLAMATION MARK
|
||||
u'$' # 0x5b -> DOLLAR SIGN
|
||||
u'*' # 0x5c -> ASTERISK
|
||||
u')' # 0x5d -> RIGHT PARENTHESIS
|
||||
u';' # 0x5e -> SEMICOLON
|
||||
u'\xac' # 0x5f -> NOT SIGN
|
||||
u'-' # 0x60 -> HYPHEN-MINUS
|
||||
u'/' # 0x61 -> SOLIDUS
|
||||
u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
|
||||
u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
|
||||
u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE
|
||||
u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE
|
||||
u'\xa6' # 0x6a -> BROKEN BAR
|
||||
u',' # 0x6b -> COMMA
|
||||
u'%' # 0x6c -> PERCENT SIGN
|
||||
u'_' # 0x6d -> LOW LINE
|
||||
u'>' # 0x6e -> GREATER-THAN SIGN
|
||||
u'?' # 0x6f -> QUESTION MARK
|
||||
u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE
|
||||
u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
|
||||
u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
|
||||
u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
|
||||
u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
|
||||
u'`' # 0x79 -> GRAVE ACCENT
|
||||
u':' # 0x7a -> COLON
|
||||
u'#' # 0x7b -> NUMBER SIGN
|
||||
u'@' # 0x7c -> COMMERCIAL AT
|
||||
u"'" # 0x7d -> APOSTROPHE
|
||||
u'=' # 0x7e -> EQUALS SIGN
|
||||
u'"' # 0x7f -> QUOTATION MARK
|
||||
u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE
|
||||
u'a' # 0x81 -> LATIN SMALL LETTER A
|
||||
u'b' # 0x82 -> LATIN SMALL LETTER B
|
||||
u'c' # 0x83 -> LATIN SMALL LETTER C
|
||||
u'd' # 0x84 -> LATIN SMALL LETTER D
|
||||
u'e' # 0x85 -> LATIN SMALL LETTER E
|
||||
u'f' # 0x86 -> LATIN SMALL LETTER F
|
||||
u'g' # 0x87 -> LATIN SMALL LETTER G
|
||||
u'h' # 0x88 -> LATIN SMALL LETTER H
|
||||
u'i' # 0x89 -> LATIN SMALL LETTER I
|
||||
u'\xab' # 0x8a -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
u'\xbb' # 0x8b -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
u'\xf0' # 0x8c -> LATIN SMALL LETTER ETH (ICELANDIC)
|
||||
u'\xfd' # 0x8d -> LATIN SMALL LETTER Y WITH ACUTE
|
||||
u'\xfe' # 0x8e -> LATIN SMALL LETTER THORN (ICELANDIC)
|
||||
u'\xb1' # 0x8f -> PLUS-MINUS SIGN
|
||||
u'\xb0' # 0x90 -> DEGREE SIGN
|
||||
u'j' # 0x91 -> LATIN SMALL LETTER J
|
||||
u'k' # 0x92 -> LATIN SMALL LETTER K
|
||||
u'l' # 0x93 -> LATIN SMALL LETTER L
|
||||
u'm' # 0x94 -> LATIN SMALL LETTER M
|
||||
u'n' # 0x95 -> LATIN SMALL LETTER N
|
||||
u'o' # 0x96 -> LATIN SMALL LETTER O
|
||||
u'p' # 0x97 -> LATIN SMALL LETTER P
|
||||
u'q' # 0x98 -> LATIN SMALL LETTER Q
|
||||
u'r' # 0x99 -> LATIN SMALL LETTER R
|
||||
u'\xaa' # 0x9a -> FEMININE ORDINAL INDICATOR
|
||||
u'\xba' # 0x9b -> MASCULINE ORDINAL INDICATOR
|
||||
u'\xe6' # 0x9c -> LATIN SMALL LIGATURE AE
|
||||
u'\xb8' # 0x9d -> CEDILLA
|
||||
u'\xc6' # 0x9e -> LATIN CAPITAL LIGATURE AE
|
||||
u'\u20ac' # 0x9f -> EURO SIGN
|
||||
u'\xb5' # 0xa0 -> MICRO SIGN
|
||||
u'~' # 0xa1 -> TILDE
|
||||
u's' # 0xa2 -> LATIN SMALL LETTER S
|
||||
u't' # 0xa3 -> LATIN SMALL LETTER T
|
||||
u'u' # 0xa4 -> LATIN SMALL LETTER U
|
||||
u'v' # 0xa5 -> LATIN SMALL LETTER V
|
||||
u'w' # 0xa6 -> LATIN SMALL LETTER W
|
||||
u'x' # 0xa7 -> LATIN SMALL LETTER X
|
||||
u'y' # 0xa8 -> LATIN SMALL LETTER Y
|
||||
u'z' # 0xa9 -> LATIN SMALL LETTER Z
|
||||
u'\xa1' # 0xaa -> INVERTED EXCLAMATION MARK
|
||||
u'\xbf' # 0xab -> INVERTED QUESTION MARK
|
||||
u'\xd0' # 0xac -> LATIN CAPITAL LETTER ETH (ICELANDIC)
|
||||
u'\xdd' # 0xad -> LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
u'\xde' # 0xae -> LATIN CAPITAL LETTER THORN (ICELANDIC)
|
||||
u'\xae' # 0xaf -> REGISTERED SIGN
|
||||
u'^' # 0xb0 -> CIRCUMFLEX ACCENT
|
||||
u'\xa3' # 0xb1 -> POUND SIGN
|
||||
u'\xa5' # 0xb2 -> YEN SIGN
|
||||
u'\xb7' # 0xb3 -> MIDDLE DOT
|
||||
u'\xa9' # 0xb4 -> COPYRIGHT SIGN
|
||||
u'\xa7' # 0xb5 -> SECTION SIGN
|
||||
u'\xb6' # 0xb6 -> PILCROW SIGN
|
||||
u'\xbc' # 0xb7 -> VULGAR FRACTION ONE QUARTER
|
||||
u'\xbd' # 0xb8 -> VULGAR FRACTION ONE HALF
|
||||
u'\xbe' # 0xb9 -> VULGAR FRACTION THREE QUARTERS
|
||||
u'[' # 0xba -> LEFT SQUARE BRACKET
|
||||
u']' # 0xbb -> RIGHT SQUARE BRACKET
|
||||
u'\xaf' # 0xbc -> MACRON
|
||||
u'\xa8' # 0xbd -> DIAERESIS
|
||||
u'\xb4' # 0xbe -> ACUTE ACCENT
|
||||
u'\xd7' # 0xbf -> MULTIPLICATION SIGN
|
||||
u'{' # 0xc0 -> LEFT CURLY BRACKET
|
||||
u'A' # 0xc1 -> LATIN CAPITAL LETTER A
|
||||
u'B' # 0xc2 -> LATIN CAPITAL LETTER B
|
||||
u'C' # 0xc3 -> LATIN CAPITAL LETTER C
|
||||
u'D' # 0xc4 -> LATIN CAPITAL LETTER D
|
||||
u'E' # 0xc5 -> LATIN CAPITAL LETTER E
|
||||
u'F' # 0xc6 -> LATIN CAPITAL LETTER F
|
||||
u'G' # 0xc7 -> LATIN CAPITAL LETTER G
|
||||
u'H' # 0xc8 -> LATIN CAPITAL LETTER H
|
||||
u'I' # 0xc9 -> LATIN CAPITAL LETTER I
|
||||
u'\xad' # 0xca -> SOFT HYPHEN
|
||||
u'\xf4' # 0xcb -> LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
u'\xf6' # 0xcc -> LATIN SMALL LETTER O WITH DIAERESIS
|
||||
u'\xf2' # 0xcd -> LATIN SMALL LETTER O WITH GRAVE
|
||||
u'\xf3' # 0xce -> LATIN SMALL LETTER O WITH ACUTE
|
||||
u'\xf5' # 0xcf -> LATIN SMALL LETTER O WITH TILDE
|
||||
u'}' # 0xd0 -> RIGHT CURLY BRACKET
|
||||
u'J' # 0xd1 -> LATIN CAPITAL LETTER J
|
||||
u'K' # 0xd2 -> LATIN CAPITAL LETTER K
|
||||
u'L' # 0xd3 -> LATIN CAPITAL LETTER L
|
||||
u'M' # 0xd4 -> LATIN CAPITAL LETTER M
|
||||
u'N' # 0xd5 -> LATIN CAPITAL LETTER N
|
||||
u'O' # 0xd6 -> LATIN CAPITAL LETTER O
|
||||
u'P' # 0xd7 -> LATIN CAPITAL LETTER P
|
||||
u'Q' # 0xd8 -> LATIN CAPITAL LETTER Q
|
||||
u'R' # 0xd9 -> LATIN CAPITAL LETTER R
|
||||
u'\xb9' # 0xda -> SUPERSCRIPT ONE
|
||||
u'\xfb' # 0xdb -> LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
u'\xfc' # 0xdc -> LATIN SMALL LETTER U WITH DIAERESIS
|
||||
u'\xf9' # 0xdd -> LATIN SMALL LETTER U WITH GRAVE
|
||||
u'\xfa' # 0xde -> LATIN SMALL LETTER U WITH ACUTE
|
||||
u'\xff' # 0xdf -> LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
u'\\' # 0xe0 -> REVERSE SOLIDUS
|
||||
u'\xf7' # 0xe1 -> DIVISION SIGN
|
||||
u'S' # 0xe2 -> LATIN CAPITAL LETTER S
|
||||
u'T' # 0xe3 -> LATIN CAPITAL LETTER T
|
||||
u'U' # 0xe4 -> LATIN CAPITAL LETTER U
|
||||
u'V' # 0xe5 -> LATIN CAPITAL LETTER V
|
||||
u'W' # 0xe6 -> LATIN CAPITAL LETTER W
|
||||
u'X' # 0xe7 -> LATIN CAPITAL LETTER X
|
||||
u'Y' # 0xe8 -> LATIN CAPITAL LETTER Y
|
||||
u'Z' # 0xe9 -> LATIN CAPITAL LETTER Z
|
||||
u'\xb2' # 0xea -> SUPERSCRIPT TWO
|
||||
u'\xd4' # 0xeb -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
u'\xd6' # 0xec -> LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
u'\xd2' # 0xed -> LATIN CAPITAL LETTER O WITH GRAVE
|
||||
u'\xd3' # 0xee -> LATIN CAPITAL LETTER O WITH ACUTE
|
||||
u'\xd5' # 0xef -> LATIN CAPITAL LETTER O WITH TILDE
|
||||
u'0' # 0xf0 -> DIGIT ZERO
|
||||
u'1' # 0xf1 -> DIGIT ONE
|
||||
u'2' # 0xf2 -> DIGIT TWO
|
||||
u'3' # 0xf3 -> DIGIT THREE
|
||||
u'4' # 0xf4 -> DIGIT FOUR
|
||||
u'5' # 0xf5 -> DIGIT FIVE
|
||||
u'6' # 0xf6 -> DIGIT SIX
|
||||
u'7' # 0xf7 -> DIGIT SEVEN
|
||||
u'8' # 0xf8 -> DIGIT EIGHT
|
||||
u'9' # 0xf9 -> DIGIT NINE
|
||||
u'\xb3' # 0xfa -> SUPERSCRIPT THREE
|
||||
u'\xdb' # 0xfb -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
u'\xdc' # 0xfc -> LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
u'\xd9' # 0xfd -> LATIN CAPITAL LETTER U WITH GRAVE
|
||||
u'\xda' # 0xfe -> LATIN CAPITAL LETTER U WITH ACUTE
|
||||
u'\x9f' # 0xff -> CONTROL
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = codecs.make_encoding_map(decoding_map)
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x37, # END OF TRANSMISSION
|
||||
0x0005: 0x2d, # ENQUIRY
|
||||
0x0006: 0x2e, # ACKNOWLEDGE
|
||||
0x0007: 0x2f, # BELL
|
||||
0x0008: 0x16, # BACKSPACE
|
||||
0x0009: 0x05, # HORIZONTAL TABULATION
|
||||
0x000a: 0x25, # LINE FEED
|
||||
0x000b: 0x0b, # VERTICAL TABULATION
|
||||
0x000c: 0x0c, # FORM FEED
|
||||
0x000d: 0x0d, # CARRIAGE RETURN
|
||||
0x000e: 0x0e, # SHIFT OUT
|
||||
0x000f: 0x0f, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x3c, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x3d, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x32, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x26, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001a: 0x3f, # SUBSTITUTE
|
||||
0x001b: 0x27, # ESCAPE
|
||||
0x001c: 0x1c, # FILE SEPARATOR
|
||||
0x001d: 0x1d, # GROUP SEPARATOR
|
||||
0x001e: 0x1e, # RECORD SEPARATOR
|
||||
0x001f: 0x1f, # UNIT SEPARATOR
|
||||
0x0020: 0x40, # SPACE
|
||||
0x0021: 0x5a, # EXCLAMATION MARK
|
||||
0x0022: 0x7f, # QUOTATION MARK
|
||||
0x0023: 0x7b, # NUMBER SIGN
|
||||
0x0024: 0x5b, # DOLLAR SIGN
|
||||
0x0025: 0x6c, # PERCENT SIGN
|
||||
0x0026: 0x50, # AMPERSAND
|
||||
0x0027: 0x7d, # APOSTROPHE
|
||||
0x0028: 0x4d, # LEFT PARENTHESIS
|
||||
0x0029: 0x5d, # RIGHT PARENTHESIS
|
||||
0x002a: 0x5c, # ASTERISK
|
||||
0x002b: 0x4e, # PLUS SIGN
|
||||
0x002c: 0x6b, # COMMA
|
||||
0x002d: 0x60, # HYPHEN-MINUS
|
||||
0x002e: 0x4b, # FULL STOP
|
||||
0x002f: 0x61, # SOLIDUS
|
||||
0x0030: 0xf0, # DIGIT ZERO
|
||||
0x0031: 0xf1, # DIGIT ONE
|
||||
0x0032: 0xf2, # DIGIT TWO
|
||||
0x0033: 0xf3, # DIGIT THREE
|
||||
0x0034: 0xf4, # DIGIT FOUR
|
||||
0x0035: 0xf5, # DIGIT FIVE
|
||||
0x0036: 0xf6, # DIGIT SIX
|
||||
0x0037: 0xf7, # DIGIT SEVEN
|
||||
0x0038: 0xf8, # DIGIT EIGHT
|
||||
0x0039: 0xf9, # DIGIT NINE
|
||||
0x003a: 0x7a, # COLON
|
||||
0x003b: 0x5e, # SEMICOLON
|
||||
0x003c: 0x4c, # LESS-THAN SIGN
|
||||
0x003d: 0x7e, # EQUALS SIGN
|
||||
0x003e: 0x6e, # GREATER-THAN SIGN
|
||||
0x003f: 0x6f, # QUESTION MARK
|
||||
0x0040: 0x7c, # COMMERCIAL AT
|
||||
0x0041: 0xc1, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0xc2, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0xc3, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0xc4, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0xc5, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0xc6, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0xc7, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0xc8, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0xc9, # LATIN CAPITAL LETTER I
|
||||
0x004a: 0xd1, # LATIN CAPITAL LETTER J
|
||||
0x004b: 0xd2, # LATIN CAPITAL LETTER K
|
||||
0x004c: 0xd3, # LATIN CAPITAL LETTER L
|
||||
0x004d: 0xd4, # LATIN CAPITAL LETTER M
|
||||
0x004e: 0xd5, # LATIN CAPITAL LETTER N
|
||||
0x004f: 0xd6, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0xd7, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0xd8, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0xd9, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0xe2, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0xe3, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0xe4, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0xe5, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0xe6, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0xe7, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0xe8, # LATIN CAPITAL LETTER Y
|
||||
0x005a: 0xe9, # LATIN CAPITAL LETTER Z
|
||||
0x005b: 0xba, # LEFT SQUARE BRACKET
|
||||
0x005c: 0xe0, # REVERSE SOLIDUS
|
||||
0x005d: 0xbb, # RIGHT SQUARE BRACKET
|
||||
0x005e: 0xb0, # CIRCUMFLEX ACCENT
|
||||
0x005f: 0x6d, # LOW LINE
|
||||
0x0060: 0x79, # GRAVE ACCENT
|
||||
0x0061: 0x81, # LATIN SMALL LETTER A
|
||||
0x0062: 0x82, # LATIN SMALL LETTER B
|
||||
0x0063: 0x83, # LATIN SMALL LETTER C
|
||||
0x0064: 0x84, # LATIN SMALL LETTER D
|
||||
0x0065: 0x85, # LATIN SMALL LETTER E
|
||||
0x0066: 0x86, # LATIN SMALL LETTER F
|
||||
0x0067: 0x87, # LATIN SMALL LETTER G
|
||||
0x0068: 0x88, # LATIN SMALL LETTER H
|
||||
0x0069: 0x89, # LATIN SMALL LETTER I
|
||||
0x006a: 0x91, # LATIN SMALL LETTER J
|
||||
0x006b: 0x92, # LATIN SMALL LETTER K
|
||||
0x006c: 0x93, # LATIN SMALL LETTER L
|
||||
0x006d: 0x94, # LATIN SMALL LETTER M
|
||||
0x006e: 0x95, # LATIN SMALL LETTER N
|
||||
0x006f: 0x96, # LATIN SMALL LETTER O
|
||||
0x0070: 0x97, # LATIN SMALL LETTER P
|
||||
0x0071: 0x98, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x99, # LATIN SMALL LETTER R
|
||||
0x0073: 0xa2, # LATIN SMALL LETTER S
|
||||
0x0074: 0xa3, # LATIN SMALL LETTER T
|
||||
0x0075: 0xa4, # LATIN SMALL LETTER U
|
||||
0x0076: 0xa5, # LATIN SMALL LETTER V
|
||||
0x0077: 0xa6, # LATIN SMALL LETTER W
|
||||
0x0078: 0xa7, # LATIN SMALL LETTER X
|
||||
0x0079: 0xa8, # LATIN SMALL LETTER Y
|
||||
0x007a: 0xa9, # LATIN SMALL LETTER Z
|
||||
0x007b: 0xc0, # LEFT CURLY BRACKET
|
||||
0x007c: 0x4f, # VERTICAL LINE
|
||||
0x007d: 0xd0, # RIGHT CURLY BRACKET
|
||||
0x007e: 0xa1, # TILDE
|
||||
0x007f: 0x07, # DELETE
|
||||
0x0080: 0x20, # CONTROL
|
||||
0x0081: 0x21, # CONTROL
|
||||
0x0082: 0x22, # CONTROL
|
||||
0x0083: 0x23, # CONTROL
|
||||
0x0084: 0x24, # CONTROL
|
||||
0x0085: 0x15, # CONTROL
|
||||
0x0086: 0x06, # CONTROL
|
||||
0x0087: 0x17, # CONTROL
|
||||
0x0088: 0x28, # CONTROL
|
||||
0x0089: 0x29, # CONTROL
|
||||
0x008a: 0x2a, # CONTROL
|
||||
0x008b: 0x2b, # CONTROL
|
||||
0x008c: 0x2c, # CONTROL
|
||||
0x008d: 0x09, # CONTROL
|
||||
0x008e: 0x0a, # CONTROL
|
||||
0x008f: 0x1b, # CONTROL
|
||||
0x0090: 0x30, # CONTROL
|
||||
0x0091: 0x31, # CONTROL
|
||||
0x0092: 0x1a, # CONTROL
|
||||
0x0093: 0x33, # CONTROL
|
||||
0x0094: 0x34, # CONTROL
|
||||
0x0095: 0x35, # CONTROL
|
||||
0x0096: 0x36, # CONTROL
|
||||
0x0097: 0x08, # CONTROL
|
||||
0x0098: 0x38, # CONTROL
|
||||
0x0099: 0x39, # CONTROL
|
||||
0x009a: 0x3a, # CONTROL
|
||||
0x009b: 0x3b, # CONTROL
|
||||
0x009c: 0x04, # CONTROL
|
||||
0x009d: 0x14, # CONTROL
|
||||
0x009e: 0x3e, # CONTROL
|
||||
0x009f: 0xff, # CONTROL
|
||||
0x00a0: 0x41, # NO-BREAK SPACE
|
||||
0x00a1: 0xaa, # INVERTED EXCLAMATION MARK
|
||||
0x00a2: 0x4a, # CENT SIGN
|
||||
0x00a3: 0xb1, # POUND SIGN
|
||||
0x00a5: 0xb2, # YEN SIGN
|
||||
0x00a6: 0x6a, # BROKEN BAR
|
||||
0x00a7: 0xb5, # SECTION SIGN
|
||||
0x00a8: 0xbd, # DIAERESIS
|
||||
0x00a9: 0xb4, # COPYRIGHT SIGN
|
||||
0x00aa: 0x9a, # FEMININE ORDINAL INDICATOR
|
||||
0x00ab: 0x8a, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00ac: 0x5f, # NOT SIGN
|
||||
0x00ad: 0xca, # SOFT HYPHEN
|
||||
0x00ae: 0xaf, # REGISTERED SIGN
|
||||
0x00af: 0xbc, # MACRON
|
||||
0x00b0: 0x90, # DEGREE SIGN
|
||||
0x00b1: 0x8f, # PLUS-MINUS SIGN
|
||||
0x00b2: 0xea, # SUPERSCRIPT TWO
|
||||
0x00b3: 0xfa, # SUPERSCRIPT THREE
|
||||
0x00b4: 0xbe, # ACUTE ACCENT
|
||||
0x00b5: 0xa0, # MICRO SIGN
|
||||
0x00b6: 0xb6, # PILCROW SIGN
|
||||
0x00b7: 0xb3, # MIDDLE DOT
|
||||
0x00b8: 0x9d, # CEDILLA
|
||||
0x00b9: 0xda, # SUPERSCRIPT ONE
|
||||
0x00ba: 0x9b, # MASCULINE ORDINAL INDICATOR
|
||||
0x00bb: 0x8b, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00bc: 0xb7, # VULGAR FRACTION ONE QUARTER
|
||||
0x00bd: 0xb8, # VULGAR FRACTION ONE HALF
|
||||
0x00be: 0xb9, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00bf: 0xab, # INVERTED QUESTION MARK
|
||||
0x00c0: 0x64, # LATIN CAPITAL LETTER A WITH GRAVE
|
||||
0x00c1: 0x65, # LATIN CAPITAL LETTER A WITH ACUTE
|
||||
0x00c2: 0x62, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
0x00c3: 0x66, # LATIN CAPITAL LETTER A WITH TILDE
|
||||
0x00c4: 0x63, # LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
0x00c5: 0x67, # LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
0x00c6: 0x9e, # LATIN CAPITAL LIGATURE AE
|
||||
0x00c7: 0x68, # LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
0x00c8: 0x74, # LATIN CAPITAL LETTER E WITH GRAVE
|
||||
0x00c9: 0x71, # LATIN CAPITAL LETTER E WITH ACUTE
|
||||
0x00ca: 0x72, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
0x00cb: 0x73, # LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
0x00cc: 0x78, # LATIN CAPITAL LETTER I WITH GRAVE
|
||||
0x00cd: 0x75, # LATIN CAPITAL LETTER I WITH ACUTE
|
||||
0x00ce: 0x76, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
0x00cf: 0x77, # LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
0x00d0: 0xac, # LATIN CAPITAL LETTER ETH (ICELANDIC)
|
||||
0x00d1: 0x69, # LATIN CAPITAL LETTER N WITH TILDE
|
||||
0x00d2: 0xed, # LATIN CAPITAL LETTER O WITH GRAVE
|
||||
0x00d3: 0xee, # LATIN CAPITAL LETTER O WITH ACUTE
|
||||
0x00d4: 0xeb, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
0x00d5: 0xef, # LATIN CAPITAL LETTER O WITH TILDE
|
||||
0x00d6: 0xec, # LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
0x00d7: 0xbf, # MULTIPLICATION SIGN
|
||||
0x00d8: 0x80, # LATIN CAPITAL LETTER O WITH STROKE
|
||||
0x00d9: 0xfd, # LATIN CAPITAL LETTER U WITH GRAVE
|
||||
0x00da: 0xfe, # LATIN CAPITAL LETTER U WITH ACUTE
|
||||
0x00db: 0xfb, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
0x00dc: 0xfc, # LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
0x00dd: 0xad, # LATIN CAPITAL LETTER Y WITH ACUTE
|
||||
0x00de: 0xae, # LATIN CAPITAL LETTER THORN (ICELANDIC)
|
||||
0x00df: 0x59, # LATIN SMALL LETTER SHARP S (GERMAN)
|
||||
0x00e0: 0x44, # LATIN SMALL LETTER A WITH GRAVE
|
||||
0x00e1: 0x45, # LATIN SMALL LETTER A WITH ACUTE
|
||||
0x00e2: 0x42, # LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
0x00e3: 0x46, # LATIN SMALL LETTER A WITH TILDE
|
||||
0x00e4: 0x43, # LATIN SMALL LETTER A WITH DIAERESIS
|
||||
0x00e5: 0x47, # LATIN SMALL LETTER A WITH RING ABOVE
|
||||
0x00e6: 0x9c, # LATIN SMALL LIGATURE AE
|
||||
0x00e7: 0x48, # LATIN SMALL LETTER C WITH CEDILLA
|
||||
0x00e8: 0x54, # LATIN SMALL LETTER E WITH GRAVE
|
||||
0x00e9: 0x51, # LATIN SMALL LETTER E WITH ACUTE
|
||||
0x00ea: 0x52, # LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
0x00eb: 0x53, # LATIN SMALL LETTER E WITH DIAERESIS
|
||||
0x00ec: 0x58, # LATIN SMALL LETTER I WITH GRAVE
|
||||
0x00ed: 0x55, # LATIN SMALL LETTER I WITH ACUTE
|
||||
0x00ee: 0x56, # LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
0x00ef: 0x57, # LATIN SMALL LETTER I WITH DIAERESIS
|
||||
0x00f0: 0x8c, # LATIN SMALL LETTER ETH (ICELANDIC)
|
||||
0x00f1: 0x49, # LATIN SMALL LETTER N WITH TILDE
|
||||
0x00f2: 0xcd, # LATIN SMALL LETTER O WITH GRAVE
|
||||
0x00f3: 0xce, # LATIN SMALL LETTER O WITH ACUTE
|
||||
0x00f4: 0xcb, # LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
0x00f5: 0xcf, # LATIN SMALL LETTER O WITH TILDE
|
||||
0x00f6: 0xcc, # LATIN SMALL LETTER O WITH DIAERESIS
|
||||
0x00f7: 0xe1, # DIVISION SIGN
|
||||
0x00f8: 0x70, # LATIN SMALL LETTER O WITH STROKE
|
||||
0x00f9: 0xdd, # LATIN SMALL LETTER U WITH GRAVE
|
||||
0x00fa: 0xde, # LATIN SMALL LETTER U WITH ACUTE
|
||||
0x00fb: 0xdb, # LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
0x00fc: 0xdc, # LATIN SMALL LETTER U WITH DIAERESIS
|
||||
0x00fd: 0x8d, # LATIN SMALL LETTER Y WITH ACUTE
|
||||
0x00fe: 0x8e, # LATIN SMALL LETTER THORN (ICELANDIC)
|
||||
0x00ff: 0xdf, # LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
0x20ac: 0x9f, # EURO SIGN
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
|||
""" Python Character Mapping Codec generated from 'ISO8859/8859-6.TXT' with gencodec.py.
|
||||
""" Python Character Mapping Codec generated from 'MAPPINGS/ISO8859/8859-6.TXT' with gencodec.py.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -28,273 +28,175 @@ def getregentry():
|
|||
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x00a1: None,
|
||||
0x00a2: None,
|
||||
0x00a3: None,
|
||||
0x00a5: None,
|
||||
0x00a6: None,
|
||||
0x00a7: None,
|
||||
0x00a8: None,
|
||||
0x00a9: None,
|
||||
0x00aa: None,
|
||||
0x00ab: None,
|
||||
0x00ac: 0x060c, # ARABIC COMMA
|
||||
0x00ae: None,
|
||||
0x00af: None,
|
||||
0x00b0: None,
|
||||
0x00b1: None,
|
||||
0x00b2: None,
|
||||
0x00b3: None,
|
||||
0x00b4: None,
|
||||
0x00b5: None,
|
||||
0x00b6: None,
|
||||
0x00b7: None,
|
||||
0x00b8: None,
|
||||
0x00b9: None,
|
||||
0x00ba: None,
|
||||
0x00bb: 0x061b, # ARABIC SEMICOLON
|
||||
0x00bc: None,
|
||||
0x00bd: None,
|
||||
0x00be: None,
|
||||
0x00bf: 0x061f, # ARABIC QUESTION MARK
|
||||
0x00c0: None,
|
||||
0x00c1: 0x0621, # ARABIC LETTER HAMZA
|
||||
0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE
|
||||
0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE
|
||||
0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE
|
||||
0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW
|
||||
0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE
|
||||
0x00c7: 0x0627, # ARABIC LETTER ALEF
|
||||
0x00c8: 0x0628, # ARABIC LETTER BEH
|
||||
0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA
|
||||
0x00ca: 0x062a, # ARABIC LETTER TEH
|
||||
0x00cb: 0x062b, # ARABIC LETTER THEH
|
||||
0x00cc: 0x062c, # ARABIC LETTER JEEM
|
||||
0x00cd: 0x062d, # ARABIC LETTER HAH
|
||||
0x00ce: 0x062e, # ARABIC LETTER KHAH
|
||||
0x00cf: 0x062f, # ARABIC LETTER DAL
|
||||
0x00d0: 0x0630, # ARABIC LETTER THAL
|
||||
0x00d1: 0x0631, # ARABIC LETTER REH
|
||||
0x00d2: 0x0632, # ARABIC LETTER ZAIN
|
||||
0x00d3: 0x0633, # ARABIC LETTER SEEN
|
||||
0x00d4: 0x0634, # ARABIC LETTER SHEEN
|
||||
0x00d5: 0x0635, # ARABIC LETTER SAD
|
||||
0x00d6: 0x0636, # ARABIC LETTER DAD
|
||||
0x00d7: 0x0637, # ARABIC LETTER TAH
|
||||
0x00d8: 0x0638, # ARABIC LETTER ZAH
|
||||
0x00d9: 0x0639, # ARABIC LETTER AIN
|
||||
0x00da: 0x063a, # ARABIC LETTER GHAIN
|
||||
0x00db: None,
|
||||
0x00dc: None,
|
||||
0x00dd: None,
|
||||
0x00de: None,
|
||||
0x00df: None,
|
||||
0x00e0: 0x0640, # ARABIC TATWEEL
|
||||
0x00e1: 0x0641, # ARABIC LETTER FEH
|
||||
0x00e2: 0x0642, # ARABIC LETTER QAF
|
||||
0x00e3: 0x0643, # ARABIC LETTER KAF
|
||||
0x00e4: 0x0644, # ARABIC LETTER LAM
|
||||
0x00e5: 0x0645, # ARABIC LETTER MEEM
|
||||
0x00e6: 0x0646, # ARABIC LETTER NOON
|
||||
0x00e7: 0x0647, # ARABIC LETTER HEH
|
||||
0x00e8: 0x0648, # ARABIC LETTER WAW
|
||||
0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA
|
||||
0x00ea: 0x064a, # ARABIC LETTER YEH
|
||||
0x00eb: 0x064b, # ARABIC FATHATAN
|
||||
0x00ec: 0x064c, # ARABIC DAMMATAN
|
||||
0x00ed: 0x064d, # ARABIC KASRATAN
|
||||
0x00ee: 0x064e, # ARABIC FATHA
|
||||
0x00ef: 0x064f, # ARABIC DAMMA
|
||||
0x00f0: 0x0650, # ARABIC KASRA
|
||||
0x00f1: 0x0651, # ARABIC SHADDA
|
||||
0x00f2: 0x0652, # ARABIC SUKUN
|
||||
0x00f3: None,
|
||||
0x00f4: None,
|
||||
0x00f5: None,
|
||||
0x00f6: None,
|
||||
0x00f7: None,
|
||||
0x00f8: None,
|
||||
0x00f9: None,
|
||||
0x00fa: None,
|
||||
0x00fb: None,
|
||||
0x00fc: None,
|
||||
0x00fd: None,
|
||||
0x00fe: None,
|
||||
0x00ff: None,
|
||||
})
|
||||
|
||||
### Decoding Table
|
||||
|
||||
decoding_table = (
|
||||
u'\x00' # 0x0000 -> NULL
|
||||
u'\x01' # 0x0001 -> START OF HEADING
|
||||
u'\x02' # 0x0002 -> START OF TEXT
|
||||
u'\x03' # 0x0003 -> END OF TEXT
|
||||
u'\x04' # 0x0004 -> END OF TRANSMISSION
|
||||
u'\x05' # 0x0005 -> ENQUIRY
|
||||
u'\x06' # 0x0006 -> ACKNOWLEDGE
|
||||
u'\x07' # 0x0007 -> BELL
|
||||
u'\x08' # 0x0008 -> BACKSPACE
|
||||
u'\t' # 0x0009 -> HORIZONTAL TABULATION
|
||||
u'\n' # 0x000a -> LINE FEED
|
||||
u'\x0b' # 0x000b -> VERTICAL TABULATION
|
||||
u'\x0c' # 0x000c -> FORM FEED
|
||||
u'\r' # 0x000d -> CARRIAGE RETURN
|
||||
u'\x0e' # 0x000e -> SHIFT OUT
|
||||
u'\x0f' # 0x000f -> SHIFT IN
|
||||
u'\x10' # 0x0010 -> DATA LINK ESCAPE
|
||||
u'\x11' # 0x0011 -> DEVICE CONTROL ONE
|
||||
u'\x12' # 0x0012 -> DEVICE CONTROL TWO
|
||||
u'\x13' # 0x0013 -> DEVICE CONTROL THREE
|
||||
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
|
||||
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
|
||||
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
|
||||
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
|
||||
u'\x18' # 0x0018 -> CANCEL
|
||||
u'\x19' # 0x0019 -> END OF MEDIUM
|
||||
u'\x1a' # 0x001a -> SUBSTITUTE
|
||||
u'\x1b' # 0x001b -> ESCAPE
|
||||
u'\x1c' # 0x001c -> FILE SEPARATOR
|
||||
u'\x1d' # 0x001d -> GROUP SEPARATOR
|
||||
u'\x1e' # 0x001e -> RECORD SEPARATOR
|
||||
u'\x1f' # 0x001f -> UNIT SEPARATOR
|
||||
u' ' # 0x0020 -> SPACE
|
||||
u'!' # 0x0021 -> EXCLAMATION MARK
|
||||
u'"' # 0x0022 -> QUOTATION MARK
|
||||
u'#' # 0x0023 -> NUMBER SIGN
|
||||
u'$' # 0x0024 -> DOLLAR SIGN
|
||||
u'%' # 0x0025 -> PERCENT SIGN
|
||||
u'&' # 0x0026 -> AMPERSAND
|
||||
u"'" # 0x0027 -> APOSTROPHE
|
||||
u'(' # 0x0028 -> LEFT PARENTHESIS
|
||||
u')' # 0x0029 -> RIGHT PARENTHESIS
|
||||
u'*' # 0x002a -> ASTERISK
|
||||
u'+' # 0x002b -> PLUS SIGN
|
||||
u',' # 0x002c -> COMMA
|
||||
u'-' # 0x002d -> HYPHEN-MINUS
|
||||
u'.' # 0x002e -> FULL STOP
|
||||
u'/' # 0x002f -> SOLIDUS
|
||||
u'0' # 0x0030 -> DIGIT ZERO
|
||||
u'1' # 0x0031 -> DIGIT ONE
|
||||
u'2' # 0x0032 -> DIGIT TWO
|
||||
u'3' # 0x0033 -> DIGIT THREE
|
||||
u'4' # 0x0034 -> DIGIT FOUR
|
||||
u'5' # 0x0035 -> DIGIT FIVE
|
||||
u'6' # 0x0036 -> DIGIT SIX
|
||||
u'7' # 0x0037 -> DIGIT SEVEN
|
||||
u'8' # 0x0038 -> DIGIT EIGHT
|
||||
u'9' # 0x0039 -> DIGIT NINE
|
||||
u':' # 0x003a -> COLON
|
||||
u';' # 0x003b -> SEMICOLON
|
||||
u'<' # 0x003c -> LESS-THAN SIGN
|
||||
u'=' # 0x003d -> EQUALS SIGN
|
||||
u'>' # 0x003e -> GREATER-THAN SIGN
|
||||
u'?' # 0x003f -> QUESTION MARK
|
||||
u'@' # 0x0040 -> COMMERCIAL AT
|
||||
u'A' # 0x0041 -> LATIN CAPITAL LETTER A
|
||||
u'B' # 0x0042 -> LATIN CAPITAL LETTER B
|
||||
u'C' # 0x0043 -> LATIN CAPITAL LETTER C
|
||||
u'D' # 0x0044 -> LATIN CAPITAL LETTER D
|
||||
u'E' # 0x0045 -> LATIN CAPITAL LETTER E
|
||||
u'F' # 0x0046 -> LATIN CAPITAL LETTER F
|
||||
u'G' # 0x0047 -> LATIN CAPITAL LETTER G
|
||||
u'H' # 0x0048 -> LATIN CAPITAL LETTER H
|
||||
u'I' # 0x0049 -> LATIN CAPITAL LETTER I
|
||||
u'J' # 0x004a -> LATIN CAPITAL LETTER J
|
||||
u'K' # 0x004b -> LATIN CAPITAL LETTER K
|
||||
u'L' # 0x004c -> LATIN CAPITAL LETTER L
|
||||
u'M' # 0x004d -> LATIN CAPITAL LETTER M
|
||||
u'N' # 0x004e -> LATIN CAPITAL LETTER N
|
||||
u'O' # 0x004f -> LATIN CAPITAL LETTER O
|
||||
u'P' # 0x0050 -> LATIN CAPITAL LETTER P
|
||||
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
|
||||
u'R' # 0x0052 -> LATIN CAPITAL LETTER R
|
||||
u'S' # 0x0053 -> LATIN CAPITAL LETTER S
|
||||
u'T' # 0x0054 -> LATIN CAPITAL LETTER T
|
||||
u'U' # 0x0055 -> LATIN CAPITAL LETTER U
|
||||
u'V' # 0x0056 -> LATIN CAPITAL LETTER V
|
||||
u'W' # 0x0057 -> LATIN CAPITAL LETTER W
|
||||
u'X' # 0x0058 -> LATIN CAPITAL LETTER X
|
||||
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
|
||||
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
|
||||
u'[' # 0x005b -> LEFT SQUARE BRACKET
|
||||
u'\\' # 0x005c -> REVERSE SOLIDUS
|
||||
u']' # 0x005d -> RIGHT SQUARE BRACKET
|
||||
u'^' # 0x005e -> CIRCUMFLEX ACCENT
|
||||
u'_' # 0x005f -> LOW LINE
|
||||
u'`' # 0x0060 -> GRAVE ACCENT
|
||||
u'a' # 0x0061 -> LATIN SMALL LETTER A
|
||||
u'b' # 0x0062 -> LATIN SMALL LETTER B
|
||||
u'c' # 0x0063 -> LATIN SMALL LETTER C
|
||||
u'd' # 0x0064 -> LATIN SMALL LETTER D
|
||||
u'e' # 0x0065 -> LATIN SMALL LETTER E
|
||||
u'f' # 0x0066 -> LATIN SMALL LETTER F
|
||||
u'g' # 0x0067 -> LATIN SMALL LETTER G
|
||||
u'h' # 0x0068 -> LATIN SMALL LETTER H
|
||||
u'i' # 0x0069 -> LATIN SMALL LETTER I
|
||||
u'j' # 0x006a -> LATIN SMALL LETTER J
|
||||
u'k' # 0x006b -> LATIN SMALL LETTER K
|
||||
u'l' # 0x006c -> LATIN SMALL LETTER L
|
||||
u'm' # 0x006d -> LATIN SMALL LETTER M
|
||||
u'n' # 0x006e -> LATIN SMALL LETTER N
|
||||
u'o' # 0x006f -> LATIN SMALL LETTER O
|
||||
u'p' # 0x0070 -> LATIN SMALL LETTER P
|
||||
u'q' # 0x0071 -> LATIN SMALL LETTER Q
|
||||
u'r' # 0x0072 -> LATIN SMALL LETTER R
|
||||
u's' # 0x0073 -> LATIN SMALL LETTER S
|
||||
u't' # 0x0074 -> LATIN SMALL LETTER T
|
||||
u'u' # 0x0075 -> LATIN SMALL LETTER U
|
||||
u'v' # 0x0076 -> LATIN SMALL LETTER V
|
||||
u'w' # 0x0077 -> LATIN SMALL LETTER W
|
||||
u'x' # 0x0078 -> LATIN SMALL LETTER X
|
||||
u'y' # 0x0079 -> LATIN SMALL LETTER Y
|
||||
u'z' # 0x007a -> LATIN SMALL LETTER Z
|
||||
u'{' # 0x007b -> LEFT CURLY BRACKET
|
||||
u'|' # 0x007c -> VERTICAL LINE
|
||||
u'}' # 0x007d -> RIGHT CURLY BRACKET
|
||||
u'~' # 0x007e -> TILDE
|
||||
u'\x7f' # 0x007f -> DELETE
|
||||
u'\x80' # 0x0080 -> <control>
|
||||
u'\x81' # 0x0081 -> <control>
|
||||
u'\x82' # 0x0082 -> <control>
|
||||
u'\x83' # 0x0083 -> <control>
|
||||
u'\x84' # 0x0084 -> <control>
|
||||
u'\x85' # 0x0085 -> <control>
|
||||
u'\x86' # 0x0086 -> <control>
|
||||
u'\x87' # 0x0087 -> <control>
|
||||
u'\x88' # 0x0088 -> <control>
|
||||
u'\x89' # 0x0089 -> <control>
|
||||
u'\x8a' # 0x008a -> <control>
|
||||
u'\x8b' # 0x008b -> <control>
|
||||
u'\x8c' # 0x008c -> <control>
|
||||
u'\x8d' # 0x008d -> <control>
|
||||
u'\x8e' # 0x008e -> <control>
|
||||
u'\x8f' # 0x008f -> <control>
|
||||
u'\x90' # 0x0090 -> <control>
|
||||
u'\x91' # 0x0091 -> <control>
|
||||
u'\x92' # 0x0092 -> <control>
|
||||
u'\x93' # 0x0093 -> <control>
|
||||
u'\x94' # 0x0094 -> <control>
|
||||
u'\x95' # 0x0095 -> <control>
|
||||
u'\x96' # 0x0096 -> <control>
|
||||
u'\x97' # 0x0097 -> <control>
|
||||
u'\x98' # 0x0098 -> <control>
|
||||
u'\x99' # 0x0099 -> <control>
|
||||
u'\x9a' # 0x009a -> <control>
|
||||
u'\x9b' # 0x009b -> <control>
|
||||
u'\x9c' # 0x009c -> <control>
|
||||
u'\x9d' # 0x009d -> <control>
|
||||
u'\x9e' # 0x009e -> <control>
|
||||
u'\x9f' # 0x009f -> <control>
|
||||
u'\xa0' # 0x00a0 -> NO-BREAK SPACE
|
||||
u'\x00' # 0x00 -> NULL
|
||||
u'\x01' # 0x01 -> START OF HEADING
|
||||
u'\x02' # 0x02 -> START OF TEXT
|
||||
u'\x03' # 0x03 -> END OF TEXT
|
||||
u'\x04' # 0x04 -> END OF TRANSMISSION
|
||||
u'\x05' # 0x05 -> ENQUIRY
|
||||
u'\x06' # 0x06 -> ACKNOWLEDGE
|
||||
u'\x07' # 0x07 -> BELL
|
||||
u'\x08' # 0x08 -> BACKSPACE
|
||||
u'\t' # 0x09 -> HORIZONTAL TABULATION
|
||||
u'\n' # 0x0a -> LINE FEED
|
||||
u'\x0b' # 0x0b -> VERTICAL TABULATION
|
||||
u'\x0c' # 0x0c -> FORM FEED
|
||||
u'\r' # 0x0d -> CARRIAGE RETURN
|
||||
u'\x0e' # 0x0e -> SHIFT OUT
|
||||
u'\x0f' # 0x0f -> SHIFT IN
|
||||
u'\x10' # 0x10 -> DATA LINK ESCAPE
|
||||
u'\x11' # 0x11 -> DEVICE CONTROL ONE
|
||||
u'\x12' # 0x12 -> DEVICE CONTROL TWO
|
||||
u'\x13' # 0x13 -> DEVICE CONTROL THREE
|
||||
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
|
||||
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
|
||||
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
|
||||
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
|
||||
u'\x18' # 0x18 -> CANCEL
|
||||
u'\x19' # 0x19 -> END OF MEDIUM
|
||||
u'\x1a' # 0x1a -> SUBSTITUTE
|
||||
u'\x1b' # 0x1b -> ESCAPE
|
||||
u'\x1c' # 0x1c -> FILE SEPARATOR
|
||||
u'\x1d' # 0x1d -> GROUP SEPARATOR
|
||||
u'\x1e' # 0x1e -> RECORD SEPARATOR
|
||||
u'\x1f' # 0x1f -> UNIT SEPARATOR
|
||||
u' ' # 0x20 -> SPACE
|
||||
u'!' # 0x21 -> EXCLAMATION MARK
|
||||
u'"' # 0x22 -> QUOTATION MARK
|
||||
u'#' # 0x23 -> NUMBER SIGN
|
||||
u'$' # 0x24 -> DOLLAR SIGN
|
||||
u'%' # 0x25 -> PERCENT SIGN
|
||||
u'&' # 0x26 -> AMPERSAND
|
||||
u"'" # 0x27 -> APOSTROPHE
|
||||
u'(' # 0x28 -> LEFT PARENTHESIS
|
||||
u')' # 0x29 -> RIGHT PARENTHESIS
|
||||
u'*' # 0x2a -> ASTERISK
|
||||
u'+' # 0x2b -> PLUS SIGN
|
||||
u',' # 0x2c -> COMMA
|
||||
u'-' # 0x2d -> HYPHEN-MINUS
|
||||
u'.' # 0x2e -> FULL STOP
|
||||
u'/' # 0x2f -> SOLIDUS
|
||||
u'0' # 0x30 -> DIGIT ZERO
|
||||
u'1' # 0x31 -> DIGIT ONE
|
||||
u'2' # 0x32 -> DIGIT TWO
|
||||
u'3' # 0x33 -> DIGIT THREE
|
||||
u'4' # 0x34 -> DIGIT FOUR
|
||||
u'5' # 0x35 -> DIGIT FIVE
|
||||
u'6' # 0x36 -> DIGIT SIX
|
||||
u'7' # 0x37 -> DIGIT SEVEN
|
||||
u'8' # 0x38 -> DIGIT EIGHT
|
||||
u'9' # 0x39 -> DIGIT NINE
|
||||
u':' # 0x3a -> COLON
|
||||
u';' # 0x3b -> SEMICOLON
|
||||
u'<' # 0x3c -> LESS-THAN SIGN
|
||||
u'=' # 0x3d -> EQUALS SIGN
|
||||
u'>' # 0x3e -> GREATER-THAN SIGN
|
||||
u'?' # 0x3f -> QUESTION MARK
|
||||
u'@' # 0x40 -> COMMERCIAL AT
|
||||
u'A' # 0x41 -> LATIN CAPITAL LETTER A
|
||||
u'B' # 0x42 -> LATIN CAPITAL LETTER B
|
||||
u'C' # 0x43 -> LATIN CAPITAL LETTER C
|
||||
u'D' # 0x44 -> LATIN CAPITAL LETTER D
|
||||
u'E' # 0x45 -> LATIN CAPITAL LETTER E
|
||||
u'F' # 0x46 -> LATIN CAPITAL LETTER F
|
||||
u'G' # 0x47 -> LATIN CAPITAL LETTER G
|
||||
u'H' # 0x48 -> LATIN CAPITAL LETTER H
|
||||
u'I' # 0x49 -> LATIN CAPITAL LETTER I
|
||||
u'J' # 0x4a -> LATIN CAPITAL LETTER J
|
||||
u'K' # 0x4b -> LATIN CAPITAL LETTER K
|
||||
u'L' # 0x4c -> LATIN CAPITAL LETTER L
|
||||
u'M' # 0x4d -> LATIN CAPITAL LETTER M
|
||||
u'N' # 0x4e -> LATIN CAPITAL LETTER N
|
||||
u'O' # 0x4f -> LATIN CAPITAL LETTER O
|
||||
u'P' # 0x50 -> LATIN CAPITAL LETTER P
|
||||
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
|
||||
u'R' # 0x52 -> LATIN CAPITAL LETTER R
|
||||
u'S' # 0x53 -> LATIN CAPITAL LETTER S
|
||||
u'T' # 0x54 -> LATIN CAPITAL LETTER T
|
||||
u'U' # 0x55 -> LATIN CAPITAL LETTER U
|
||||
u'V' # 0x56 -> LATIN CAPITAL LETTER V
|
||||
u'W' # 0x57 -> LATIN CAPITAL LETTER W
|
||||
u'X' # 0x58 -> LATIN CAPITAL LETTER X
|
||||
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
|
||||
u'Z' # 0x5a -> LATIN CAPITAL LETTER Z
|
||||
u'[' # 0x5b -> LEFT SQUARE BRACKET
|
||||
u'\\' # 0x5c -> REVERSE SOLIDUS
|
||||
u']' # 0x5d -> RIGHT SQUARE BRACKET
|
||||
u'^' # 0x5e -> CIRCUMFLEX ACCENT
|
||||
u'_' # 0x5f -> LOW LINE
|
||||
u'`' # 0x60 -> GRAVE ACCENT
|
||||
u'a' # 0x61 -> LATIN SMALL LETTER A
|
||||
u'b' # 0x62 -> LATIN SMALL LETTER B
|
||||
u'c' # 0x63 -> LATIN SMALL LETTER C
|
||||
u'd' # 0x64 -> LATIN SMALL LETTER D
|
||||
u'e' # 0x65 -> LATIN SMALL LETTER E
|
||||
u'f' # 0x66 -> LATIN SMALL LETTER F
|
||||
u'g' # 0x67 -> LATIN SMALL LETTER G
|
||||
u'h' # 0x68 -> LATIN SMALL LETTER H
|
||||
u'i' # 0x69 -> LATIN SMALL LETTER I
|
||||
u'j' # 0x6a -> LATIN SMALL LETTER J
|
||||
u'k' # 0x6b -> LATIN SMALL LETTER K
|
||||
u'l' # 0x6c -> LATIN SMALL LETTER L
|
||||
u'm' # 0x6d -> LATIN SMALL LETTER M
|
||||
u'n' # 0x6e -> LATIN SMALL LETTER N
|
||||
u'o' # 0x6f -> LATIN SMALL LETTER O
|
||||
u'p' # 0x70 -> LATIN SMALL LETTER P
|
||||
u'q' # 0x71 -> LATIN SMALL LETTER Q
|
||||
u'r' # 0x72 -> LATIN SMALL LETTER R
|
||||
u's' # 0x73 -> LATIN SMALL LETTER S
|
||||
u't' # 0x74 -> LATIN SMALL LETTER T
|
||||
u'u' # 0x75 -> LATIN SMALL LETTER U
|
||||
u'v' # 0x76 -> LATIN SMALL LETTER V
|
||||
u'w' # 0x77 -> LATIN SMALL LETTER W
|
||||
u'x' # 0x78 -> LATIN SMALL LETTER X
|
||||
u'y' # 0x79 -> LATIN SMALL LETTER Y
|
||||
u'z' # 0x7a -> LATIN SMALL LETTER Z
|
||||
u'{' # 0x7b -> LEFT CURLY BRACKET
|
||||
u'|' # 0x7c -> VERTICAL LINE
|
||||
u'}' # 0x7d -> RIGHT CURLY BRACKET
|
||||
u'~' # 0x7e -> TILDE
|
||||
u'\x7f' # 0x7f -> DELETE
|
||||
u'\x80' # 0x80 -> <control>
|
||||
u'\x81' # 0x81 -> <control>
|
||||
u'\x82' # 0x82 -> <control>
|
||||
u'\x83' # 0x83 -> <control>
|
||||
u'\x84' # 0x84 -> <control>
|
||||
u'\x85' # 0x85 -> <control>
|
||||
u'\x86' # 0x86 -> <control>
|
||||
u'\x87' # 0x87 -> <control>
|
||||
u'\x88' # 0x88 -> <control>
|
||||
u'\x89' # 0x89 -> <control>
|
||||
u'\x8a' # 0x8a -> <control>
|
||||
u'\x8b' # 0x8b -> <control>
|
||||
u'\x8c' # 0x8c -> <control>
|
||||
u'\x8d' # 0x8d -> <control>
|
||||
u'\x8e' # 0x8e -> <control>
|
||||
u'\x8f' # 0x8f -> <control>
|
||||
u'\x90' # 0x90 -> <control>
|
||||
u'\x91' # 0x91 -> <control>
|
||||
u'\x92' # 0x92 -> <control>
|
||||
u'\x93' # 0x93 -> <control>
|
||||
u'\x94' # 0x94 -> <control>
|
||||
u'\x95' # 0x95 -> <control>
|
||||
u'\x96' # 0x96 -> <control>
|
||||
u'\x97' # 0x97 -> <control>
|
||||
u'\x98' # 0x98 -> <control>
|
||||
u'\x99' # 0x99 -> <control>
|
||||
u'\x9a' # 0x9a -> <control>
|
||||
u'\x9b' # 0x9b -> <control>
|
||||
u'\x9c' # 0x9c -> <control>
|
||||
u'\x9d' # 0x9d -> <control>
|
||||
u'\x9e' # 0x9e -> <control>
|
||||
u'\x9f' # 0x9f -> <control>
|
||||
u'\xa0' # 0xa0 -> NO-BREAK SPACE
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\xa4' # 0x00a4 -> CURRENCY SIGN
|
||||
u'\xa4' # 0xa4 -> CURRENCY SIGN
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
|
@ -302,8 +204,8 @@ def getregentry():
|
|||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\u060c' # 0x00ac -> ARABIC COMMA
|
||||
u'\xad' # 0x00ad -> SOFT HYPHEN
|
||||
u'\u060c' # 0xac -> ARABIC COMMA
|
||||
u'\xad' # 0xad -> SOFT HYPHEN
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
|
@ -317,62 +219,62 @@ def getregentry():
|
|||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\u061b' # 0x00bb -> ARABIC SEMICOLON
|
||||
u'\u061b' # 0xbb -> ARABIC SEMICOLON
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\u061f' # 0x00bf -> ARABIC QUESTION MARK
|
||||
u'\u061f' # 0xbf -> ARABIC QUESTION MARK
|
||||
u'\ufffe'
|
||||
u'\u0621' # 0x00c1 -> ARABIC LETTER HAMZA
|
||||
u'\u0622' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE
|
||||
u'\u0623' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE
|
||||
u'\u0624' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE
|
||||
u'\u0625' # 0x00c5 -> ARABIC LETTER ALEF WITH HAMZA BELOW
|
||||
u'\u0626' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE
|
||||
u'\u0627' # 0x00c7 -> ARABIC LETTER ALEF
|
||||
u'\u0628' # 0x00c8 -> ARABIC LETTER BEH
|
||||
u'\u0629' # 0x00c9 -> ARABIC LETTER TEH MARBUTA
|
||||
u'\u062a' # 0x00ca -> ARABIC LETTER TEH
|
||||
u'\u062b' # 0x00cb -> ARABIC LETTER THEH
|
||||
u'\u062c' # 0x00cc -> ARABIC LETTER JEEM
|
||||
u'\u062d' # 0x00cd -> ARABIC LETTER HAH
|
||||
u'\u062e' # 0x00ce -> ARABIC LETTER KHAH
|
||||
u'\u062f' # 0x00cf -> ARABIC LETTER DAL
|
||||
u'\u0630' # 0x00d0 -> ARABIC LETTER THAL
|
||||
u'\u0631' # 0x00d1 -> ARABIC LETTER REH
|
||||
u'\u0632' # 0x00d2 -> ARABIC LETTER ZAIN
|
||||
u'\u0633' # 0x00d3 -> ARABIC LETTER SEEN
|
||||
u'\u0634' # 0x00d4 -> ARABIC LETTER SHEEN
|
||||
u'\u0635' # 0x00d5 -> ARABIC LETTER SAD
|
||||
u'\u0636' # 0x00d6 -> ARABIC LETTER DAD
|
||||
u'\u0637' # 0x00d7 -> ARABIC LETTER TAH
|
||||
u'\u0638' # 0x00d8 -> ARABIC LETTER ZAH
|
||||
u'\u0639' # 0x00d9 -> ARABIC LETTER AIN
|
||||
u'\u063a' # 0x00da -> ARABIC LETTER GHAIN
|
||||
u'\u0621' # 0xc1 -> ARABIC LETTER HAMZA
|
||||
u'\u0622' # 0xc2 -> ARABIC LETTER ALEF WITH MADDA ABOVE
|
||||
u'\u0623' # 0xc3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE
|
||||
u'\u0624' # 0xc4 -> ARABIC LETTER WAW WITH HAMZA ABOVE
|
||||
u'\u0625' # 0xc5 -> ARABIC LETTER ALEF WITH HAMZA BELOW
|
||||
u'\u0626' # 0xc6 -> ARABIC LETTER YEH WITH HAMZA ABOVE
|
||||
u'\u0627' # 0xc7 -> ARABIC LETTER ALEF
|
||||
u'\u0628' # 0xc8 -> ARABIC LETTER BEH
|
||||
u'\u0629' # 0xc9 -> ARABIC LETTER TEH MARBUTA
|
||||
u'\u062a' # 0xca -> ARABIC LETTER TEH
|
||||
u'\u062b' # 0xcb -> ARABIC LETTER THEH
|
||||
u'\u062c' # 0xcc -> ARABIC LETTER JEEM
|
||||
u'\u062d' # 0xcd -> ARABIC LETTER HAH
|
||||
u'\u062e' # 0xce -> ARABIC LETTER KHAH
|
||||
u'\u062f' # 0xcf -> ARABIC LETTER DAL
|
||||
u'\u0630' # 0xd0 -> ARABIC LETTER THAL
|
||||
u'\u0631' # 0xd1 -> ARABIC LETTER REH
|
||||
u'\u0632' # 0xd2 -> ARABIC LETTER ZAIN
|
||||
u'\u0633' # 0xd3 -> ARABIC LETTER SEEN
|
||||
u'\u0634' # 0xd4 -> ARABIC LETTER SHEEN
|
||||
u'\u0635' # 0xd5 -> ARABIC LETTER SAD
|
||||
u'\u0636' # 0xd6 -> ARABIC LETTER DAD
|
||||
u'\u0637' # 0xd7 -> ARABIC LETTER TAH
|
||||
u'\u0638' # 0xd8 -> ARABIC LETTER ZAH
|
||||
u'\u0639' # 0xd9 -> ARABIC LETTER AIN
|
||||
u'\u063a' # 0xda -> ARABIC LETTER GHAIN
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\u0640' # 0x00e0 -> ARABIC TATWEEL
|
||||
u'\u0641' # 0x00e1 -> ARABIC LETTER FEH
|
||||
u'\u0642' # 0x00e2 -> ARABIC LETTER QAF
|
||||
u'\u0643' # 0x00e3 -> ARABIC LETTER KAF
|
||||
u'\u0644' # 0x00e4 -> ARABIC LETTER LAM
|
||||
u'\u0645' # 0x00e5 -> ARABIC LETTER MEEM
|
||||
u'\u0646' # 0x00e6 -> ARABIC LETTER NOON
|
||||
u'\u0647' # 0x00e7 -> ARABIC LETTER HEH
|
||||
u'\u0648' # 0x00e8 -> ARABIC LETTER WAW
|
||||
u'\u0649' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA
|
||||
u'\u064a' # 0x00ea -> ARABIC LETTER YEH
|
||||
u'\u064b' # 0x00eb -> ARABIC FATHATAN
|
||||
u'\u064c' # 0x00ec -> ARABIC DAMMATAN
|
||||
u'\u064d' # 0x00ed -> ARABIC KASRATAN
|
||||
u'\u064e' # 0x00ee -> ARABIC FATHA
|
||||
u'\u064f' # 0x00ef -> ARABIC DAMMA
|
||||
u'\u0650' # 0x00f0 -> ARABIC KASRA
|
||||
u'\u0651' # 0x00f1 -> ARABIC SHADDA
|
||||
u'\u0652' # 0x00f2 -> ARABIC SUKUN
|
||||
u'\u0640' # 0xe0 -> ARABIC TATWEEL
|
||||
u'\u0641' # 0xe1 -> ARABIC LETTER FEH
|
||||
u'\u0642' # 0xe2 -> ARABIC LETTER QAF
|
||||
u'\u0643' # 0xe3 -> ARABIC LETTER KAF
|
||||
u'\u0644' # 0xe4 -> ARABIC LETTER LAM
|
||||
u'\u0645' # 0xe5 -> ARABIC LETTER MEEM
|
||||
u'\u0646' # 0xe6 -> ARABIC LETTER NOON
|
||||
u'\u0647' # 0xe7 -> ARABIC LETTER HEH
|
||||
u'\u0648' # 0xe8 -> ARABIC LETTER WAW
|
||||
u'\u0649' # 0xe9 -> ARABIC LETTER ALEF MAKSURA
|
||||
u'\u064a' # 0xea -> ARABIC LETTER YEH
|
||||
u'\u064b' # 0xeb -> ARABIC FATHATAN
|
||||
u'\u064c' # 0xec -> ARABIC DAMMATAN
|
||||
u'\u064d' # 0xed -> ARABIC KASRATAN
|
||||
u'\u064e' # 0xee -> ARABIC FATHA
|
||||
u'\u064f' # 0xef -> ARABIC DAMMA
|
||||
u'\u0650' # 0xf0 -> ARABIC KASRA
|
||||
u'\u0651' # 0xf1 -> ARABIC SHADDA
|
||||
u'\u0652' # 0xf2 -> ARABIC SUKUN
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
|
@ -391,215 +293,215 @@ def getregentry():
|
|||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x0000, # NULL
|
||||
0x0001: 0x0001, # START OF HEADING
|
||||
0x0002: 0x0002, # START OF TEXT
|
||||
0x0003: 0x0003, # END OF TEXT
|
||||
0x0004: 0x0004, # END OF TRANSMISSION
|
||||
0x0005: 0x0005, # ENQUIRY
|
||||
0x0006: 0x0006, # ACKNOWLEDGE
|
||||
0x0007: 0x0007, # BELL
|
||||
0x0008: 0x0008, # BACKSPACE
|
||||
0x0009: 0x0009, # HORIZONTAL TABULATION
|
||||
0x000a: 0x000a, # LINE FEED
|
||||
0x000b: 0x000b, # VERTICAL TABULATION
|
||||
0x000c: 0x000c, # FORM FEED
|
||||
0x000d: 0x000d, # CARRIAGE RETURN
|
||||
0x000e: 0x000e, # SHIFT OUT
|
||||
0x000f: 0x000f, # SHIFT IN
|
||||
0x0010: 0x0010, # DATA LINK ESCAPE
|
||||
0x0011: 0x0011, # DEVICE CONTROL ONE
|
||||
0x0012: 0x0012, # DEVICE CONTROL TWO
|
||||
0x0013: 0x0013, # DEVICE CONTROL THREE
|
||||
0x0014: 0x0014, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x0016, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x0017, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x0018, # CANCEL
|
||||
0x0019: 0x0019, # END OF MEDIUM
|
||||
0x001a: 0x001a, # SUBSTITUTE
|
||||
0x001b: 0x001b, # ESCAPE
|
||||
0x001c: 0x001c, # FILE SEPARATOR
|
||||
0x001d: 0x001d, # GROUP SEPARATOR
|
||||
0x001e: 0x001e, # RECORD SEPARATOR
|
||||
0x001f: 0x001f, # UNIT SEPARATOR
|
||||
0x0020: 0x0020, # SPACE
|
||||
0x0021: 0x0021, # EXCLAMATION MARK
|
||||
0x0022: 0x0022, # QUOTATION MARK
|
||||
0x0023: 0x0023, # NUMBER SIGN
|
||||
0x0024: 0x0024, # DOLLAR SIGN
|
||||
0x0025: 0x0025, # PERCENT SIGN
|
||||
0x0026: 0x0026, # AMPERSAND
|
||||
0x0027: 0x0027, # APOSTROPHE
|
||||
0x0028: 0x0028, # LEFT PARENTHESIS
|
||||
0x0029: 0x0029, # RIGHT PARENTHESIS
|
||||
0x002a: 0x002a, # ASTERISK
|
||||
0x002b: 0x002b, # PLUS SIGN
|
||||
0x002c: 0x002c, # COMMA
|
||||
0x002d: 0x002d, # HYPHEN-MINUS
|
||||
0x002e: 0x002e, # FULL STOP
|
||||
0x002f: 0x002f, # SOLIDUS
|
||||
0x0030: 0x0030, # DIGIT ZERO
|
||||
0x0031: 0x0031, # DIGIT ONE
|
||||
0x0032: 0x0032, # DIGIT TWO
|
||||
0x0033: 0x0033, # DIGIT THREE
|
||||
0x0034: 0x0034, # DIGIT FOUR
|
||||
0x0035: 0x0035, # DIGIT FIVE
|
||||
0x0036: 0x0036, # DIGIT SIX
|
||||
0x0037: 0x0037, # DIGIT SEVEN
|
||||
0x0038: 0x0038, # DIGIT EIGHT
|
||||
0x0039: 0x0039, # DIGIT NINE
|
||||
0x003a: 0x003a, # COLON
|
||||
0x003b: 0x003b, # SEMICOLON
|
||||
0x003c: 0x003c, # LESS-THAN SIGN
|
||||
0x003d: 0x003d, # EQUALS SIGN
|
||||
0x003e: 0x003e, # GREATER-THAN SIGN
|
||||
0x003f: 0x003f, # QUESTION MARK
|
||||
0x0040: 0x0040, # COMMERCIAL AT
|
||||
0x0041: 0x0041, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x0042, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x0043, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x0044, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x0045, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x0046, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x0047, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x0048, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x0049, # LATIN CAPITAL LETTER I
|
||||
0x004a: 0x004a, # LATIN CAPITAL LETTER J
|
||||
0x004b: 0x004b, # LATIN CAPITAL LETTER K
|
||||
0x004c: 0x004c, # LATIN CAPITAL LETTER L
|
||||
0x004d: 0x004d, # LATIN CAPITAL LETTER M
|
||||
0x004e: 0x004e, # LATIN CAPITAL LETTER N
|
||||
0x004f: 0x004f, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x0050, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x0051, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x0052, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x0053, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x0054, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x0055, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x0056, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x0057, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x0058, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x0059, # LATIN CAPITAL LETTER Y
|
||||
0x005a: 0x005a, # LATIN CAPITAL LETTER Z
|
||||
0x005b: 0x005b, # LEFT SQUARE BRACKET
|
||||
0x005c: 0x005c, # REVERSE SOLIDUS
|
||||
0x005d: 0x005d, # RIGHT SQUARE BRACKET
|
||||
0x005e: 0x005e, # CIRCUMFLEX ACCENT
|
||||
0x005f: 0x005f, # LOW LINE
|
||||
0x0060: 0x0060, # GRAVE ACCENT
|
||||
0x0061: 0x0061, # LATIN SMALL LETTER A
|
||||
0x0062: 0x0062, # LATIN SMALL LETTER B
|
||||
0x0063: 0x0063, # LATIN SMALL LETTER C
|
||||
0x0064: 0x0064, # LATIN SMALL LETTER D
|
||||
0x0065: 0x0065, # LATIN SMALL LETTER E
|
||||
0x0066: 0x0066, # LATIN SMALL LETTER F
|
||||
0x0067: 0x0067, # LATIN SMALL LETTER G
|
||||
0x0068: 0x0068, # LATIN SMALL LETTER H
|
||||
0x0069: 0x0069, # LATIN SMALL LETTER I
|
||||
0x006a: 0x006a, # LATIN SMALL LETTER J
|
||||
0x006b: 0x006b, # LATIN SMALL LETTER K
|
||||
0x006c: 0x006c, # LATIN SMALL LETTER L
|
||||
0x006d: 0x006d, # LATIN SMALL LETTER M
|
||||
0x006e: 0x006e, # LATIN SMALL LETTER N
|
||||
0x006f: 0x006f, # LATIN SMALL LETTER O
|
||||
0x0070: 0x0070, # LATIN SMALL LETTER P
|
||||
0x0071: 0x0071, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x0072, # LATIN SMALL LETTER R
|
||||
0x0073: 0x0073, # LATIN SMALL LETTER S
|
||||
0x0074: 0x0074, # LATIN SMALL LETTER T
|
||||
0x0075: 0x0075, # LATIN SMALL LETTER U
|
||||
0x0076: 0x0076, # LATIN SMALL LETTER V
|
||||
0x0077: 0x0077, # LATIN SMALL LETTER W
|
||||
0x0078: 0x0078, # LATIN SMALL LETTER X
|
||||
0x0079: 0x0079, # LATIN SMALL LETTER Y
|
||||
0x007a: 0x007a, # LATIN SMALL LETTER Z
|
||||
0x007b: 0x007b, # LEFT CURLY BRACKET
|
||||
0x007c: 0x007c, # VERTICAL LINE
|
||||
0x007d: 0x007d, # RIGHT CURLY BRACKET
|
||||
0x007e: 0x007e, # TILDE
|
||||
0x007f: 0x007f, # DELETE
|
||||
0x0080: 0x0080, # <control>
|
||||
0x0081: 0x0081, # <control>
|
||||
0x0082: 0x0082, # <control>
|
||||
0x0083: 0x0083, # <control>
|
||||
0x0084: 0x0084, # <control>
|
||||
0x0085: 0x0085, # <control>
|
||||
0x0086: 0x0086, # <control>
|
||||
0x0087: 0x0087, # <control>
|
||||
0x0088: 0x0088, # <control>
|
||||
0x0089: 0x0089, # <control>
|
||||
0x008a: 0x008a, # <control>
|
||||
0x008b: 0x008b, # <control>
|
||||
0x008c: 0x008c, # <control>
|
||||
0x008d: 0x008d, # <control>
|
||||
0x008e: 0x008e, # <control>
|
||||
0x008f: 0x008f, # <control>
|
||||
0x0090: 0x0090, # <control>
|
||||
0x0091: 0x0091, # <control>
|
||||
0x0092: 0x0092, # <control>
|
||||
0x0093: 0x0093, # <control>
|
||||
0x0094: 0x0094, # <control>
|
||||
0x0095: 0x0095, # <control>
|
||||
0x0096: 0x0096, # <control>
|
||||
0x0097: 0x0097, # <control>
|
||||
0x0098: 0x0098, # <control>
|
||||
0x0099: 0x0099, # <control>
|
||||
0x009a: 0x009a, # <control>
|
||||
0x009b: 0x009b, # <control>
|
||||
0x009c: 0x009c, # <control>
|
||||
0x009d: 0x009d, # <control>
|
||||
0x009e: 0x009e, # <control>
|
||||
0x009f: 0x009f, # <control>
|
||||
0x00a0: 0x00a0, # NO-BREAK SPACE
|
||||
0x00a4: 0x00a4, # CURRENCY SIGN
|
||||
0x00ad: 0x00ad, # SOFT HYPHEN
|
||||
0x060c: 0x00ac, # ARABIC COMMA
|
||||
0x061b: 0x00bb, # ARABIC SEMICOLON
|
||||
0x061f: 0x00bf, # ARABIC QUESTION MARK
|
||||
0x0621: 0x00c1, # ARABIC LETTER HAMZA
|
||||
0x0622: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE
|
||||
0x0623: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE
|
||||
0x0624: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE
|
||||
0x0625: 0x00c5, # ARABIC LETTER ALEF WITH HAMZA BELOW
|
||||
0x0626: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE
|
||||
0x0627: 0x00c7, # ARABIC LETTER ALEF
|
||||
0x0628: 0x00c8, # ARABIC LETTER BEH
|
||||
0x0629: 0x00c9, # ARABIC LETTER TEH MARBUTA
|
||||
0x062a: 0x00ca, # ARABIC LETTER TEH
|
||||
0x062b: 0x00cb, # ARABIC LETTER THEH
|
||||
0x062c: 0x00cc, # ARABIC LETTER JEEM
|
||||
0x062d: 0x00cd, # ARABIC LETTER HAH
|
||||
0x062e: 0x00ce, # ARABIC LETTER KHAH
|
||||
0x062f: 0x00cf, # ARABIC LETTER DAL
|
||||
0x0630: 0x00d0, # ARABIC LETTER THAL
|
||||
0x0631: 0x00d1, # ARABIC LETTER REH
|
||||
0x0632: 0x00d2, # ARABIC LETTER ZAIN
|
||||
0x0633: 0x00d3, # ARABIC LETTER SEEN
|
||||
0x0634: 0x00d4, # ARABIC LETTER SHEEN
|
||||
0x0635: 0x00d5, # ARABIC LETTER SAD
|
||||
0x0636: 0x00d6, # ARABIC LETTER DAD
|
||||
0x0637: 0x00d7, # ARABIC LETTER TAH
|
||||
0x0638: 0x00d8, # ARABIC LETTER ZAH
|
||||
0x0639: 0x00d9, # ARABIC LETTER AIN
|
||||
0x063a: 0x00da, # ARABIC LETTER GHAIN
|
||||
0x0640: 0x00e0, # ARABIC TATWEEL
|
||||
0x0641: 0x00e1, # ARABIC LETTER FEH
|
||||
0x0642: 0x00e2, # ARABIC LETTER QAF
|
||||
0x0643: 0x00e3, # ARABIC LETTER KAF
|
||||
0x0644: 0x00e4, # ARABIC LETTER LAM
|
||||
0x0645: 0x00e5, # ARABIC LETTER MEEM
|
||||
0x0646: 0x00e6, # ARABIC LETTER NOON
|
||||
0x0647: 0x00e7, # ARABIC LETTER HEH
|
||||
0x0648: 0x00e8, # ARABIC LETTER WAW
|
||||
0x0649: 0x00e9, # ARABIC LETTER ALEF MAKSURA
|
||||
0x064a: 0x00ea, # ARABIC LETTER YEH
|
||||
0x064b: 0x00eb, # ARABIC FATHATAN
|
||||
0x064c: 0x00ec, # ARABIC DAMMATAN
|
||||
0x064d: 0x00ed, # ARABIC KASRATAN
|
||||
0x064e: 0x00ee, # ARABIC FATHA
|
||||
0x064f: 0x00ef, # ARABIC DAMMA
|
||||
0x0650: 0x00f0, # ARABIC KASRA
|
||||
0x0651: 0x00f1, # ARABIC SHADDA
|
||||
0x0652: 0x00f2, # ARABIC SUKUN
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000a: 0x0a, # LINE FEED
|
||||
0x000b: 0x0b, # VERTICAL TABULATION
|
||||
0x000c: 0x0c, # FORM FEED
|
||||
0x000d: 0x0d, # CARRIAGE RETURN
|
||||
0x000e: 0x0e, # SHIFT OUT
|
||||
0x000f: 0x0f, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001a: 0x1a, # SUBSTITUTE
|
||||
0x001b: 0x1b, # ESCAPE
|
||||
0x001c: 0x1c, # FILE SEPARATOR
|
||||
0x001d: 0x1d, # GROUP SEPARATOR
|
||||
0x001e: 0x1e, # RECORD SEPARATOR
|
||||
0x001f: 0x1f, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002a: 0x2a, # ASTERISK
|
||||
0x002b: 0x2b, # PLUS SIGN
|
||||
0x002c: 0x2c, # COMMA
|
||||
0x002d: 0x2d, # HYPHEN-MINUS
|
||||
0x002e: 0x2e, # FULL STOP
|
||||
0x002f: 0x2f, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003a: 0x3a, # COLON
|
||||
0x003b: 0x3b, # SEMICOLON
|
||||
0x003c: 0x3c, # LESS-THAN SIGN
|
||||
0x003d: 0x3d, # EQUALS SIGN
|
||||
0x003e: 0x3e, # GREATER-THAN SIGN
|
||||
0x003f: 0x3f, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004a: 0x4a, # LATIN CAPITAL LETTER J
|
||||
0x004b: 0x4b, # LATIN CAPITAL LETTER K
|
||||
0x004c: 0x4c, # LATIN CAPITAL LETTER L
|
||||
0x004d: 0x4d, # LATIN CAPITAL LETTER M
|
||||
0x004e: 0x4e, # LATIN CAPITAL LETTER N
|
||||
0x004f: 0x4f, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005a: 0x5a, # LATIN CAPITAL LETTER Z
|
||||
0x005b: 0x5b, # LEFT SQUARE BRACKET
|
||||
0x005c: 0x5c, # REVERSE SOLIDUS
|
||||
0x005d: 0x5d, # RIGHT SQUARE BRACKET
|
||||
0x005e: 0x5e, # CIRCUMFLEX ACCENT
|
||||
0x005f: 0x5f, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006a: 0x6a, # LATIN SMALL LETTER J
|
||||
0x006b: 0x6b, # LATIN SMALL LETTER K
|
||||
0x006c: 0x6c, # LATIN SMALL LETTER L
|
||||
0x006d: 0x6d, # LATIN SMALL LETTER M
|
||||
0x006e: 0x6e, # LATIN SMALL LETTER N
|
||||
0x006f: 0x6f, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007a: 0x7a, # LATIN SMALL LETTER Z
|
||||
0x007b: 0x7b, # LEFT CURLY BRACKET
|
||||
0x007c: 0x7c, # VERTICAL LINE
|
||||
0x007d: 0x7d, # RIGHT CURLY BRACKET
|
||||
0x007e: 0x7e, # TILDE
|
||||
0x007f: 0x7f, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008a: 0x8a, # <control>
|
||||
0x008b: 0x8b, # <control>
|
||||
0x008c: 0x8c, # <control>
|
||||
0x008d: 0x8d, # <control>
|
||||
0x008e: 0x8e, # <control>
|
||||
0x008f: 0x8f, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009a: 0x9a, # <control>
|
||||
0x009b: 0x9b, # <control>
|
||||
0x009c: 0x9c, # <control>
|
||||
0x009d: 0x9d, # <control>
|
||||
0x009e: 0x9e, # <control>
|
||||
0x009f: 0x9f, # <control>
|
||||
0x00a0: 0xa0, # NO-BREAK SPACE
|
||||
0x00a4: 0xa4, # CURRENCY SIGN
|
||||
0x00ad: 0xad, # SOFT HYPHEN
|
||||
0x060c: 0xac, # ARABIC COMMA
|
||||
0x061b: 0xbb, # ARABIC SEMICOLON
|
||||
0x061f: 0xbf, # ARABIC QUESTION MARK
|
||||
0x0621: 0xc1, # ARABIC LETTER HAMZA
|
||||
0x0622: 0xc2, # ARABIC LETTER ALEF WITH MADDA ABOVE
|
||||
0x0623: 0xc3, # ARABIC LETTER ALEF WITH HAMZA ABOVE
|
||||
0x0624: 0xc4, # ARABIC LETTER WAW WITH HAMZA ABOVE
|
||||
0x0625: 0xc5, # ARABIC LETTER ALEF WITH HAMZA BELOW
|
||||
0x0626: 0xc6, # ARABIC LETTER YEH WITH HAMZA ABOVE
|
||||
0x0627: 0xc7, # ARABIC LETTER ALEF
|
||||
0x0628: 0xc8, # ARABIC LETTER BEH
|
||||
0x0629: 0xc9, # ARABIC LETTER TEH MARBUTA
|
||||
0x062a: 0xca, # ARABIC LETTER TEH
|
||||
0x062b: 0xcb, # ARABIC LETTER THEH
|
||||
0x062c: 0xcc, # ARABIC LETTER JEEM
|
||||
0x062d: 0xcd, # ARABIC LETTER HAH
|
||||
0x062e: 0xce, # ARABIC LETTER KHAH
|
||||
0x062f: 0xcf, # ARABIC LETTER DAL
|
||||
0x0630: 0xd0, # ARABIC LETTER THAL
|
||||
0x0631: 0xd1, # ARABIC LETTER REH
|
||||
0x0632: 0xd2, # ARABIC LETTER ZAIN
|
||||
0x0633: 0xd3, # ARABIC LETTER SEEN
|
||||
0x0634: 0xd4, # ARABIC LETTER SHEEN
|
||||
0x0635: 0xd5, # ARABIC LETTER SAD
|
||||
0x0636: 0xd6, # ARABIC LETTER DAD
|
||||
0x0637: 0xd7, # ARABIC LETTER TAH
|
||||
0x0638: 0xd8, # ARABIC LETTER ZAH
|
||||
0x0639: 0xd9, # ARABIC LETTER AIN
|
||||
0x063a: 0xda, # ARABIC LETTER GHAIN
|
||||
0x0640: 0xe0, # ARABIC TATWEEL
|
||||
0x0641: 0xe1, # ARABIC LETTER FEH
|
||||
0x0642: 0xe2, # ARABIC LETTER QAF
|
||||
0x0643: 0xe3, # ARABIC LETTER KAF
|
||||
0x0644: 0xe4, # ARABIC LETTER LAM
|
||||
0x0645: 0xe5, # ARABIC LETTER MEEM
|
||||
0x0646: 0xe6, # ARABIC LETTER NOON
|
||||
0x0647: 0xe7, # ARABIC LETTER HEH
|
||||
0x0648: 0xe8, # ARABIC LETTER WAW
|
||||
0x0649: 0xe9, # ARABIC LETTER ALEF MAKSURA
|
||||
0x064a: 0xea, # ARABIC LETTER YEH
|
||||
0x064b: 0xeb, # ARABIC FATHATAN
|
||||
0x064c: 0xec, # ARABIC DAMMATAN
|
||||
0x064d: 0xed, # ARABIC KASRATAN
|
||||
0x064e: 0xee, # ARABIC FATHA
|
||||
0x064f: 0xef, # ARABIC DAMMA
|
||||
0x0650: 0xf0, # ARABIC KASRA
|
||||
0x0651: 0xf1, # ARABIC SHADDA
|
||||
0x0652: 0xf2, # ARABIC SUKUN
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
|||
""" Python Character Mapping Codec generated from 'ISO8859/8859-8.TXT' with gencodec.py.
|
||||
""" Python Character Mapping Codec generated from 'MAPPINGS/ISO8859/8859-8.TXT' with gencodec.py.
|
||||
|
||||
"""#"
|
||||
|
||||
|
@ -28,274 +28,201 @@ def getregentry():
|
|||
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = codecs.make_identity_dict(range(256))
|
||||
decoding_map.update({
|
||||
0x00a1: None,
|
||||
0x00aa: 0x00d7, # MULTIPLICATION SIGN
|
||||
0x00ba: 0x00f7, # DIVISION SIGN
|
||||
0x00bf: None,
|
||||
0x00c0: None,
|
||||
0x00c1: None,
|
||||
0x00c2: None,
|
||||
0x00c3: None,
|
||||
0x00c4: None,
|
||||
0x00c5: None,
|
||||
0x00c6: None,
|
||||
0x00c7: None,
|
||||
0x00c8: None,
|
||||
0x00c9: None,
|
||||
0x00ca: None,
|
||||
0x00cb: None,
|
||||
0x00cc: None,
|
||||
0x00cd: None,
|
||||
0x00ce: None,
|
||||
0x00cf: None,
|
||||
0x00d0: None,
|
||||
0x00d1: None,
|
||||
0x00d2: None,
|
||||
0x00d3: None,
|
||||
0x00d4: None,
|
||||
0x00d5: None,
|
||||
0x00d6: None,
|
||||
0x00d7: None,
|
||||
0x00d8: None,
|
||||
0x00d9: None,
|
||||
0x00da: None,
|
||||
0x00db: None,
|
||||
0x00dc: None,
|
||||
0x00dd: None,
|
||||
0x00de: None,
|
||||
0x00df: 0x2017, # DOUBLE LOW LINE
|
||||
0x00e0: 0x05d0, # HEBREW LETTER ALEF
|
||||
0x00e1: 0x05d1, # HEBREW LETTER BET
|
||||
0x00e2: 0x05d2, # HEBREW LETTER GIMEL
|
||||
0x00e3: 0x05d3, # HEBREW LETTER DALET
|
||||
0x00e4: 0x05d4, # HEBREW LETTER HE
|
||||
0x00e5: 0x05d5, # HEBREW LETTER VAV
|
||||
0x00e6: 0x05d6, # HEBREW LETTER ZAYIN
|
||||
0x00e7: 0x05d7, # HEBREW LETTER HET
|
||||
0x00e8: 0x05d8, # HEBREW LETTER TET
|
||||
0x00e9: 0x05d9, # HEBREW LETTER YOD
|
||||
0x00ea: 0x05da, # HEBREW LETTER FINAL KAF
|
||||
0x00eb: 0x05db, # HEBREW LETTER KAF
|
||||
0x00ec: 0x05dc, # HEBREW LETTER LAMED
|
||||
0x00ed: 0x05dd, # HEBREW LETTER FINAL MEM
|
||||
0x00ee: 0x05de, # HEBREW LETTER MEM
|
||||
0x00ef: 0x05df, # HEBREW LETTER FINAL NUN
|
||||
0x00f0: 0x05e0, # HEBREW LETTER NUN
|
||||
0x00f1: 0x05e1, # HEBREW LETTER SAMEKH
|
||||
0x00f2: 0x05e2, # HEBREW LETTER AYIN
|
||||
0x00f3: 0x05e3, # HEBREW LETTER FINAL PE
|
||||
0x00f4: 0x05e4, # HEBREW LETTER PE
|
||||
0x00f5: 0x05e5, # HEBREW LETTER FINAL TSADI
|
||||
0x00f6: 0x05e6, # HEBREW LETTER TSADI
|
||||
0x00f7: 0x05e7, # HEBREW LETTER QOF
|
||||
0x00f8: 0x05e8, # HEBREW LETTER RESH
|
||||
0x00f9: 0x05e9, # HEBREW LETTER SHIN
|
||||
0x00fa: 0x05ea, # HEBREW LETTER TAV
|
||||
0x00fb: None,
|
||||
0x00fc: None,
|
||||
0x00fd: 0x200e, # LEFT-TO-RIGHT MARK
|
||||
0x00fe: 0x200f, # RIGHT-TO-LEFT MARK
|
||||
0x00ff: None,
|
||||
})
|
||||
|
||||
### Decoding Table
|
||||
|
||||
decoding_table = (
|
||||
u'\x00' # 0x0000 -> NULL
|
||||
u'\x01' # 0x0001 -> START OF HEADING
|
||||
u'\x02' # 0x0002 -> START OF TEXT
|
||||
u'\x03' # 0x0003 -> END OF TEXT
|
||||
u'\x04' # 0x0004 -> END OF TRANSMISSION
|
||||
u'\x05' # 0x0005 -> ENQUIRY
|
||||
u'\x06' # 0x0006 -> ACKNOWLEDGE
|
||||
u'\x07' # 0x0007 -> BELL
|
||||
u'\x08' # 0x0008 -> BACKSPACE
|
||||
u'\t' # 0x0009 -> HORIZONTAL TABULATION
|
||||
u'\n' # 0x000a -> LINE FEED
|
||||
u'\x0b' # 0x000b -> VERTICAL TABULATION
|
||||
u'\x0c' # 0x000c -> FORM FEED
|
||||
u'\r' # 0x000d -> CARRIAGE RETURN
|
||||
u'\x0e' # 0x000e -> SHIFT OUT
|
||||
u'\x0f' # 0x000f -> SHIFT IN
|
||||
u'\x10' # 0x0010 -> DATA LINK ESCAPE
|
||||
u'\x11' # 0x0011 -> DEVICE CONTROL ONE
|
||||
u'\x12' # 0x0012 -> DEVICE CONTROL TWO
|
||||
u'\x13' # 0x0013 -> DEVICE CONTROL THREE
|
||||
u'\x14' # 0x0014 -> DEVICE CONTROL FOUR
|
||||
u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE
|
||||
u'\x16' # 0x0016 -> SYNCHRONOUS IDLE
|
||||
u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK
|
||||
u'\x18' # 0x0018 -> CANCEL
|
||||
u'\x19' # 0x0019 -> END OF MEDIUM
|
||||
u'\x1a' # 0x001a -> SUBSTITUTE
|
||||
u'\x1b' # 0x001b -> ESCAPE
|
||||
u'\x1c' # 0x001c -> FILE SEPARATOR
|
||||
u'\x1d' # 0x001d -> GROUP SEPARATOR
|
||||
u'\x1e' # 0x001e -> RECORD SEPARATOR
|
||||
u'\x1f' # 0x001f -> UNIT SEPARATOR
|
||||
u' ' # 0x0020 -> SPACE
|
||||
u'!' # 0x0021 -> EXCLAMATION MARK
|
||||
u'"' # 0x0022 -> QUOTATION MARK
|
||||
u'#' # 0x0023 -> NUMBER SIGN
|
||||
u'$' # 0x0024 -> DOLLAR SIGN
|
||||
u'%' # 0x0025 -> PERCENT SIGN
|
||||
u'&' # 0x0026 -> AMPERSAND
|
||||
u"'" # 0x0027 -> APOSTROPHE
|
||||
u'(' # 0x0028 -> LEFT PARENTHESIS
|
||||
u')' # 0x0029 -> RIGHT PARENTHESIS
|
||||
u'*' # 0x002a -> ASTERISK
|
||||
u'+' # 0x002b -> PLUS SIGN
|
||||
u',' # 0x002c -> COMMA
|
||||
u'-' # 0x002d -> HYPHEN-MINUS
|
||||
u'.' # 0x002e -> FULL STOP
|
||||
u'/' # 0x002f -> SOLIDUS
|
||||
u'0' # 0x0030 -> DIGIT ZERO
|
||||
u'1' # 0x0031 -> DIGIT ONE
|
||||
u'2' # 0x0032 -> DIGIT TWO
|
||||
u'3' # 0x0033 -> DIGIT THREE
|
||||
u'4' # 0x0034 -> DIGIT FOUR
|
||||
u'5' # 0x0035 -> DIGIT FIVE
|
||||
u'6' # 0x0036 -> DIGIT SIX
|
||||
u'7' # 0x0037 -> DIGIT SEVEN
|
||||
u'8' # 0x0038 -> DIGIT EIGHT
|
||||
u'9' # 0x0039 -> DIGIT NINE
|
||||
u':' # 0x003a -> COLON
|
||||
u';' # 0x003b -> SEMICOLON
|
||||
u'<' # 0x003c -> LESS-THAN SIGN
|
||||
u'=' # 0x003d -> EQUALS SIGN
|
||||
u'>' # 0x003e -> GREATER-THAN SIGN
|
||||
u'?' # 0x003f -> QUESTION MARK
|
||||
u'@' # 0x0040 -> COMMERCIAL AT
|
||||
u'A' # 0x0041 -> LATIN CAPITAL LETTER A
|
||||
u'B' # 0x0042 -> LATIN CAPITAL LETTER B
|
||||
u'C' # 0x0043 -> LATIN CAPITAL LETTER C
|
||||
u'D' # 0x0044 -> LATIN CAPITAL LETTER D
|
||||
u'E' # 0x0045 -> LATIN CAPITAL LETTER E
|
||||
u'F' # 0x0046 -> LATIN CAPITAL LETTER F
|
||||
u'G' # 0x0047 -> LATIN CAPITAL LETTER G
|
||||
u'H' # 0x0048 -> LATIN CAPITAL LETTER H
|
||||
u'I' # 0x0049 -> LATIN CAPITAL LETTER I
|
||||
u'J' # 0x004a -> LATIN CAPITAL LETTER J
|
||||
u'K' # 0x004b -> LATIN CAPITAL LETTER K
|
||||
u'L' # 0x004c -> LATIN CAPITAL LETTER L
|
||||
u'M' # 0x004d -> LATIN CAPITAL LETTER M
|
||||
u'N' # 0x004e -> LATIN CAPITAL LETTER N
|
||||
u'O' # 0x004f -> LATIN CAPITAL LETTER O
|
||||
u'P' # 0x0050 -> LATIN CAPITAL LETTER P
|
||||
u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q
|
||||
u'R' # 0x0052 -> LATIN CAPITAL LETTER R
|
||||
u'S' # 0x0053 -> LATIN CAPITAL LETTER S
|
||||
u'T' # 0x0054 -> LATIN CAPITAL LETTER T
|
||||
u'U' # 0x0055 -> LATIN CAPITAL LETTER U
|
||||
u'V' # 0x0056 -> LATIN CAPITAL LETTER V
|
||||
u'W' # 0x0057 -> LATIN CAPITAL LETTER W
|
||||
u'X' # 0x0058 -> LATIN CAPITAL LETTER X
|
||||
u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y
|
||||
u'Z' # 0x005a -> LATIN CAPITAL LETTER Z
|
||||
u'[' # 0x005b -> LEFT SQUARE BRACKET
|
||||
u'\\' # 0x005c -> REVERSE SOLIDUS
|
||||
u']' # 0x005d -> RIGHT SQUARE BRACKET
|
||||
u'^' # 0x005e -> CIRCUMFLEX ACCENT
|
||||
u'_' # 0x005f -> LOW LINE
|
||||
u'`' # 0x0060 -> GRAVE ACCENT
|
||||
u'a' # 0x0061 -> LATIN SMALL LETTER A
|
||||
u'b' # 0x0062 -> LATIN SMALL LETTER B
|
||||
u'c' # 0x0063 -> LATIN SMALL LETTER C
|
||||
u'd' # 0x0064 -> LATIN SMALL LETTER D
|
||||
u'e' # 0x0065 -> LATIN SMALL LETTER E
|
||||
u'f' # 0x0066 -> LATIN SMALL LETTER F
|
||||
u'g' # 0x0067 -> LATIN SMALL LETTER G
|
||||
u'h' # 0x0068 -> LATIN SMALL LETTER H
|
||||
u'i' # 0x0069 -> LATIN SMALL LETTER I
|
||||
u'j' # 0x006a -> LATIN SMALL LETTER J
|
||||
u'k' # 0x006b -> LATIN SMALL LETTER K
|
||||
u'l' # 0x006c -> LATIN SMALL LETTER L
|
||||
u'm' # 0x006d -> LATIN SMALL LETTER M
|
||||
u'n' # 0x006e -> LATIN SMALL LETTER N
|
||||
u'o' # 0x006f -> LATIN SMALL LETTER O
|
||||
u'p' # 0x0070 -> LATIN SMALL LETTER P
|
||||
u'q' # 0x0071 -> LATIN SMALL LETTER Q
|
||||
u'r' # 0x0072 -> LATIN SMALL LETTER R
|
||||
u's' # 0x0073 -> LATIN SMALL LETTER S
|
||||
u't' # 0x0074 -> LATIN SMALL LETTER T
|
||||
u'u' # 0x0075 -> LATIN SMALL LETTER U
|
||||
u'v' # 0x0076 -> LATIN SMALL LETTER V
|
||||
u'w' # 0x0077 -> LATIN SMALL LETTER W
|
||||
u'x' # 0x0078 -> LATIN SMALL LETTER X
|
||||
u'y' # 0x0079 -> LATIN SMALL LETTER Y
|
||||
u'z' # 0x007a -> LATIN SMALL LETTER Z
|
||||
u'{' # 0x007b -> LEFT CURLY BRACKET
|
||||
u'|' # 0x007c -> VERTICAL LINE
|
||||
u'}' # 0x007d -> RIGHT CURLY BRACKET
|
||||
u'~' # 0x007e -> TILDE
|
||||
u'\x7f' # 0x007f -> DELETE
|
||||
u'\x80' # 0x0080 -> <control>
|
||||
u'\x81' # 0x0081 -> <control>
|
||||
u'\x82' # 0x0082 -> <control>
|
||||
u'\x83' # 0x0083 -> <control>
|
||||
u'\x84' # 0x0084 -> <control>
|
||||
u'\x85' # 0x0085 -> <control>
|
||||
u'\x86' # 0x0086 -> <control>
|
||||
u'\x87' # 0x0087 -> <control>
|
||||
u'\x88' # 0x0088 -> <control>
|
||||
u'\x89' # 0x0089 -> <control>
|
||||
u'\x8a' # 0x008a -> <control>
|
||||
u'\x8b' # 0x008b -> <control>
|
||||
u'\x8c' # 0x008c -> <control>
|
||||
u'\x8d' # 0x008d -> <control>
|
||||
u'\x8e' # 0x008e -> <control>
|
||||
u'\x8f' # 0x008f -> <control>
|
||||
u'\x90' # 0x0090 -> <control>
|
||||
u'\x91' # 0x0091 -> <control>
|
||||
u'\x92' # 0x0092 -> <control>
|
||||
u'\x93' # 0x0093 -> <control>
|
||||
u'\x94' # 0x0094 -> <control>
|
||||
u'\x95' # 0x0095 -> <control>
|
||||
u'\x96' # 0x0096 -> <control>
|
||||
u'\x97' # 0x0097 -> <control>
|
||||
u'\x98' # 0x0098 -> <control>
|
||||
u'\x99' # 0x0099 -> <control>
|
||||
u'\x9a' # 0x009a -> <control>
|
||||
u'\x9b' # 0x009b -> <control>
|
||||
u'\x9c' # 0x009c -> <control>
|
||||
u'\x9d' # 0x009d -> <control>
|
||||
u'\x9e' # 0x009e -> <control>
|
||||
u'\x9f' # 0x009f -> <control>
|
||||
u'\xa0' # 0x00a0 -> NO-BREAK SPACE
|
||||
u'\x00' # 0x00 -> NULL
|
||||
u'\x01' # 0x01 -> START OF HEADING
|
||||
u'\x02' # 0x02 -> START OF TEXT
|
||||
u'\x03' # 0x03 -> END OF TEXT
|
||||
u'\x04' # 0x04 -> END OF TRANSMISSION
|
||||
u'\x05' # 0x05 -> ENQUIRY
|
||||
u'\x06' # 0x06 -> ACKNOWLEDGE
|
||||
u'\x07' # 0x07 -> BELL
|
||||
u'\x08' # 0x08 -> BACKSPACE
|
||||
u'\t' # 0x09 -> HORIZONTAL TABULATION
|
||||
u'\n' # 0x0a -> LINE FEED
|
||||
u'\x0b' # 0x0b -> VERTICAL TABULATION
|
||||
u'\x0c' # 0x0c -> FORM FEED
|
||||
u'\r' # 0x0d -> CARRIAGE RETURN
|
||||
u'\x0e' # 0x0e -> SHIFT OUT
|
||||
u'\x0f' # 0x0f -> SHIFT IN
|
||||
u'\x10' # 0x10 -> DATA LINK ESCAPE
|
||||
u'\x11' # 0x11 -> DEVICE CONTROL ONE
|
||||
u'\x12' # 0x12 -> DEVICE CONTROL TWO
|
||||
u'\x13' # 0x13 -> DEVICE CONTROL THREE
|
||||
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
|
||||
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
|
||||
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
|
||||
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
|
||||
u'\x18' # 0x18 -> CANCEL
|
||||
u'\x19' # 0x19 -> END OF MEDIUM
|
||||
u'\x1a' # 0x1a -> SUBSTITUTE
|
||||
u'\x1b' # 0x1b -> ESCAPE
|
||||
u'\x1c' # 0x1c -> FILE SEPARATOR
|
||||
u'\x1d' # 0x1d -> GROUP SEPARATOR
|
||||
u'\x1e' # 0x1e -> RECORD SEPARATOR
|
||||
u'\x1f' # 0x1f -> UNIT SEPARATOR
|
||||
u' ' # 0x20 -> SPACE
|
||||
u'!' # 0x21 -> EXCLAMATION MARK
|
||||
u'"' # 0x22 -> QUOTATION MARK
|
||||
u'#' # 0x23 -> NUMBER SIGN
|
||||
u'$' # 0x24 -> DOLLAR SIGN
|
||||
u'%' # 0x25 -> PERCENT SIGN
|
||||
u'&' # 0x26 -> AMPERSAND
|
||||
u"'" # 0x27 -> APOSTROPHE
|
||||
u'(' # 0x28 -> LEFT PARENTHESIS
|
||||
u')' # 0x29 -> RIGHT PARENTHESIS
|
||||
u'*' # 0x2a -> ASTERISK
|
||||
u'+' # 0x2b -> PLUS SIGN
|
||||
u',' # 0x2c -> COMMA
|
||||
u'-' # 0x2d -> HYPHEN-MINUS
|
||||
u'.' # 0x2e -> FULL STOP
|
||||
u'/' # 0x2f -> SOLIDUS
|
||||
u'0' # 0x30 -> DIGIT ZERO
|
||||
u'1' # 0x31 -> DIGIT ONE
|
||||
u'2' # 0x32 -> DIGIT TWO
|
||||
u'3' # 0x33 -> DIGIT THREE
|
||||
u'4' # 0x34 -> DIGIT FOUR
|
||||
u'5' # 0x35 -> DIGIT FIVE
|
||||
u'6' # 0x36 -> DIGIT SIX
|
||||
u'7' # 0x37 -> DIGIT SEVEN
|
||||
u'8' # 0x38 -> DIGIT EIGHT
|
||||
u'9' # 0x39 -> DIGIT NINE
|
||||
u':' # 0x3a -> COLON
|
||||
u';' # 0x3b -> SEMICOLON
|
||||
u'<' # 0x3c -> LESS-THAN SIGN
|
||||
u'=' # 0x3d -> EQUALS SIGN
|
||||
u'>' # 0x3e -> GREATER-THAN SIGN
|
||||
u'?' # 0x3f -> QUESTION MARK
|
||||
u'@' # 0x40 -> COMMERCIAL AT
|
||||
u'A' # 0x41 -> LATIN CAPITAL LETTER A
|
||||
u'B' # 0x42 -> LATIN CAPITAL LETTER B
|
||||
u'C' # 0x43 -> LATIN CAPITAL LETTER C
|
||||
u'D' # 0x44 -> LATIN CAPITAL LETTER D
|
||||
u'E' # 0x45 -> LATIN CAPITAL LETTER E
|
||||
u'F' # 0x46 -> LATIN CAPITAL LETTER F
|
||||
u'G' # 0x47 -> LATIN CAPITAL LETTER G
|
||||
u'H' # 0x48 -> LATIN CAPITAL LETTER H
|
||||
u'I' # 0x49 -> LATIN CAPITAL LETTER I
|
||||
u'J' # 0x4a -> LATIN CAPITAL LETTER J
|
||||
u'K' # 0x4b -> LATIN CAPITAL LETTER K
|
||||
u'L' # 0x4c -> LATIN CAPITAL LETTER L
|
||||
u'M' # 0x4d -> LATIN CAPITAL LETTER M
|
||||
u'N' # 0x4e -> LATIN CAPITAL LETTER N
|
||||
u'O' # 0x4f -> LATIN CAPITAL LETTER O
|
||||
u'P' # 0x50 -> LATIN CAPITAL LETTER P
|
||||
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
|
||||
u'R' # 0x52 -> LATIN CAPITAL LETTER R
|
||||
u'S' # 0x53 -> LATIN CAPITAL LETTER S
|
||||
u'T' # 0x54 -> LATIN CAPITAL LETTER T
|
||||
u'U' # 0x55 -> LATIN CAPITAL LETTER U
|
||||
u'V' # 0x56 -> LATIN CAPITAL LETTER V
|
||||
u'W' # 0x57 -> LATIN CAPITAL LETTER W
|
||||
u'X' # 0x58 -> LATIN CAPITAL LETTER X
|
||||
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
|
||||
u'Z' # 0x5a -> LATIN CAPITAL LETTER Z
|
||||
u'[' # 0x5b -> LEFT SQUARE BRACKET
|
||||
u'\\' # 0x5c -> REVERSE SOLIDUS
|
||||
u']' # 0x5d -> RIGHT SQUARE BRACKET
|
||||
u'^' # 0x5e -> CIRCUMFLEX ACCENT
|
||||
u'_' # 0x5f -> LOW LINE
|
||||
u'`' # 0x60 -> GRAVE ACCENT
|
||||
u'a' # 0x61 -> LATIN SMALL LETTER A
|
||||
u'b' # 0x62 -> LATIN SMALL LETTER B
|
||||
u'c' # 0x63 -> LATIN SMALL LETTER C
|
||||
u'd' # 0x64 -> LATIN SMALL LETTER D
|
||||
u'e' # 0x65 -> LATIN SMALL LETTER E
|
||||
u'f' # 0x66 -> LATIN SMALL LETTER F
|
||||
u'g' # 0x67 -> LATIN SMALL LETTER G
|
||||
u'h' # 0x68 -> LATIN SMALL LETTER H
|
||||
u'i' # 0x69 -> LATIN SMALL LETTER I
|
||||
u'j' # 0x6a -> LATIN SMALL LETTER J
|
||||
u'k' # 0x6b -> LATIN SMALL LETTER K
|
||||
u'l' # 0x6c -> LATIN SMALL LETTER L
|
||||
u'm' # 0x6d -> LATIN SMALL LETTER M
|
||||
u'n' # 0x6e -> LATIN SMALL LETTER N
|
||||
u'o' # 0x6f -> LATIN SMALL LETTER O
|
||||
u'p' # 0x70 -> LATIN SMALL LETTER P
|
||||
u'q' # 0x71 -> LATIN SMALL LETTER Q
|
||||
u'r' # 0x72 -> LATIN SMALL LETTER R
|
||||
u's' # 0x73 -> LATIN SMALL LETTER S
|
||||
u't' # 0x74 -> LATIN SMALL LETTER T
|
||||
u'u' # 0x75 -> LATIN SMALL LETTER U
|
||||
u'v' # 0x76 -> LATIN SMALL LETTER V
|
||||
u'w' # 0x77 -> LATIN SMALL LETTER W
|
||||
u'x' # 0x78 -> LATIN SMALL LETTER X
|
||||
u'y' # 0x79 -> LATIN SMALL LETTER Y
|
||||
u'z' # 0x7a -> LATIN SMALL LETTER Z
|
||||
u'{' # 0x7b -> LEFT CURLY BRACKET
|
||||
u'|' # 0x7c -> VERTICAL LINE
|
||||
u'}' # 0x7d -> RIGHT CURLY BRACKET
|
||||
u'~' # 0x7e -> TILDE
|
||||
u'\x7f' # 0x7f -> DELETE
|
||||
u'\x80' # 0x80 -> <control>
|
||||
u'\x81' # 0x81 -> <control>
|
||||
u'\x82' # 0x82 -> <control>
|
||||
u'\x83' # 0x83 -> <control>
|
||||
u'\x84' # 0x84 -> <control>
|
||||
u'\x85' # 0x85 -> <control>
|
||||
u'\x86' # 0x86 -> <control>
|
||||
u'\x87' # 0x87 -> <control>
|
||||
u'\x88' # 0x88 -> <control>
|
||||
u'\x89' # 0x89 -> <control>
|
||||
u'\x8a' # 0x8a -> <control>
|
||||
u'\x8b' # 0x8b -> <control>
|
||||
u'\x8c' # 0x8c -> <control>
|
||||
u'\x8d' # 0x8d -> <control>
|
||||
u'\x8e' # 0x8e -> <control>
|
||||
u'\x8f' # 0x8f -> <control>
|
||||
u'\x90' # 0x90 -> <control>
|
||||
u'\x91' # 0x91 -> <control>
|
||||
u'\x92' # 0x92 -> <control>
|
||||
u'\x93' # 0x93 -> <control>
|
||||
u'\x94' # 0x94 -> <control>
|
||||
u'\x95' # 0x95 -> <control>
|
||||
u'\x96' # 0x96 -> <control>
|
||||
u'\x97' # 0x97 -> <control>
|
||||
u'\x98' # 0x98 -> <control>
|
||||
u'\x99' # 0x99 -> <control>
|
||||
u'\x9a' # 0x9a -> <control>
|
||||
u'\x9b' # 0x9b -> <control>
|
||||
u'\x9c' # 0x9c -> <control>
|
||||
u'\x9d' # 0x9d -> <control>
|
||||
u'\x9e' # 0x9e -> <control>
|
||||
u'\x9f' # 0x9f -> <control>
|
||||
u'\xa0' # 0xa0 -> NO-BREAK SPACE
|
||||
u'\ufffe'
|
||||
u'\xa2' # 0x00a2 -> CENT SIGN
|
||||
u'\xa3' # 0x00a3 -> POUND SIGN
|
||||
u'\xa4' # 0x00a4 -> CURRENCY SIGN
|
||||
u'\xa5' # 0x00a5 -> YEN SIGN
|
||||
u'\xa6' # 0x00a6 -> BROKEN BAR
|
||||
u'\xa7' # 0x00a7 -> SECTION SIGN
|
||||
u'\xa8' # 0x00a8 -> DIAERESIS
|
||||
u'\xa9' # 0x00a9 -> COPYRIGHT SIGN
|
||||
u'\xd7' # 0x00aa -> MULTIPLICATION SIGN
|
||||
u'\xab' # 0x00ab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
u'\xac' # 0x00ac -> NOT SIGN
|
||||
u'\xad' # 0x00ad -> SOFT HYPHEN
|
||||
u'\xae' # 0x00ae -> REGISTERED SIGN
|
||||
u'\xaf' # 0x00af -> MACRON
|
||||
u'\xb0' # 0x00b0 -> DEGREE SIGN
|
||||
u'\xb1' # 0x00b1 -> PLUS-MINUS SIGN
|
||||
u'\xb2' # 0x00b2 -> SUPERSCRIPT TWO
|
||||
u'\xb3' # 0x00b3 -> SUPERSCRIPT THREE
|
||||
u'\xb4' # 0x00b4 -> ACUTE ACCENT
|
||||
u'\xb5' # 0x00b5 -> MICRO SIGN
|
||||
u'\xb6' # 0x00b6 -> PILCROW SIGN
|
||||
u'\xb7' # 0x00b7 -> MIDDLE DOT
|
||||
u'\xb8' # 0x00b8 -> CEDILLA
|
||||
u'\xb9' # 0x00b9 -> SUPERSCRIPT ONE
|
||||
u'\xf7' # 0x00ba -> DIVISION SIGN
|
||||
u'\xbb' # 0x00bb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
u'\xbc' # 0x00bc -> VULGAR FRACTION ONE QUARTER
|
||||
u'\xbd' # 0x00bd -> VULGAR FRACTION ONE HALF
|
||||
u'\xbe' # 0x00be -> VULGAR FRACTION THREE QUARTERS
|
||||
u'\xa2' # 0xa2 -> CENT SIGN
|
||||
u'\xa3' # 0xa3 -> POUND SIGN
|
||||
u'\xa4' # 0xa4 -> CURRENCY SIGN
|
||||
u'\xa5' # 0xa5 -> YEN SIGN
|
||||
u'\xa6' # 0xa6 -> BROKEN BAR
|
||||
u'\xa7' # 0xa7 -> SECTION SIGN
|
||||
u'\xa8' # 0xa8 -> DIAERESIS
|
||||
u'\xa9' # 0xa9 -> COPYRIGHT SIGN
|
||||
u'\xd7' # 0xaa -> MULTIPLICATION SIGN
|
||||
u'\xab' # 0xab -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
u'\xac' # 0xac -> NOT SIGN
|
||||
u'\xad' # 0xad -> SOFT HYPHEN
|
||||
u'\xae' # 0xae -> REGISTERED SIGN
|
||||
u'\xaf' # 0xaf -> MACRON
|
||||
u'\xb0' # 0xb0 -> DEGREE SIGN
|
||||
u'\xb1' # 0xb1 -> PLUS-MINUS SIGN
|
||||
u'\xb2' # 0xb2 -> SUPERSCRIPT TWO
|
||||
u'\xb3' # 0xb3 -> SUPERSCRIPT THREE
|
||||
u'\xb4' # 0xb4 -> ACUTE ACCENT
|
||||
u'\xb5' # 0xb5 -> MICRO SIGN
|
||||
u'\xb6' # 0xb6 -> PILCROW SIGN
|
||||
u'\xb7' # 0xb7 -> MIDDLE DOT
|
||||
u'\xb8' # 0xb8 -> CEDILLA
|
||||
u'\xb9' # 0xb9 -> SUPERSCRIPT ONE
|
||||
u'\xf7' # 0xba -> DIVISION SIGN
|
||||
u'\xbb' # 0xbb -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
u'\xbc' # 0xbc -> VULGAR FRACTION ONE QUARTER
|
||||
u'\xbd' # 0xbd -> VULGAR FRACTION ONE HALF
|
||||
u'\xbe' # 0xbe -> VULGAR FRACTION THREE QUARTERS
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
|
@ -328,262 +255,262 @@ def getregentry():
|
|||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\u2017' # 0x00df -> DOUBLE LOW LINE
|
||||
u'\u05d0' # 0x00e0 -> HEBREW LETTER ALEF
|
||||
u'\u05d1' # 0x00e1 -> HEBREW LETTER BET
|
||||
u'\u05d2' # 0x00e2 -> HEBREW LETTER GIMEL
|
||||
u'\u05d3' # 0x00e3 -> HEBREW LETTER DALET
|
||||
u'\u05d4' # 0x00e4 -> HEBREW LETTER HE
|
||||
u'\u05d5' # 0x00e5 -> HEBREW LETTER VAV
|
||||
u'\u05d6' # 0x00e6 -> HEBREW LETTER ZAYIN
|
||||
u'\u05d7' # 0x00e7 -> HEBREW LETTER HET
|
||||
u'\u05d8' # 0x00e8 -> HEBREW LETTER TET
|
||||
u'\u05d9' # 0x00e9 -> HEBREW LETTER YOD
|
||||
u'\u05da' # 0x00ea -> HEBREW LETTER FINAL KAF
|
||||
u'\u05db' # 0x00eb -> HEBREW LETTER KAF
|
||||
u'\u05dc' # 0x00ec -> HEBREW LETTER LAMED
|
||||
u'\u05dd' # 0x00ed -> HEBREW LETTER FINAL MEM
|
||||
u'\u05de' # 0x00ee -> HEBREW LETTER MEM
|
||||
u'\u05df' # 0x00ef -> HEBREW LETTER FINAL NUN
|
||||
u'\u05e0' # 0x00f0 -> HEBREW LETTER NUN
|
||||
u'\u05e1' # 0x00f1 -> HEBREW LETTER SAMEKH
|
||||
u'\u05e2' # 0x00f2 -> HEBREW LETTER AYIN
|
||||
u'\u05e3' # 0x00f3 -> HEBREW LETTER FINAL PE
|
||||
u'\u05e4' # 0x00f4 -> HEBREW LETTER PE
|
||||
u'\u05e5' # 0x00f5 -> HEBREW LETTER FINAL TSADI
|
||||
u'\u05e6' # 0x00f6 -> HEBREW LETTER TSADI
|
||||
u'\u05e7' # 0x00f7 -> HEBREW LETTER QOF
|
||||
u'\u05e8' # 0x00f8 -> HEBREW LETTER RESH
|
||||
u'\u05e9' # 0x00f9 -> HEBREW LETTER SHIN
|
||||
u'\u05ea' # 0x00fa -> HEBREW LETTER TAV
|
||||
u'\u2017' # 0xdf -> DOUBLE LOW LINE
|
||||
u'\u05d0' # 0xe0 -> HEBREW LETTER ALEF
|
||||
u'\u05d1' # 0xe1 -> HEBREW LETTER BET
|
||||
u'\u05d2' # 0xe2 -> HEBREW LETTER GIMEL
|
||||
u'\u05d3' # 0xe3 -> HEBREW LETTER DALET
|
||||
u'\u05d4' # 0xe4 -> HEBREW LETTER HE
|
||||
u'\u05d5' # 0xe5 -> HEBREW LETTER VAV
|
||||
u'\u05d6' # 0xe6 -> HEBREW LETTER ZAYIN
|
||||
u'\u05d7' # 0xe7 -> HEBREW LETTER HET
|
||||
u'\u05d8' # 0xe8 -> HEBREW LETTER TET
|
||||
u'\u05d9' # 0xe9 -> HEBREW LETTER YOD
|
||||
u'\u05da' # 0xea -> HEBREW LETTER FINAL KAF
|
||||
u'\u05db' # 0xeb -> HEBREW LETTER KAF
|
||||
u'\u05dc' # 0xec -> HEBREW LETTER LAMED
|
||||
u'\u05dd' # 0xed -> HEBREW LETTER FINAL MEM
|
||||
u'\u05de' # 0xee -> HEBREW LETTER MEM
|
||||
u'\u05df' # 0xef -> HEBREW LETTER FINAL NUN
|
||||
u'\u05e0' # 0xf0 -> HEBREW LETTER NUN
|
||||
u'\u05e1' # 0xf1 -> HEBREW LETTER SAMEKH
|
||||
u'\u05e2' # 0xf2 -> HEBREW LETTER AYIN
|
||||
u'\u05e3' # 0xf3 -> HEBREW LETTER FINAL PE
|
||||
u'\u05e4' # 0xf4 -> HEBREW LETTER PE
|
||||
u'\u05e5' # 0xf5 -> HEBREW LETTER FINAL TSADI
|
||||
u'\u05e6' # 0xf6 -> HEBREW LETTER TSADI
|
||||
u'\u05e7' # 0xf7 -> HEBREW LETTER QOF
|
||||
u'\u05e8' # 0xf8 -> HEBREW LETTER RESH
|
||||
u'\u05e9' # 0xf9 -> HEBREW LETTER SHIN
|
||||
u'\u05ea' # 0xfa -> HEBREW LETTER TAV
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\u200e' # 0x00fd -> LEFT-TO-RIGHT MARK
|
||||
u'\u200f' # 0x00fe -> RIGHT-TO-LEFT MARK
|
||||
u'\u200e' # 0xfd -> LEFT-TO-RIGHT MARK
|
||||
u'\u200f' # 0xfe -> RIGHT-TO-LEFT MARK
|
||||
u'\ufffe'
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = {
|
||||
0x0000: 0x0000, # NULL
|
||||
0x0001: 0x0001, # START OF HEADING
|
||||
0x0002: 0x0002, # START OF TEXT
|
||||
0x0003: 0x0003, # END OF TEXT
|
||||
0x0004: 0x0004, # END OF TRANSMISSION
|
||||
0x0005: 0x0005, # ENQUIRY
|
||||
0x0006: 0x0006, # ACKNOWLEDGE
|
||||
0x0007: 0x0007, # BELL
|
||||
0x0008: 0x0008, # BACKSPACE
|
||||
0x0009: 0x0009, # HORIZONTAL TABULATION
|
||||
0x000a: 0x000a, # LINE FEED
|
||||
0x000b: 0x000b, # VERTICAL TABULATION
|
||||
0x000c: 0x000c, # FORM FEED
|
||||
0x000d: 0x000d, # CARRIAGE RETURN
|
||||
0x000e: 0x000e, # SHIFT OUT
|
||||
0x000f: 0x000f, # SHIFT IN
|
||||
0x0010: 0x0010, # DATA LINK ESCAPE
|
||||
0x0011: 0x0011, # DEVICE CONTROL ONE
|
||||
0x0012: 0x0012, # DEVICE CONTROL TWO
|
||||
0x0013: 0x0013, # DEVICE CONTROL THREE
|
||||
0x0014: 0x0014, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x0016, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x0017, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x0018, # CANCEL
|
||||
0x0019: 0x0019, # END OF MEDIUM
|
||||
0x001a: 0x001a, # SUBSTITUTE
|
||||
0x001b: 0x001b, # ESCAPE
|
||||
0x001c: 0x001c, # FILE SEPARATOR
|
||||
0x001d: 0x001d, # GROUP SEPARATOR
|
||||
0x001e: 0x001e, # RECORD SEPARATOR
|
||||
0x001f: 0x001f, # UNIT SEPARATOR
|
||||
0x0020: 0x0020, # SPACE
|
||||
0x0021: 0x0021, # EXCLAMATION MARK
|
||||
0x0022: 0x0022, # QUOTATION MARK
|
||||
0x0023: 0x0023, # NUMBER SIGN
|
||||
0x0024: 0x0024, # DOLLAR SIGN
|
||||
0x0025: 0x0025, # PERCENT SIGN
|
||||
0x0026: 0x0026, # AMPERSAND
|
||||
0x0027: 0x0027, # APOSTROPHE
|
||||
0x0028: 0x0028, # LEFT PARENTHESIS
|
||||
0x0029: 0x0029, # RIGHT PARENTHESIS
|
||||
0x002a: 0x002a, # ASTERISK
|
||||
0x002b: 0x002b, # PLUS SIGN
|
||||
0x002c: 0x002c, # COMMA
|
||||
0x002d: 0x002d, # HYPHEN-MINUS
|
||||
0x002e: 0x002e, # FULL STOP
|
||||
0x002f: 0x002f, # SOLIDUS
|
||||
0x0030: 0x0030, # DIGIT ZERO
|
||||
0x0031: 0x0031, # DIGIT ONE
|
||||
0x0032: 0x0032, # DIGIT TWO
|
||||
0x0033: 0x0033, # DIGIT THREE
|
||||
0x0034: 0x0034, # DIGIT FOUR
|
||||
0x0035: 0x0035, # DIGIT FIVE
|
||||
0x0036: 0x0036, # DIGIT SIX
|
||||
0x0037: 0x0037, # DIGIT SEVEN
|
||||
0x0038: 0x0038, # DIGIT EIGHT
|
||||
0x0039: 0x0039, # DIGIT NINE
|
||||
0x003a: 0x003a, # COLON
|
||||
0x003b: 0x003b, # SEMICOLON
|
||||
0x003c: 0x003c, # LESS-THAN SIGN
|
||||
0x003d: 0x003d, # EQUALS SIGN
|
||||
0x003e: 0x003e, # GREATER-THAN SIGN
|
||||
0x003f: 0x003f, # QUESTION MARK
|
||||
0x0040: 0x0040, # COMMERCIAL AT
|
||||
0x0041: 0x0041, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x0042, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x0043, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x0044, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x0045, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x0046, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x0047, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x0048, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x0049, # LATIN CAPITAL LETTER I
|
||||
0x004a: 0x004a, # LATIN CAPITAL LETTER J
|
||||
0x004b: 0x004b, # LATIN CAPITAL LETTER K
|
||||
0x004c: 0x004c, # LATIN CAPITAL LETTER L
|
||||
0x004d: 0x004d, # LATIN CAPITAL LETTER M
|
||||
0x004e: 0x004e, # LATIN CAPITAL LETTER N
|
||||
0x004f: 0x004f, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x0050, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x0051, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x0052, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x0053, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x0054, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x0055, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x0056, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x0057, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x0058, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x0059, # LATIN CAPITAL LETTER Y
|
||||
0x005a: 0x005a, # LATIN CAPITAL LETTER Z
|
||||
0x005b: 0x005b, # LEFT SQUARE BRACKET
|
||||
0x005c: 0x005c, # REVERSE SOLIDUS
|
||||
0x005d: 0x005d, # RIGHT SQUARE BRACKET
|
||||
0x005e: 0x005e, # CIRCUMFLEX ACCENT
|
||||
0x005f: 0x005f, # LOW LINE
|
||||
0x0060: 0x0060, # GRAVE ACCENT
|
||||
0x0061: 0x0061, # LATIN SMALL LETTER A
|
||||
0x0062: 0x0062, # LATIN SMALL LETTER B
|
||||
0x0063: 0x0063, # LATIN SMALL LETTER C
|
||||
0x0064: 0x0064, # LATIN SMALL LETTER D
|
||||
0x0065: 0x0065, # LATIN SMALL LETTER E
|
||||
0x0066: 0x0066, # LATIN SMALL LETTER F
|
||||
0x0067: 0x0067, # LATIN SMALL LETTER G
|
||||
0x0068: 0x0068, # LATIN SMALL LETTER H
|
||||
0x0069: 0x0069, # LATIN SMALL LETTER I
|
||||
0x006a: 0x006a, # LATIN SMALL LETTER J
|
||||
0x006b: 0x006b, # LATIN SMALL LETTER K
|
||||
0x006c: 0x006c, # LATIN SMALL LETTER L
|
||||
0x006d: 0x006d, # LATIN SMALL LETTER M
|
||||
0x006e: 0x006e, # LATIN SMALL LETTER N
|
||||
0x006f: 0x006f, # LATIN SMALL LETTER O
|
||||
0x0070: 0x0070, # LATIN SMALL LETTER P
|
||||
0x0071: 0x0071, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x0072, # LATIN SMALL LETTER R
|
||||
0x0073: 0x0073, # LATIN SMALL LETTER S
|
||||
0x0074: 0x0074, # LATIN SMALL LETTER T
|
||||
0x0075: 0x0075, # LATIN SMALL LETTER U
|
||||
0x0076: 0x0076, # LATIN SMALL LETTER V
|
||||
0x0077: 0x0077, # LATIN SMALL LETTER W
|
||||
0x0078: 0x0078, # LATIN SMALL LETTER X
|
||||
0x0079: 0x0079, # LATIN SMALL LETTER Y
|
||||
0x007a: 0x007a, # LATIN SMALL LETTER Z
|
||||
0x007b: 0x007b, # LEFT CURLY BRACKET
|
||||
0x007c: 0x007c, # VERTICAL LINE
|
||||
0x007d: 0x007d, # RIGHT CURLY BRACKET
|
||||
0x007e: 0x007e, # TILDE
|
||||
0x007f: 0x007f, # DELETE
|
||||
0x0080: 0x0080, # <control>
|
||||
0x0081: 0x0081, # <control>
|
||||
0x0082: 0x0082, # <control>
|
||||
0x0083: 0x0083, # <control>
|
||||
0x0084: 0x0084, # <control>
|
||||
0x0085: 0x0085, # <control>
|
||||
0x0086: 0x0086, # <control>
|
||||
0x0087: 0x0087, # <control>
|
||||
0x0088: 0x0088, # <control>
|
||||
0x0089: 0x0089, # <control>
|
||||
0x008a: 0x008a, # <control>
|
||||
0x008b: 0x008b, # <control>
|
||||
0x008c: 0x008c, # <control>
|
||||
0x008d: 0x008d, # <control>
|
||||
0x008e: 0x008e, # <control>
|
||||
0x008f: 0x008f, # <control>
|
||||
0x0090: 0x0090, # <control>
|
||||
0x0091: 0x0091, # <control>
|
||||
0x0092: 0x0092, # <control>
|
||||
0x0093: 0x0093, # <control>
|
||||
0x0094: 0x0094, # <control>
|
||||
0x0095: 0x0095, # <control>
|
||||
0x0096: 0x0096, # <control>
|
||||
0x0097: 0x0097, # <control>
|
||||
0x0098: 0x0098, # <control>
|
||||
0x0099: 0x0099, # <control>
|
||||
0x009a: 0x009a, # <control>
|
||||
0x009b: 0x009b, # <control>
|
||||
0x009c: 0x009c, # <control>
|
||||
0x009d: 0x009d, # <control>
|
||||
0x009e: 0x009e, # <control>
|
||||
0x009f: 0x009f, # <control>
|
||||
0x00a0: 0x00a0, # NO-BREAK SPACE
|
||||
0x00a2: 0x00a2, # CENT SIGN
|
||||
0x00a3: 0x00a3, # POUND SIGN
|
||||
0x00a4: 0x00a4, # CURRENCY SIGN
|
||||
0x00a5: 0x00a5, # YEN SIGN
|
||||
0x00a6: 0x00a6, # BROKEN BAR
|
||||
0x00a7: 0x00a7, # SECTION SIGN
|
||||
0x00a8: 0x00a8, # DIAERESIS
|
||||
0x00a9: 0x00a9, # COPYRIGHT SIGN
|
||||
0x00ab: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00ac: 0x00ac, # NOT SIGN
|
||||
0x00ad: 0x00ad, # SOFT HYPHEN
|
||||
0x00ae: 0x00ae, # REGISTERED SIGN
|
||||
0x00af: 0x00af, # MACRON
|
||||
0x00b0: 0x00b0, # DEGREE SIGN
|
||||
0x00b1: 0x00b1, # PLUS-MINUS SIGN
|
||||
0x00b2: 0x00b2, # SUPERSCRIPT TWO
|
||||
0x00b3: 0x00b3, # SUPERSCRIPT THREE
|
||||
0x00b4: 0x00b4, # ACUTE ACCENT
|
||||
0x00b5: 0x00b5, # MICRO SIGN
|
||||
0x00b6: 0x00b6, # PILCROW SIGN
|
||||
0x00b7: 0x00b7, # MIDDLE DOT
|
||||
0x00b8: 0x00b8, # CEDILLA
|
||||
0x00b9: 0x00b9, # SUPERSCRIPT ONE
|
||||
0x00bb: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00bc: 0x00bc, # VULGAR FRACTION ONE QUARTER
|
||||
0x00bd: 0x00bd, # VULGAR FRACTION ONE HALF
|
||||
0x00be: 0x00be, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00d7: 0x00aa, # MULTIPLICATION SIGN
|
||||
0x00f7: 0x00ba, # DIVISION SIGN
|
||||
0x05d0: 0x00e0, # HEBREW LETTER ALEF
|
||||
0x05d1: 0x00e1, # HEBREW LETTER BET
|
||||
0x05d2: 0x00e2, # HEBREW LETTER GIMEL
|
||||
0x05d3: 0x00e3, # HEBREW LETTER DALET
|
||||
0x05d4: 0x00e4, # HEBREW LETTER HE
|
||||
0x05d5: 0x00e5, # HEBREW LETTER VAV
|
||||
0x05d6: 0x00e6, # HEBREW LETTER ZAYIN
|
||||
0x05d7: 0x00e7, # HEBREW LETTER HET
|
||||
0x05d8: 0x00e8, # HEBREW LETTER TET
|
||||
0x05d9: 0x00e9, # HEBREW LETTER YOD
|
||||
0x05da: 0x00ea, # HEBREW LETTER FINAL KAF
|
||||
0x05db: 0x00eb, # HEBREW LETTER KAF
|
||||
0x05dc: 0x00ec, # HEBREW LETTER LAMED
|
||||
0x05dd: 0x00ed, # HEBREW LETTER FINAL MEM
|
||||
0x05de: 0x00ee, # HEBREW LETTER MEM
|
||||
0x05df: 0x00ef, # HEBREW LETTER FINAL NUN
|
||||
0x05e0: 0x00f0, # HEBREW LETTER NUN
|
||||
0x05e1: 0x00f1, # HEBREW LETTER SAMEKH
|
||||
0x05e2: 0x00f2, # HEBREW LETTER AYIN
|
||||
0x05e3: 0x00f3, # HEBREW LETTER FINAL PE
|
||||
0x05e4: 0x00f4, # HEBREW LETTER PE
|
||||
0x05e5: 0x00f5, # HEBREW LETTER FINAL TSADI
|
||||
0x05e6: 0x00f6, # HEBREW LETTER TSADI
|
||||
0x05e7: 0x00f7, # HEBREW LETTER QOF
|
||||
0x05e8: 0x00f8, # HEBREW LETTER RESH
|
||||
0x05e9: 0x00f9, # HEBREW LETTER SHIN
|
||||
0x05ea: 0x00fa, # HEBREW LETTER TAV
|
||||
0x200e: 0x00fd, # LEFT-TO-RIGHT MARK
|
||||
0x200f: 0x00fe, # RIGHT-TO-LEFT MARK
|
||||
0x2017: 0x00df, # DOUBLE LOW LINE
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000a: 0x0a, # LINE FEED
|
||||
0x000b: 0x0b, # VERTICAL TABULATION
|
||||
0x000c: 0x0c, # FORM FEED
|
||||
0x000d: 0x0d, # CARRIAGE RETURN
|
||||
0x000e: 0x0e, # SHIFT OUT
|
||||
0x000f: 0x0f, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001a: 0x1a, # SUBSTITUTE
|
||||
0x001b: 0x1b, # ESCAPE
|
||||
0x001c: 0x1c, # FILE SEPARATOR
|
||||
0x001d: 0x1d, # GROUP SEPARATOR
|
||||
0x001e: 0x1e, # RECORD SEPARATOR
|
||||
0x001f: 0x1f, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002a: 0x2a, # ASTERISK
|
||||
0x002b: 0x2b, # PLUS SIGN
|
||||
0x002c: 0x2c, # COMMA
|
||||
0x002d: 0x2d, # HYPHEN-MINUS
|
||||
0x002e: 0x2e, # FULL STOP
|
||||
0x002f: 0x2f, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003a: 0x3a, # COLON
|
||||
0x003b: 0x3b, # SEMICOLON
|
||||
0x003c: 0x3c, # LESS-THAN SIGN
|
||||
0x003d: 0x3d, # EQUALS SIGN
|
||||
0x003e: 0x3e, # GREATER-THAN SIGN
|
||||
0x003f: 0x3f, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004a: 0x4a, # LATIN CAPITAL LETTER J
|
||||
0x004b: 0x4b, # LATIN CAPITAL LETTER K
|
||||
0x004c: 0x4c, # LATIN CAPITAL LETTER L
|
||||
0x004d: 0x4d, # LATIN CAPITAL LETTER M
|
||||
0x004e: 0x4e, # LATIN CAPITAL LETTER N
|
||||
0x004f: 0x4f, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005a: 0x5a, # LATIN CAPITAL LETTER Z
|
||||
0x005b: 0x5b, # LEFT SQUARE BRACKET
|
||||
0x005c: 0x5c, # REVERSE SOLIDUS
|
||||
0x005d: 0x5d, # RIGHT SQUARE BRACKET
|
||||
0x005e: 0x5e, # CIRCUMFLEX ACCENT
|
||||
0x005f: 0x5f, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006a: 0x6a, # LATIN SMALL LETTER J
|
||||
0x006b: 0x6b, # LATIN SMALL LETTER K
|
||||
0x006c: 0x6c, # LATIN SMALL LETTER L
|
||||
0x006d: 0x6d, # LATIN SMALL LETTER M
|
||||
0x006e: 0x6e, # LATIN SMALL LETTER N
|
||||
0x006f: 0x6f, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007a: 0x7a, # LATIN SMALL LETTER Z
|
||||
0x007b: 0x7b, # LEFT CURLY BRACKET
|
||||
0x007c: 0x7c, # VERTICAL LINE
|
||||
0x007d: 0x7d, # RIGHT CURLY BRACKET
|
||||
0x007e: 0x7e, # TILDE
|
||||
0x007f: 0x7f, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008a: 0x8a, # <control>
|
||||
0x008b: 0x8b, # <control>
|
||||
0x008c: 0x8c, # <control>
|
||||
0x008d: 0x8d, # <control>
|
||||
0x008e: 0x8e, # <control>
|
||||
0x008f: 0x8f, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009a: 0x9a, # <control>
|
||||
0x009b: 0x9b, # <control>
|
||||
0x009c: 0x9c, # <control>
|
||||
0x009d: 0x9d, # <control>
|
||||
0x009e: 0x9e, # <control>
|
||||
0x009f: 0x9f, # <control>
|
||||
0x00a0: 0xa0, # NO-BREAK SPACE
|
||||
0x00a2: 0xa2, # CENT SIGN
|
||||
0x00a3: 0xa3, # POUND SIGN
|
||||
0x00a4: 0xa4, # CURRENCY SIGN
|
||||
0x00a5: 0xa5, # YEN SIGN
|
||||
0x00a6: 0xa6, # BROKEN BAR
|
||||
0x00a7: 0xa7, # SECTION SIGN
|
||||
0x00a8: 0xa8, # DIAERESIS
|
||||
0x00a9: 0xa9, # COPYRIGHT SIGN
|
||||
0x00ab: 0xab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00ac: 0xac, # NOT SIGN
|
||||
0x00ad: 0xad, # SOFT HYPHEN
|
||||
0x00ae: 0xae, # REGISTERED SIGN
|
||||
0x00af: 0xaf, # MACRON
|
||||
0x00b0: 0xb0, # DEGREE SIGN
|
||||
0x00b1: 0xb1, # PLUS-MINUS SIGN
|
||||
0x00b2: 0xb2, # SUPERSCRIPT TWO
|
||||
0x00b3: 0xb3, # SUPERSCRIPT THREE
|
||||
0x00b4: 0xb4, # ACUTE ACCENT
|
||||
0x00b5: 0xb5, # MICRO SIGN
|
||||
0x00b6: 0xb6, # PILCROW SIGN
|
||||
0x00b7: 0xb7, # MIDDLE DOT
|
||||
0x00b8: 0xb8, # CEDILLA
|
||||
0x00b9: 0xb9, # SUPERSCRIPT ONE
|
||||
0x00bb: 0xbb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
0x00bc: 0xbc, # VULGAR FRACTION ONE QUARTER
|
||||
0x00bd: 0xbd, # VULGAR FRACTION ONE HALF
|
||||
0x00be: 0xbe, # VULGAR FRACTION THREE QUARTERS
|
||||
0x00d7: 0xaa, # MULTIPLICATION SIGN
|
||||
0x00f7: 0xba, # DIVISION SIGN
|
||||
0x05d0: 0xe0, # HEBREW LETTER ALEF
|
||||
0x05d1: 0xe1, # HEBREW LETTER BET
|
||||
0x05d2: 0xe2, # HEBREW LETTER GIMEL
|
||||
0x05d3: 0xe3, # HEBREW LETTER DALET
|
||||
0x05d4: 0xe4, # HEBREW LETTER HE
|
||||
0x05d5: 0xe5, # HEBREW LETTER VAV
|
||||
0x05d6: 0xe6, # HEBREW LETTER ZAYIN
|
||||
0x05d7: 0xe7, # HEBREW LETTER HET
|
||||
0x05d8: 0xe8, # HEBREW LETTER TET
|
||||
0x05d9: 0xe9, # HEBREW LETTER YOD
|
||||
0x05da: 0xea, # HEBREW LETTER FINAL KAF
|
||||
0x05db: 0xeb, # HEBREW LETTER KAF
|
||||
0x05dc: 0xec, # HEBREW LETTER LAMED
|
||||
0x05dd: 0xed, # HEBREW LETTER FINAL MEM
|
||||
0x05de: 0xee, # HEBREW LETTER MEM
|
||||
0x05df: 0xef, # HEBREW LETTER FINAL NUN
|
||||
0x05e0: 0xf0, # HEBREW LETTER NUN
|
||||
0x05e1: 0xf1, # HEBREW LETTER SAMEKH
|
||||
0x05e2: 0xf2, # HEBREW LETTER AYIN
|
||||
0x05e3: 0xf3, # HEBREW LETTER FINAL PE
|
||||
0x05e4: 0xf4, # HEBREW LETTER PE
|
||||
0x05e5: 0xf5, # HEBREW LETTER FINAL TSADI
|
||||
0x05e6: 0xf6, # HEBREW LETTER TSADI
|
||||
0x05e7: 0xf7, # HEBREW LETTER QOF
|
||||
0x05e8: 0xf8, # HEBREW LETTER RESH
|
||||
0x05e9: 0xf9, # HEBREW LETTER SHIN
|
||||
0x05ea: 0xfa, # HEBREW LETTER TAV
|
||||
0x200e: 0xfd, # LEFT-TO-RIGHT MARK
|
||||
0x200f: 0xfe, # RIGHT-TO-LEFT MARK
|
||||
0x2017: 0xdf, # DOUBLE LOW LINE
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,15 +1,8 @@
|
|||
""" Python Character Mapping Codec for KOI8U.
|
||||
|
||||
This character scheme is compliant to RFC2319
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
Modified by Maxim Dzumanenko <mvd@mylinux.com.ua>.
|
||||
|
||||
(c) Copyright 2002, Python Software Foundation.
|
||||
""" Python Character Mapping Codec generated from 'python-mappings/KOI8-U.TXT' with gencodec.py.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs, koi8_r
|
||||
import codecs
|
||||
|
||||
### Codec APIs
|
||||
|
||||
|
@ -21,8 +14,8 @@ def encode(self,input,errors='strict'):
|
|||
|
||||
def decode(self,input,errors='strict'):
|
||||
|
||||
return codecs.charmap_decode(input,errors,decoding_map)
|
||||
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
|
||||
pass
|
||||
|
||||
|
@ -35,20 +28,525 @@ def getregentry():
|
|||
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = koi8_r.decoding_map.copy()
|
||||
decoding_map.update({
|
||||
0x00a4: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
0x00a6: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0x00a7: 0x0457, # CYRILLIC SMALL LETTER YI (UKRAINIAN)
|
||||
0x00ad: 0x0491, # CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN
|
||||
0x00b4: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
0x00b6: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0x00b7: 0x0407, # CYRILLIC CAPITAL LETTER YI (UKRAINIAN)
|
||||
0x00bd: 0x0490, # CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN
|
||||
})
|
||||
### Decoding Table
|
||||
|
||||
decoding_table = (
|
||||
u'\x00' # 0x00 -> NULL
|
||||
u'\x01' # 0x01 -> START OF HEADING
|
||||
u'\x02' # 0x02 -> START OF TEXT
|
||||
u'\x03' # 0x03 -> END OF TEXT
|
||||
u'\x04' # 0x04 -> END OF TRANSMISSION
|
||||
u'\x05' # 0x05 -> ENQUIRY
|
||||
u'\x06' # 0x06 -> ACKNOWLEDGE
|
||||
u'\x07' # 0x07 -> BELL
|
||||
u'\x08' # 0x08 -> BACKSPACE
|
||||
u'\t' # 0x09 -> HORIZONTAL TABULATION
|
||||
u'\n' # 0x0a -> LINE FEED
|
||||
u'\x0b' # 0x0b -> VERTICAL TABULATION
|
||||
u'\x0c' # 0x0c -> FORM FEED
|
||||
u'\r' # 0x0d -> CARRIAGE RETURN
|
||||
u'\x0e' # 0x0e -> SHIFT OUT
|
||||
u'\x0f' # 0x0f -> SHIFT IN
|
||||
u'\x10' # 0x10 -> DATA LINK ESCAPE
|
||||
u'\x11' # 0x11 -> DEVICE CONTROL ONE
|
||||
u'\x12' # 0x12 -> DEVICE CONTROL TWO
|
||||
u'\x13' # 0x13 -> DEVICE CONTROL THREE
|
||||
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
|
||||
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
|
||||
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
|
||||
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
|
||||
u'\x18' # 0x18 -> CANCEL
|
||||
u'\x19' # 0x19 -> END OF MEDIUM
|
||||
u'\x1a' # 0x1a -> SUBSTITUTE
|
||||
u'\x1b' # 0x1b -> ESCAPE
|
||||
u'\x1c' # 0x1c -> FILE SEPARATOR
|
||||
u'\x1d' # 0x1d -> GROUP SEPARATOR
|
||||
u'\x1e' # 0x1e -> RECORD SEPARATOR
|
||||
u'\x1f' # 0x1f -> UNIT SEPARATOR
|
||||
u' ' # 0x20 -> SPACE
|
||||
u'!' # 0x21 -> EXCLAMATION MARK
|
||||
u'"' # 0x22 -> QUOTATION MARK
|
||||
u'#' # 0x23 -> NUMBER SIGN
|
||||
u'$' # 0x24 -> DOLLAR SIGN
|
||||
u'%' # 0x25 -> PERCENT SIGN
|
||||
u'&' # 0x26 -> AMPERSAND
|
||||
u"'" # 0x27 -> APOSTROPHE
|
||||
u'(' # 0x28 -> LEFT PARENTHESIS
|
||||
u')' # 0x29 -> RIGHT PARENTHESIS
|
||||
u'*' # 0x2a -> ASTERISK
|
||||
u'+' # 0x2b -> PLUS SIGN
|
||||
u',' # 0x2c -> COMMA
|
||||
u'-' # 0x2d -> HYPHEN-MINUS
|
||||
u'.' # 0x2e -> FULL STOP
|
||||
u'/' # 0x2f -> SOLIDUS
|
||||
u'0' # 0x30 -> DIGIT ZERO
|
||||
u'1' # 0x31 -> DIGIT ONE
|
||||
u'2' # 0x32 -> DIGIT TWO
|
||||
u'3' # 0x33 -> DIGIT THREE
|
||||
u'4' # 0x34 -> DIGIT FOUR
|
||||
u'5' # 0x35 -> DIGIT FIVE
|
||||
u'6' # 0x36 -> DIGIT SIX
|
||||
u'7' # 0x37 -> DIGIT SEVEN
|
||||
u'8' # 0x38 -> DIGIT EIGHT
|
||||
u'9' # 0x39 -> DIGIT NINE
|
||||
u':' # 0x3a -> COLON
|
||||
u';' # 0x3b -> SEMICOLON
|
||||
u'<' # 0x3c -> LESS-THAN SIGN
|
||||
u'=' # 0x3d -> EQUALS SIGN
|
||||
u'>' # 0x3e -> GREATER-THAN SIGN
|
||||
u'?' # 0x3f -> QUESTION MARK
|
||||
u'@' # 0x40 -> COMMERCIAL AT
|
||||
u'A' # 0x41 -> LATIN CAPITAL LETTER A
|
||||
u'B' # 0x42 -> LATIN CAPITAL LETTER B
|
||||
u'C' # 0x43 -> LATIN CAPITAL LETTER C
|
||||
u'D' # 0x44 -> LATIN CAPITAL LETTER D
|
||||
u'E' # 0x45 -> LATIN CAPITAL LETTER E
|
||||
u'F' # 0x46 -> LATIN CAPITAL LETTER F
|
||||
u'G' # 0x47 -> LATIN CAPITAL LETTER G
|
||||
u'H' # 0x48 -> LATIN CAPITAL LETTER H
|
||||
u'I' # 0x49 -> LATIN CAPITAL LETTER I
|
||||
u'J' # 0x4a -> LATIN CAPITAL LETTER J
|
||||
u'K' # 0x4b -> LATIN CAPITAL LETTER K
|
||||
u'L' # 0x4c -> LATIN CAPITAL LETTER L
|
||||
u'M' # 0x4d -> LATIN CAPITAL LETTER M
|
||||
u'N' # 0x4e -> LATIN CAPITAL LETTER N
|
||||
u'O' # 0x4f -> LATIN CAPITAL LETTER O
|
||||
u'P' # 0x50 -> LATIN CAPITAL LETTER P
|
||||
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
|
||||
u'R' # 0x52 -> LATIN CAPITAL LETTER R
|
||||
u'S' # 0x53 -> LATIN CAPITAL LETTER S
|
||||
u'T' # 0x54 -> LATIN CAPITAL LETTER T
|
||||
u'U' # 0x55 -> LATIN CAPITAL LETTER U
|
||||
u'V' # 0x56 -> LATIN CAPITAL LETTER V
|
||||
u'W' # 0x57 -> LATIN CAPITAL LETTER W
|
||||
u'X' # 0x58 -> LATIN CAPITAL LETTER X
|
||||
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
|
||||
u'Z' # 0x5a -> LATIN CAPITAL LETTER Z
|
||||
u'[' # 0x5b -> LEFT SQUARE BRACKET
|
||||
u'\\' # 0x5c -> REVERSE SOLIDUS
|
||||
u']' # 0x5d -> RIGHT SQUARE BRACKET
|
||||
u'^' # 0x5e -> CIRCUMFLEX ACCENT
|
||||
u'_' # 0x5f -> LOW LINE
|
||||
u'`' # 0x60 -> GRAVE ACCENT
|
||||
u'a' # 0x61 -> LATIN SMALL LETTER A
|
||||
u'b' # 0x62 -> LATIN SMALL LETTER B
|
||||
u'c' # 0x63 -> LATIN SMALL LETTER C
|
||||
u'd' # 0x64 -> LATIN SMALL LETTER D
|
||||
u'e' # 0x65 -> LATIN SMALL LETTER E
|
||||
u'f' # 0x66 -> LATIN SMALL LETTER F
|
||||
u'g' # 0x67 -> LATIN SMALL LETTER G
|
||||
u'h' # 0x68 -> LATIN SMALL LETTER H
|
||||
u'i' # 0x69 -> LATIN SMALL LETTER I
|
||||
u'j' # 0x6a -> LATIN SMALL LETTER J
|
||||
u'k' # 0x6b -> LATIN SMALL LETTER K
|
||||
u'l' # 0x6c -> LATIN SMALL LETTER L
|
||||
u'm' # 0x6d -> LATIN SMALL LETTER M
|
||||
u'n' # 0x6e -> LATIN SMALL LETTER N
|
||||
u'o' # 0x6f -> LATIN SMALL LETTER O
|
||||
u'p' # 0x70 -> LATIN SMALL LETTER P
|
||||
u'q' # 0x71 -> LATIN SMALL LETTER Q
|
||||
u'r' # 0x72 -> LATIN SMALL LETTER R
|
||||
u's' # 0x73 -> LATIN SMALL LETTER S
|
||||
u't' # 0x74 -> LATIN SMALL LETTER T
|
||||
u'u' # 0x75 -> LATIN SMALL LETTER U
|
||||
u'v' # 0x76 -> LATIN SMALL LETTER V
|
||||
u'w' # 0x77 -> LATIN SMALL LETTER W
|
||||
u'x' # 0x78 -> LATIN SMALL LETTER X
|
||||
u'y' # 0x79 -> LATIN SMALL LETTER Y
|
||||
u'z' # 0x7a -> LATIN SMALL LETTER Z
|
||||
u'{' # 0x7b -> LEFT CURLY BRACKET
|
||||
u'|' # 0x7c -> VERTICAL LINE
|
||||
u'}' # 0x7d -> RIGHT CURLY BRACKET
|
||||
u'~' # 0x7e -> TILDE
|
||||
u'\x7f' # 0x7f -> DELETE
|
||||
u'\u2500' # 0x80 -> BOX DRAWINGS LIGHT HORIZONTAL
|
||||
u'\u2502' # 0x81 -> BOX DRAWINGS LIGHT VERTICAL
|
||||
u'\u250c' # 0x82 -> BOX DRAWINGS LIGHT DOWN AND RIGHT
|
||||
u'\u2510' # 0x83 -> BOX DRAWINGS LIGHT DOWN AND LEFT
|
||||
u'\u2514' # 0x84 -> BOX DRAWINGS LIGHT UP AND RIGHT
|
||||
u'\u2518' # 0x85 -> BOX DRAWINGS LIGHT UP AND LEFT
|
||||
u'\u251c' # 0x86 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
|
||||
u'\u2524' # 0x87 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
|
||||
u'\u252c' # 0x88 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
|
||||
u'\u2534' # 0x89 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
|
||||
u'\u253c' # 0x8a -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
|
||||
u'\u2580' # 0x8b -> UPPER HALF BLOCK
|
||||
u'\u2584' # 0x8c -> LOWER HALF BLOCK
|
||||
u'\u2588' # 0x8d -> FULL BLOCK
|
||||
u'\u258c' # 0x8e -> LEFT HALF BLOCK
|
||||
u'\u2590' # 0x8f -> RIGHT HALF BLOCK
|
||||
u'\u2591' # 0x90 -> LIGHT SHADE
|
||||
u'\u2592' # 0x91 -> MEDIUM SHADE
|
||||
u'\u2593' # 0x92 -> DARK SHADE
|
||||
u'\u2320' # 0x93 -> TOP HALF INTEGRAL
|
||||
u'\u25a0' # 0x94 -> BLACK SQUARE
|
||||
u'\u2219' # 0x95 -> BULLET OPERATOR
|
||||
u'\u221a' # 0x96 -> SQUARE ROOT
|
||||
u'\u2248' # 0x97 -> ALMOST EQUAL TO
|
||||
u'\u2264' # 0x98 -> LESS-THAN OR EQUAL TO
|
||||
u'\u2265' # 0x99 -> GREATER-THAN OR EQUAL TO
|
||||
u'\xa0' # 0x9a -> NO-BREAK SPACE
|
||||
u'\u2321' # 0x9b -> BOTTOM HALF INTEGRAL
|
||||
u'\xb0' # 0x9c -> DEGREE SIGN
|
||||
u'\xb2' # 0x9d -> SUPERSCRIPT TWO
|
||||
u'\xb7' # 0x9e -> MIDDLE DOT
|
||||
u'\xf7' # 0x9f -> DIVISION SIGN
|
||||
u'\u2550' # 0xa0 -> BOX DRAWINGS DOUBLE HORIZONTAL
|
||||
u'\u2551' # 0xa1 -> BOX DRAWINGS DOUBLE VERTICAL
|
||||
u'\u2552' # 0xa2 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
|
||||
u'\u0451' # 0xa3 -> CYRILLIC SMALL LETTER IO
|
||||
u'\u0454' # 0xa4 -> CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
u'\u2554' # 0xa5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
|
||||
u'\u0456' # 0xa6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
u'\u0457' # 0xa7 -> CYRILLIC SMALL LETTER YI (UKRAINIAN)
|
||||
u'\u2557' # 0xa8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT
|
||||
u'\u2558' # 0xa9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
|
||||
u'\u2559' # 0xaa -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
|
||||
u'\u255a' # 0xab -> BOX DRAWINGS DOUBLE UP AND RIGHT
|
||||
u'\u255b' # 0xac -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
|
||||
u'\u0491' # 0xad -> CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN
|
||||
u'\u255d' # 0xae -> BOX DRAWINGS DOUBLE UP AND LEFT
|
||||
u'\u255e' # 0xaf -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
|
||||
u'\u255f' # 0xb0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
|
||||
u'\u2560' # 0xb1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
|
||||
u'\u2561' # 0xb2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
|
||||
u'\u0401' # 0xb3 -> CYRILLIC CAPITAL LETTER IO
|
||||
u'\u0404' # 0xb4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
u'\u2563' # 0xb5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
|
||||
u'\u0406' # 0xb6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
u'\u0407' # 0xb7 -> CYRILLIC CAPITAL LETTER YI (UKRAINIAN)
|
||||
u'\u2566' # 0xb8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
|
||||
u'\u2567' # 0xb9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
|
||||
u'\u2568' # 0xba -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
|
||||
u'\u2569' # 0xbb -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
|
||||
u'\u256a' # 0xbc -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
|
||||
u'\u0490' # 0xbd -> CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN
|
||||
u'\u256c' # 0xbe -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
|
||||
u'\xa9' # 0xbf -> COPYRIGHT SIGN
|
||||
u'\u044e' # 0xc0 -> CYRILLIC SMALL LETTER YU
|
||||
u'\u0430' # 0xc1 -> CYRILLIC SMALL LETTER A
|
||||
u'\u0431' # 0xc2 -> CYRILLIC SMALL LETTER BE
|
||||
u'\u0446' # 0xc3 -> CYRILLIC SMALL LETTER TSE
|
||||
u'\u0434' # 0xc4 -> CYRILLIC SMALL LETTER DE
|
||||
u'\u0435' # 0xc5 -> CYRILLIC SMALL LETTER IE
|
||||
u'\u0444' # 0xc6 -> CYRILLIC SMALL LETTER EF
|
||||
u'\u0433' # 0xc7 -> CYRILLIC SMALL LETTER GHE
|
||||
u'\u0445' # 0xc8 -> CYRILLIC SMALL LETTER HA
|
||||
u'\u0438' # 0xc9 -> CYRILLIC SMALL LETTER I
|
||||
u'\u0439' # 0xca -> CYRILLIC SMALL LETTER SHORT I
|
||||
u'\u043a' # 0xcb -> CYRILLIC SMALL LETTER KA
|
||||
u'\u043b' # 0xcc -> CYRILLIC SMALL LETTER EL
|
||||
u'\u043c' # 0xcd -> CYRILLIC SMALL LETTER EM
|
||||
u'\u043d' # 0xce -> CYRILLIC SMALL LETTER EN
|
||||
u'\u043e' # 0xcf -> CYRILLIC SMALL LETTER O
|
||||
u'\u043f' # 0xd0 -> CYRILLIC SMALL LETTER PE
|
||||
u'\u044f' # 0xd1 -> CYRILLIC SMALL LETTER YA
|
||||
u'\u0440' # 0xd2 -> CYRILLIC SMALL LETTER ER
|
||||
u'\u0441' # 0xd3 -> CYRILLIC SMALL LETTER ES
|
||||
u'\u0442' # 0xd4 -> CYRILLIC SMALL LETTER TE
|
||||
u'\u0443' # 0xd5 -> CYRILLIC SMALL LETTER U
|
||||
u'\u0436' # 0xd6 -> CYRILLIC SMALL LETTER ZHE
|
||||
u'\u0432' # 0xd7 -> CYRILLIC SMALL LETTER VE
|
||||
u'\u044c' # 0xd8 -> CYRILLIC SMALL LETTER SOFT SIGN
|
||||
u'\u044b' # 0xd9 -> CYRILLIC SMALL LETTER YERU
|
||||
u'\u0437' # 0xda -> CYRILLIC SMALL LETTER ZE
|
||||
u'\u0448' # 0xdb -> CYRILLIC SMALL LETTER SHA
|
||||
u'\u044d' # 0xdc -> CYRILLIC SMALL LETTER E
|
||||
u'\u0449' # 0xdd -> CYRILLIC SMALL LETTER SHCHA
|
||||
u'\u0447' # 0xde -> CYRILLIC SMALL LETTER CHE
|
||||
u'\u044a' # 0xdf -> CYRILLIC SMALL LETTER HARD SIGN
|
||||
u'\u042e' # 0xe0 -> CYRILLIC CAPITAL LETTER YU
|
||||
u'\u0410' # 0xe1 -> CYRILLIC CAPITAL LETTER A
|
||||
u'\u0411' # 0xe2 -> CYRILLIC CAPITAL LETTER BE
|
||||
u'\u0426' # 0xe3 -> CYRILLIC CAPITAL LETTER TSE
|
||||
u'\u0414' # 0xe4 -> CYRILLIC CAPITAL LETTER DE
|
||||
u'\u0415' # 0xe5 -> CYRILLIC CAPITAL LETTER IE
|
||||
u'\u0424' # 0xe6 -> CYRILLIC CAPITAL LETTER EF
|
||||
u'\u0413' # 0xe7 -> CYRILLIC CAPITAL LETTER GHE
|
||||
u'\u0425' # 0xe8 -> CYRILLIC CAPITAL LETTER HA
|
||||
u'\u0418' # 0xe9 -> CYRILLIC CAPITAL LETTER I
|
||||
u'\u0419' # 0xea -> CYRILLIC CAPITAL LETTER SHORT I
|
||||
u'\u041a' # 0xeb -> CYRILLIC CAPITAL LETTER KA
|
||||
u'\u041b' # 0xec -> CYRILLIC CAPITAL LETTER EL
|
||||
u'\u041c' # 0xed -> CYRILLIC CAPITAL LETTER EM
|
||||
u'\u041d' # 0xee -> CYRILLIC CAPITAL LETTER EN
|
||||
u'\u041e' # 0xef -> CYRILLIC CAPITAL LETTER O
|
||||
u'\u041f' # 0xf0 -> CYRILLIC CAPITAL LETTER PE
|
||||
u'\u042f' # 0xf1 -> CYRILLIC CAPITAL LETTER YA
|
||||
u'\u0420' # 0xf2 -> CYRILLIC CAPITAL LETTER ER
|
||||
u'\u0421' # 0xf3 -> CYRILLIC CAPITAL LETTER ES
|
||||
u'\u0422' # 0xf4 -> CYRILLIC CAPITAL LETTER TE
|
||||
u'\u0423' # 0xf5 -> CYRILLIC CAPITAL LETTER U
|
||||
u'\u0416' # 0xf6 -> CYRILLIC CAPITAL LETTER ZHE
|
||||
u'\u0412' # 0xf7 -> CYRILLIC CAPITAL LETTER VE
|
||||
u'\u042c' # 0xf8 -> CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||
u'\u042b' # 0xf9 -> CYRILLIC CAPITAL LETTER YERU
|
||||
u'\u0417' # 0xfa -> CYRILLIC CAPITAL LETTER ZE
|
||||
u'\u0428' # 0xfb -> CYRILLIC CAPITAL LETTER SHA
|
||||
u'\u042d' # 0xfc -> CYRILLIC CAPITAL LETTER E
|
||||
u'\u0429' # 0xfd -> CYRILLIC CAPITAL LETTER SHCHA
|
||||
u'\u0427' # 0xfe -> CYRILLIC CAPITAL LETTER CHE
|
||||
u'\u042a' # 0xff -> CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = codecs.make_encoding_map(decoding_map)
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000a: 0x0a, # LINE FEED
|
||||
0x000b: 0x0b, # VERTICAL TABULATION
|
||||
0x000c: 0x0c, # FORM FEED
|
||||
0x000d: 0x0d, # CARRIAGE RETURN
|
||||
0x000e: 0x0e, # SHIFT OUT
|
||||
0x000f: 0x0f, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001a: 0x1a, # SUBSTITUTE
|
||||
0x001b: 0x1b, # ESCAPE
|
||||
0x001c: 0x1c, # FILE SEPARATOR
|
||||
0x001d: 0x1d, # GROUP SEPARATOR
|
||||
0x001e: 0x1e, # RECORD SEPARATOR
|
||||
0x001f: 0x1f, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002a: 0x2a, # ASTERISK
|
||||
0x002b: 0x2b, # PLUS SIGN
|
||||
0x002c: 0x2c, # COMMA
|
||||
0x002d: 0x2d, # HYPHEN-MINUS
|
||||
0x002e: 0x2e, # FULL STOP
|
||||
0x002f: 0x2f, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003a: 0x3a, # COLON
|
||||
0x003b: 0x3b, # SEMICOLON
|
||||
0x003c: 0x3c, # LESS-THAN SIGN
|
||||
0x003d: 0x3d, # EQUALS SIGN
|
||||
0x003e: 0x3e, # GREATER-THAN SIGN
|
||||
0x003f: 0x3f, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004a: 0x4a, # LATIN CAPITAL LETTER J
|
||||
0x004b: 0x4b, # LATIN CAPITAL LETTER K
|
||||
0x004c: 0x4c, # LATIN CAPITAL LETTER L
|
||||
0x004d: 0x4d, # LATIN CAPITAL LETTER M
|
||||
0x004e: 0x4e, # LATIN CAPITAL LETTER N
|
||||
0x004f: 0x4f, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005a: 0x5a, # LATIN CAPITAL LETTER Z
|
||||
0x005b: 0x5b, # LEFT SQUARE BRACKET
|
||||
0x005c: 0x5c, # REVERSE SOLIDUS
|
||||
0x005d: 0x5d, # RIGHT SQUARE BRACKET
|
||||
0x005e: 0x5e, # CIRCUMFLEX ACCENT
|
||||
0x005f: 0x5f, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006a: 0x6a, # LATIN SMALL LETTER J
|
||||
0x006b: 0x6b, # LATIN SMALL LETTER K
|
||||
0x006c: 0x6c, # LATIN SMALL LETTER L
|
||||
0x006d: 0x6d, # LATIN SMALL LETTER M
|
||||
0x006e: 0x6e, # LATIN SMALL LETTER N
|
||||
0x006f: 0x6f, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007a: 0x7a, # LATIN SMALL LETTER Z
|
||||
0x007b: 0x7b, # LEFT CURLY BRACKET
|
||||
0x007c: 0x7c, # VERTICAL LINE
|
||||
0x007d: 0x7d, # RIGHT CURLY BRACKET
|
||||
0x007e: 0x7e, # TILDE
|
||||
0x007f: 0x7f, # DELETE
|
||||
0x00a0: 0x9a, # NO-BREAK SPACE
|
||||
0x00a9: 0xbf, # COPYRIGHT SIGN
|
||||
0x00b0: 0x9c, # DEGREE SIGN
|
||||
0x00b2: 0x9d, # SUPERSCRIPT TWO
|
||||
0x00b7: 0x9e, # MIDDLE DOT
|
||||
0x00f7: 0x9f, # DIVISION SIGN
|
||||
0x0401: 0xb3, # CYRILLIC CAPITAL LETTER IO
|
||||
0x0404: 0xb4, # CYRILLIC CAPITAL LETTER UKRAINIAN IE
|
||||
0x0406: 0xb6, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0x0407: 0xb7, # CYRILLIC CAPITAL LETTER YI (UKRAINIAN)
|
||||
0x0410: 0xe1, # CYRILLIC CAPITAL LETTER A
|
||||
0x0411: 0xe2, # CYRILLIC CAPITAL LETTER BE
|
||||
0x0412: 0xf7, # CYRILLIC CAPITAL LETTER VE
|
||||
0x0413: 0xe7, # CYRILLIC CAPITAL LETTER GHE
|
||||
0x0414: 0xe4, # CYRILLIC CAPITAL LETTER DE
|
||||
0x0415: 0xe5, # CYRILLIC CAPITAL LETTER IE
|
||||
0x0416: 0xf6, # CYRILLIC CAPITAL LETTER ZHE
|
||||
0x0417: 0xfa, # CYRILLIC CAPITAL LETTER ZE
|
||||
0x0418: 0xe9, # CYRILLIC CAPITAL LETTER I
|
||||
0x0419: 0xea, # CYRILLIC CAPITAL LETTER SHORT I
|
||||
0x041a: 0xeb, # CYRILLIC CAPITAL LETTER KA
|
||||
0x041b: 0xec, # CYRILLIC CAPITAL LETTER EL
|
||||
0x041c: 0xed, # CYRILLIC CAPITAL LETTER EM
|
||||
0x041d: 0xee, # CYRILLIC CAPITAL LETTER EN
|
||||
0x041e: 0xef, # CYRILLIC CAPITAL LETTER O
|
||||
0x041f: 0xf0, # CYRILLIC CAPITAL LETTER PE
|
||||
0x0420: 0xf2, # CYRILLIC CAPITAL LETTER ER
|
||||
0x0421: 0xf3, # CYRILLIC CAPITAL LETTER ES
|
||||
0x0422: 0xf4, # CYRILLIC CAPITAL LETTER TE
|
||||
0x0423: 0xf5, # CYRILLIC CAPITAL LETTER U
|
||||
0x0424: 0xe6, # CYRILLIC CAPITAL LETTER EF
|
||||
0x0425: 0xe8, # CYRILLIC CAPITAL LETTER HA
|
||||
0x0426: 0xe3, # CYRILLIC CAPITAL LETTER TSE
|
||||
0x0427: 0xfe, # CYRILLIC CAPITAL LETTER CHE
|
||||
0x0428: 0xfb, # CYRILLIC CAPITAL LETTER SHA
|
||||
0x0429: 0xfd, # CYRILLIC CAPITAL LETTER SHCHA
|
||||
0x042a: 0xff, # CYRILLIC CAPITAL LETTER HARD SIGN
|
||||
0x042b: 0xf9, # CYRILLIC CAPITAL LETTER YERU
|
||||
0x042c: 0xf8, # CYRILLIC CAPITAL LETTER SOFT SIGN
|
||||
0x042d: 0xfc, # CYRILLIC CAPITAL LETTER E
|
||||
0x042e: 0xe0, # CYRILLIC CAPITAL LETTER YU
|
||||
0x042f: 0xf1, # CYRILLIC CAPITAL LETTER YA
|
||||
0x0430: 0xc1, # CYRILLIC SMALL LETTER A
|
||||
0x0431: 0xc2, # CYRILLIC SMALL LETTER BE
|
||||
0x0432: 0xd7, # CYRILLIC SMALL LETTER VE
|
||||
0x0433: 0xc7, # CYRILLIC SMALL LETTER GHE
|
||||
0x0434: 0xc4, # CYRILLIC SMALL LETTER DE
|
||||
0x0435: 0xc5, # CYRILLIC SMALL LETTER IE
|
||||
0x0436: 0xd6, # CYRILLIC SMALL LETTER ZHE
|
||||
0x0437: 0xda, # CYRILLIC SMALL LETTER ZE
|
||||
0x0438: 0xc9, # CYRILLIC SMALL LETTER I
|
||||
0x0439: 0xca, # CYRILLIC SMALL LETTER SHORT I
|
||||
0x043a: 0xcb, # CYRILLIC SMALL LETTER KA
|
||||
0x043b: 0xcc, # CYRILLIC SMALL LETTER EL
|
||||
0x043c: 0xcd, # CYRILLIC SMALL LETTER EM
|
||||
0x043d: 0xce, # CYRILLIC SMALL LETTER EN
|
||||
0x043e: 0xcf, # CYRILLIC SMALL LETTER O
|
||||
0x043f: 0xd0, # CYRILLIC SMALL LETTER PE
|
||||
0x0440: 0xd2, # CYRILLIC SMALL LETTER ER
|
||||
0x0441: 0xd3, # CYRILLIC SMALL LETTER ES
|
||||
0x0442: 0xd4, # CYRILLIC SMALL LETTER TE
|
||||
0x0443: 0xd5, # CYRILLIC SMALL LETTER U
|
||||
0x0444: 0xc6, # CYRILLIC SMALL LETTER EF
|
||||
0x0445: 0xc8, # CYRILLIC SMALL LETTER HA
|
||||
0x0446: 0xc3, # CYRILLIC SMALL LETTER TSE
|
||||
0x0447: 0xde, # CYRILLIC SMALL LETTER CHE
|
||||
0x0448: 0xdb, # CYRILLIC SMALL LETTER SHA
|
||||
0x0449: 0xdd, # CYRILLIC SMALL LETTER SHCHA
|
||||
0x044a: 0xdf, # CYRILLIC SMALL LETTER HARD SIGN
|
||||
0x044b: 0xd9, # CYRILLIC SMALL LETTER YERU
|
||||
0x044c: 0xd8, # CYRILLIC SMALL LETTER SOFT SIGN
|
||||
0x044d: 0xdc, # CYRILLIC SMALL LETTER E
|
||||
0x044e: 0xc0, # CYRILLIC SMALL LETTER YU
|
||||
0x044f: 0xd1, # CYRILLIC SMALL LETTER YA
|
||||
0x0451: 0xa3, # CYRILLIC SMALL LETTER IO
|
||||
0x0454: 0xa4, # CYRILLIC SMALL LETTER UKRAINIAN IE
|
||||
0x0456: 0xa6, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
|
||||
0x0457: 0xa7, # CYRILLIC SMALL LETTER YI (UKRAINIAN)
|
||||
0x0490: 0xbd, # CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN
|
||||
0x0491: 0xad, # CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN
|
||||
0x2219: 0x95, # BULLET OPERATOR
|
||||
0x221a: 0x96, # SQUARE ROOT
|
||||
0x2248: 0x97, # ALMOST EQUAL TO
|
||||
0x2264: 0x98, # LESS-THAN OR EQUAL TO
|
||||
0x2265: 0x99, # GREATER-THAN OR EQUAL TO
|
||||
0x2320: 0x93, # TOP HALF INTEGRAL
|
||||
0x2321: 0x9b, # BOTTOM HALF INTEGRAL
|
||||
0x2500: 0x80, # BOX DRAWINGS LIGHT HORIZONTAL
|
||||
0x2502: 0x81, # BOX DRAWINGS LIGHT VERTICAL
|
||||
0x250c: 0x82, # BOX DRAWINGS LIGHT DOWN AND RIGHT
|
||||
0x2510: 0x83, # BOX DRAWINGS LIGHT DOWN AND LEFT
|
||||
0x2514: 0x84, # BOX DRAWINGS LIGHT UP AND RIGHT
|
||||
0x2518: 0x85, # BOX DRAWINGS LIGHT UP AND LEFT
|
||||
0x251c: 0x86, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT
|
||||
0x2524: 0x87, # BOX DRAWINGS LIGHT VERTICAL AND LEFT
|
||||
0x252c: 0x88, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
|
||||
0x2534: 0x89, # BOX DRAWINGS LIGHT UP AND HORIZONTAL
|
||||
0x253c: 0x8a, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
|
||||
0x2550: 0xa0, # BOX DRAWINGS DOUBLE HORIZONTAL
|
||||
0x2551: 0xa1, # BOX DRAWINGS DOUBLE VERTICAL
|
||||
0x2552: 0xa2, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
|
||||
0x2554: 0xa5, # BOX DRAWINGS DOUBLE DOWN AND RIGHT
|
||||
0x2557: 0xa8, # BOX DRAWINGS DOUBLE DOWN AND LEFT
|
||||
0x2558: 0xa9, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
|
||||
0x2559: 0xaa, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
|
||||
0x255a: 0xab, # BOX DRAWINGS DOUBLE UP AND RIGHT
|
||||
0x255b: 0xac, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
|
||||
0x255d: 0xae, # BOX DRAWINGS DOUBLE UP AND LEFT
|
||||
0x255e: 0xaf, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
|
||||
0x255f: 0xb0, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
|
||||
0x2560: 0xb1, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
|
||||
0x2561: 0xb2, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
|
||||
0x2563: 0xb5, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT
|
||||
0x2566: 0xb8, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
|
||||
0x2567: 0xb9, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
|
||||
0x2568: 0xba, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
|
||||
0x2569: 0xbb, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL
|
||||
0x256a: 0xbc, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
|
||||
0x256c: 0xbe, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
|
||||
0x2580: 0x8b, # UPPER HALF BLOCK
|
||||
0x2584: 0x8c, # LOWER HALF BLOCK
|
||||
0x2588: 0x8d, # FULL BLOCK
|
||||
0x258c: 0x8e, # LEFT HALF BLOCK
|
||||
0x2590: 0x8f, # RIGHT HALF BLOCK
|
||||
0x2591: 0x90, # LIGHT SHADE
|
||||
0x2592: 0x91, # MEDIUM SHADE
|
||||
0x2593: 0x92, # DARK SHADE
|
||||
0x25a0: 0x94, # BLACK SQUARE
|
||||
}
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,14 +1,8 @@
|
|||
""" Python Character Mapping Codec for TIS-620.
|
||||
|
||||
According to
|
||||
ftp://ftp.unicode.org/Public/MAPPINGS/ISO8859/8859-11.TXT the
|
||||
TIS-620 is identical to ISO_8859-11 with the 0xA0 (no-break
|
||||
space) mapping removed.
|
||||
""" Python Character Mapping Codec generated from 'python-mappings/TIS-620.TXT' with gencodec.py.
|
||||
|
||||
"""#"
|
||||
|
||||
import codecs
|
||||
from encodings.iso8859_11 import decoding_map
|
||||
|
||||
### Codec APIs
|
||||
|
||||
|
@ -20,8 +14,8 @@ def encode(self,input,errors='strict'):
|
|||
|
||||
def decode(self,input,errors='strict'):
|
||||
|
||||
return codecs.charmap_decode(input,errors,decoding_map)
|
||||
|
||||
return codecs.charmap_decode(input,errors,decoding_table)
|
||||
|
||||
class StreamWriter(Codec,codecs.StreamWriter):
|
||||
pass
|
||||
|
||||
|
@ -34,13 +28,516 @@ def getregentry():
|
|||
|
||||
return (Codec().encode,Codec().decode,StreamReader,StreamWriter)
|
||||
|
||||
### Decoding Map
|
||||
|
||||
decoding_map = decoding_map.copy()
|
||||
decoding_map.update({
|
||||
0x00a0: None,
|
||||
})
|
||||
### Decoding Table
|
||||
|
||||
decoding_table = (
|
||||
u'\x00' # 0x00 -> NULL
|
||||
u'\x01' # 0x01 -> START OF HEADING
|
||||
u'\x02' # 0x02 -> START OF TEXT
|
||||
u'\x03' # 0x03 -> END OF TEXT
|
||||
u'\x04' # 0x04 -> END OF TRANSMISSION
|
||||
u'\x05' # 0x05 -> ENQUIRY
|
||||
u'\x06' # 0x06 -> ACKNOWLEDGE
|
||||
u'\x07' # 0x07 -> BELL
|
||||
u'\x08' # 0x08 -> BACKSPACE
|
||||
u'\t' # 0x09 -> HORIZONTAL TABULATION
|
||||
u'\n' # 0x0a -> LINE FEED
|
||||
u'\x0b' # 0x0b -> VERTICAL TABULATION
|
||||
u'\x0c' # 0x0c -> FORM FEED
|
||||
u'\r' # 0x0d -> CARRIAGE RETURN
|
||||
u'\x0e' # 0x0e -> SHIFT OUT
|
||||
u'\x0f' # 0x0f -> SHIFT IN
|
||||
u'\x10' # 0x10 -> DATA LINK ESCAPE
|
||||
u'\x11' # 0x11 -> DEVICE CONTROL ONE
|
||||
u'\x12' # 0x12 -> DEVICE CONTROL TWO
|
||||
u'\x13' # 0x13 -> DEVICE CONTROL THREE
|
||||
u'\x14' # 0x14 -> DEVICE CONTROL FOUR
|
||||
u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE
|
||||
u'\x16' # 0x16 -> SYNCHRONOUS IDLE
|
||||
u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK
|
||||
u'\x18' # 0x18 -> CANCEL
|
||||
u'\x19' # 0x19 -> END OF MEDIUM
|
||||
u'\x1a' # 0x1a -> SUBSTITUTE
|
||||
u'\x1b' # 0x1b -> ESCAPE
|
||||
u'\x1c' # 0x1c -> FILE SEPARATOR
|
||||
u'\x1d' # 0x1d -> GROUP SEPARATOR
|
||||
u'\x1e' # 0x1e -> RECORD SEPARATOR
|
||||
u'\x1f' # 0x1f -> UNIT SEPARATOR
|
||||
u' ' # 0x20 -> SPACE
|
||||
u'!' # 0x21 -> EXCLAMATION MARK
|
||||
u'"' # 0x22 -> QUOTATION MARK
|
||||
u'#' # 0x23 -> NUMBER SIGN
|
||||
u'$' # 0x24 -> DOLLAR SIGN
|
||||
u'%' # 0x25 -> PERCENT SIGN
|
||||
u'&' # 0x26 -> AMPERSAND
|
||||
u"'" # 0x27 -> APOSTROPHE
|
||||
u'(' # 0x28 -> LEFT PARENTHESIS
|
||||
u')' # 0x29 -> RIGHT PARENTHESIS
|
||||
u'*' # 0x2a -> ASTERISK
|
||||
u'+' # 0x2b -> PLUS SIGN
|
||||
u',' # 0x2c -> COMMA
|
||||
u'-' # 0x2d -> HYPHEN-MINUS
|
||||
u'.' # 0x2e -> FULL STOP
|
||||
u'/' # 0x2f -> SOLIDUS
|
||||
u'0' # 0x30 -> DIGIT ZERO
|
||||
u'1' # 0x31 -> DIGIT ONE
|
||||
u'2' # 0x32 -> DIGIT TWO
|
||||
u'3' # 0x33 -> DIGIT THREE
|
||||
u'4' # 0x34 -> DIGIT FOUR
|
||||
u'5' # 0x35 -> DIGIT FIVE
|
||||
u'6' # 0x36 -> DIGIT SIX
|
||||
u'7' # 0x37 -> DIGIT SEVEN
|
||||
u'8' # 0x38 -> DIGIT EIGHT
|
||||
u'9' # 0x39 -> DIGIT NINE
|
||||
u':' # 0x3a -> COLON
|
||||
u';' # 0x3b -> SEMICOLON
|
||||
u'<' # 0x3c -> LESS-THAN SIGN
|
||||
u'=' # 0x3d -> EQUALS SIGN
|
||||
u'>' # 0x3e -> GREATER-THAN SIGN
|
||||
u'?' # 0x3f -> QUESTION MARK
|
||||
u'@' # 0x40 -> COMMERCIAL AT
|
||||
u'A' # 0x41 -> LATIN CAPITAL LETTER A
|
||||
u'B' # 0x42 -> LATIN CAPITAL LETTER B
|
||||
u'C' # 0x43 -> LATIN CAPITAL LETTER C
|
||||
u'D' # 0x44 -> LATIN CAPITAL LETTER D
|
||||
u'E' # 0x45 -> LATIN CAPITAL LETTER E
|
||||
u'F' # 0x46 -> LATIN CAPITAL LETTER F
|
||||
u'G' # 0x47 -> LATIN CAPITAL LETTER G
|
||||
u'H' # 0x48 -> LATIN CAPITAL LETTER H
|
||||
u'I' # 0x49 -> LATIN CAPITAL LETTER I
|
||||
u'J' # 0x4a -> LATIN CAPITAL LETTER J
|
||||
u'K' # 0x4b -> LATIN CAPITAL LETTER K
|
||||
u'L' # 0x4c -> LATIN CAPITAL LETTER L
|
||||
u'M' # 0x4d -> LATIN CAPITAL LETTER M
|
||||
u'N' # 0x4e -> LATIN CAPITAL LETTER N
|
||||
u'O' # 0x4f -> LATIN CAPITAL LETTER O
|
||||
u'P' # 0x50 -> LATIN CAPITAL LETTER P
|
||||
u'Q' # 0x51 -> LATIN CAPITAL LETTER Q
|
||||
u'R' # 0x52 -> LATIN CAPITAL LETTER R
|
||||
u'S' # 0x53 -> LATIN CAPITAL LETTER S
|
||||
u'T' # 0x54 -> LATIN CAPITAL LETTER T
|
||||
u'U' # 0x55 -> LATIN CAPITAL LETTER U
|
||||
u'V' # 0x56 -> LATIN CAPITAL LETTER V
|
||||
u'W' # 0x57 -> LATIN CAPITAL LETTER W
|
||||
u'X' # 0x58 -> LATIN CAPITAL LETTER X
|
||||
u'Y' # 0x59 -> LATIN CAPITAL LETTER Y
|
||||
u'Z' # 0x5a -> LATIN CAPITAL LETTER Z
|
||||
u'[' # 0x5b -> LEFT SQUARE BRACKET
|
||||
u'\\' # 0x5c -> REVERSE SOLIDUS
|
||||
u']' # 0x5d -> RIGHT SQUARE BRACKET
|
||||
u'^' # 0x5e -> CIRCUMFLEX ACCENT
|
||||
u'_' # 0x5f -> LOW LINE
|
||||
u'`' # 0x60 -> GRAVE ACCENT
|
||||
u'a' # 0x61 -> LATIN SMALL LETTER A
|
||||
u'b' # 0x62 -> LATIN SMALL LETTER B
|
||||
u'c' # 0x63 -> LATIN SMALL LETTER C
|
||||
u'd' # 0x64 -> LATIN SMALL LETTER D
|
||||
u'e' # 0x65 -> LATIN SMALL LETTER E
|
||||
u'f' # 0x66 -> LATIN SMALL LETTER F
|
||||
u'g' # 0x67 -> LATIN SMALL LETTER G
|
||||
u'h' # 0x68 -> LATIN SMALL LETTER H
|
||||
u'i' # 0x69 -> LATIN SMALL LETTER I
|
||||
u'j' # 0x6a -> LATIN SMALL LETTER J
|
||||
u'k' # 0x6b -> LATIN SMALL LETTER K
|
||||
u'l' # 0x6c -> LATIN SMALL LETTER L
|
||||
u'm' # 0x6d -> LATIN SMALL LETTER M
|
||||
u'n' # 0x6e -> LATIN SMALL LETTER N
|
||||
u'o' # 0x6f -> LATIN SMALL LETTER O
|
||||
u'p' # 0x70 -> LATIN SMALL LETTER P
|
||||
u'q' # 0x71 -> LATIN SMALL LETTER Q
|
||||
u'r' # 0x72 -> LATIN SMALL LETTER R
|
||||
u's' # 0x73 -> LATIN SMALL LETTER S
|
||||
u't' # 0x74 -> LATIN SMALL LETTER T
|
||||
u'u' # 0x75 -> LATIN SMALL LETTER U
|
||||
u'v' # 0x76 -> LATIN SMALL LETTER V
|
||||
u'w' # 0x77 -> LATIN SMALL LETTER W
|
||||
u'x' # 0x78 -> LATIN SMALL LETTER X
|
||||
u'y' # 0x79 -> LATIN SMALL LETTER Y
|
||||
u'z' # 0x7a -> LATIN SMALL LETTER Z
|
||||
u'{' # 0x7b -> LEFT CURLY BRACKET
|
||||
u'|' # 0x7c -> VERTICAL LINE
|
||||
u'}' # 0x7d -> RIGHT CURLY BRACKET
|
||||
u'~' # 0x7e -> TILDE
|
||||
u'\x7f' # 0x7f -> DELETE
|
||||
u'\x80' # 0x80 -> <control>
|
||||
u'\x81' # 0x81 -> <control>
|
||||
u'\x82' # 0x82 -> <control>
|
||||
u'\x83' # 0x83 -> <control>
|
||||
u'\x84' # 0x84 -> <control>
|
||||
u'\x85' # 0x85 -> <control>
|
||||
u'\x86' # 0x86 -> <control>
|
||||
u'\x87' # 0x87 -> <control>
|
||||
u'\x88' # 0x88 -> <control>
|
||||
u'\x89' # 0x89 -> <control>
|
||||
u'\x8a' # 0x8a -> <control>
|
||||
u'\x8b' # 0x8b -> <control>
|
||||
u'\x8c' # 0x8c -> <control>
|
||||
u'\x8d' # 0x8d -> <control>
|
||||
u'\x8e' # 0x8e -> <control>
|
||||
u'\x8f' # 0x8f -> <control>
|
||||
u'\x90' # 0x90 -> <control>
|
||||
u'\x91' # 0x91 -> <control>
|
||||
u'\x92' # 0x92 -> <control>
|
||||
u'\x93' # 0x93 -> <control>
|
||||
u'\x94' # 0x94 -> <control>
|
||||
u'\x95' # 0x95 -> <control>
|
||||
u'\x96' # 0x96 -> <control>
|
||||
u'\x97' # 0x97 -> <control>
|
||||
u'\x98' # 0x98 -> <control>
|
||||
u'\x99' # 0x99 -> <control>
|
||||
u'\x9a' # 0x9a -> <control>
|
||||
u'\x9b' # 0x9b -> <control>
|
||||
u'\x9c' # 0x9c -> <control>
|
||||
u'\x9d' # 0x9d -> <control>
|
||||
u'\x9e' # 0x9e -> <control>
|
||||
u'\x9f' # 0x9f -> <control>
|
||||
u'\ufffe'
|
||||
u'\u0e01' # 0xa1 -> THAI CHARACTER KO KAI
|
||||
u'\u0e02' # 0xa2 -> THAI CHARACTER KHO KHAI
|
||||
u'\u0e03' # 0xa3 -> THAI CHARACTER KHO KHUAT
|
||||
u'\u0e04' # 0xa4 -> THAI CHARACTER KHO KHWAI
|
||||
u'\u0e05' # 0xa5 -> THAI CHARACTER KHO KHON
|
||||
u'\u0e06' # 0xa6 -> THAI CHARACTER KHO RAKHANG
|
||||
u'\u0e07' # 0xa7 -> THAI CHARACTER NGO NGU
|
||||
u'\u0e08' # 0xa8 -> THAI CHARACTER CHO CHAN
|
||||
u'\u0e09' # 0xa9 -> THAI CHARACTER CHO CHING
|
||||
u'\u0e0a' # 0xaa -> THAI CHARACTER CHO CHANG
|
||||
u'\u0e0b' # 0xab -> THAI CHARACTER SO SO
|
||||
u'\u0e0c' # 0xac -> THAI CHARACTER CHO CHOE
|
||||
u'\u0e0d' # 0xad -> THAI CHARACTER YO YING
|
||||
u'\u0e0e' # 0xae -> THAI CHARACTER DO CHADA
|
||||
u'\u0e0f' # 0xaf -> THAI CHARACTER TO PATAK
|
||||
u'\u0e10' # 0xb0 -> THAI CHARACTER THO THAN
|
||||
u'\u0e11' # 0xb1 -> THAI CHARACTER THO NANGMONTHO
|
||||
u'\u0e12' # 0xb2 -> THAI CHARACTER THO PHUTHAO
|
||||
u'\u0e13' # 0xb3 -> THAI CHARACTER NO NEN
|
||||
u'\u0e14' # 0xb4 -> THAI CHARACTER DO DEK
|
||||
u'\u0e15' # 0xb5 -> THAI CHARACTER TO TAO
|
||||
u'\u0e16' # 0xb6 -> THAI CHARACTER THO THUNG
|
||||
u'\u0e17' # 0xb7 -> THAI CHARACTER THO THAHAN
|
||||
u'\u0e18' # 0xb8 -> THAI CHARACTER THO THONG
|
||||
u'\u0e19' # 0xb9 -> THAI CHARACTER NO NU
|
||||
u'\u0e1a' # 0xba -> THAI CHARACTER BO BAIMAI
|
||||
u'\u0e1b' # 0xbb -> THAI CHARACTER PO PLA
|
||||
u'\u0e1c' # 0xbc -> THAI CHARACTER PHO PHUNG
|
||||
u'\u0e1d' # 0xbd -> THAI CHARACTER FO FA
|
||||
u'\u0e1e' # 0xbe -> THAI CHARACTER PHO PHAN
|
||||
u'\u0e1f' # 0xbf -> THAI CHARACTER FO FAN
|
||||
u'\u0e20' # 0xc0 -> THAI CHARACTER PHO SAMPHAO
|
||||
u'\u0e21' # 0xc1 -> THAI CHARACTER MO MA
|
||||
u'\u0e22' # 0xc2 -> THAI CHARACTER YO YAK
|
||||
u'\u0e23' # 0xc3 -> THAI CHARACTER RO RUA
|
||||
u'\u0e24' # 0xc4 -> THAI CHARACTER RU
|
||||
u'\u0e25' # 0xc5 -> THAI CHARACTER LO LING
|
||||
u'\u0e26' # 0xc6 -> THAI CHARACTER LU
|
||||
u'\u0e27' # 0xc7 -> THAI CHARACTER WO WAEN
|
||||
u'\u0e28' # 0xc8 -> THAI CHARACTER SO SALA
|
||||
u'\u0e29' # 0xc9 -> THAI CHARACTER SO RUSI
|
||||
u'\u0e2a' # 0xca -> THAI CHARACTER SO SUA
|
||||
u'\u0e2b' # 0xcb -> THAI CHARACTER HO HIP
|
||||
u'\u0e2c' # 0xcc -> THAI CHARACTER LO CHULA
|
||||
u'\u0e2d' # 0xcd -> THAI CHARACTER O ANG
|
||||
u'\u0e2e' # 0xce -> THAI CHARACTER HO NOKHUK
|
||||
u'\u0e2f' # 0xcf -> THAI CHARACTER PAIYANNOI
|
||||
u'\u0e30' # 0xd0 -> THAI CHARACTER SARA A
|
||||
u'\u0e31' # 0xd1 -> THAI CHARACTER MAI HAN-AKAT
|
||||
u'\u0e32' # 0xd2 -> THAI CHARACTER SARA AA
|
||||
u'\u0e33' # 0xd3 -> THAI CHARACTER SARA AM
|
||||
u'\u0e34' # 0xd4 -> THAI CHARACTER SARA I
|
||||
u'\u0e35' # 0xd5 -> THAI CHARACTER SARA II
|
||||
u'\u0e36' # 0xd6 -> THAI CHARACTER SARA UE
|
||||
u'\u0e37' # 0xd7 -> THAI CHARACTER SARA UEE
|
||||
u'\u0e38' # 0xd8 -> THAI CHARACTER SARA U
|
||||
u'\u0e39' # 0xd9 -> THAI CHARACTER SARA UU
|
||||
u'\u0e3a' # 0xda -> THAI CHARACTER PHINTHU
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\u0e3f' # 0xdf -> THAI CURRENCY SYMBOL BAHT
|
||||
u'\u0e40' # 0xe0 -> THAI CHARACTER SARA E
|
||||
u'\u0e41' # 0xe1 -> THAI CHARACTER SARA AE
|
||||
u'\u0e42' # 0xe2 -> THAI CHARACTER SARA O
|
||||
u'\u0e43' # 0xe3 -> THAI CHARACTER SARA AI MAIMUAN
|
||||
u'\u0e44' # 0xe4 -> THAI CHARACTER SARA AI MAIMALAI
|
||||
u'\u0e45' # 0xe5 -> THAI CHARACTER LAKKHANGYAO
|
||||
u'\u0e46' # 0xe6 -> THAI CHARACTER MAIYAMOK
|
||||
u'\u0e47' # 0xe7 -> THAI CHARACTER MAITAIKHU
|
||||
u'\u0e48' # 0xe8 -> THAI CHARACTER MAI EK
|
||||
u'\u0e49' # 0xe9 -> THAI CHARACTER MAI THO
|
||||
u'\u0e4a' # 0xea -> THAI CHARACTER MAI TRI
|
||||
u'\u0e4b' # 0xeb -> THAI CHARACTER MAI CHATTAWA
|
||||
u'\u0e4c' # 0xec -> THAI CHARACTER THANTHAKHAT
|
||||
u'\u0e4d' # 0xed -> THAI CHARACTER NIKHAHIT
|
||||
u'\u0e4e' # 0xee -> THAI CHARACTER YAMAKKAN
|
||||
u'\u0e4f' # 0xef -> THAI CHARACTER FONGMAN
|
||||
u'\u0e50' # 0xf0 -> THAI DIGIT ZERO
|
||||
u'\u0e51' # 0xf1 -> THAI DIGIT ONE
|
||||
u'\u0e52' # 0xf2 -> THAI DIGIT TWO
|
||||
u'\u0e53' # 0xf3 -> THAI DIGIT THREE
|
||||
u'\u0e54' # 0xf4 -> THAI DIGIT FOUR
|
||||
u'\u0e55' # 0xf5 -> THAI DIGIT FIVE
|
||||
u'\u0e56' # 0xf6 -> THAI DIGIT SIX
|
||||
u'\u0e57' # 0xf7 -> THAI DIGIT SEVEN
|
||||
u'\u0e58' # 0xf8 -> THAI DIGIT EIGHT
|
||||
u'\u0e59' # 0xf9 -> THAI DIGIT NINE
|
||||
u'\u0e5a' # 0xfa -> THAI CHARACTER ANGKHANKHU
|
||||
u'\u0e5b' # 0xfb -> THAI CHARACTER KHOMUT
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
u'\ufffe'
|
||||
)
|
||||
|
||||
### Encoding Map
|
||||
|
||||
encoding_map = codecs.make_encoding_map(decoding_map)
|
||||
encoding_map = {
|
||||
0x0000: 0x00, # NULL
|
||||
0x0001: 0x01, # START OF HEADING
|
||||
0x0002: 0x02, # START OF TEXT
|
||||
0x0003: 0x03, # END OF TEXT
|
||||
0x0004: 0x04, # END OF TRANSMISSION
|
||||
0x0005: 0x05, # ENQUIRY
|
||||
0x0006: 0x06, # ACKNOWLEDGE
|
||||
0x0007: 0x07, # BELL
|
||||
0x0008: 0x08, # BACKSPACE
|
||||
0x0009: 0x09, # HORIZONTAL TABULATION
|
||||
0x000a: 0x0a, # LINE FEED
|
||||
0x000b: 0x0b, # VERTICAL TABULATION
|
||||
0x000c: 0x0c, # FORM FEED
|
||||
0x000d: 0x0d, # CARRIAGE RETURN
|
||||
0x000e: 0x0e, # SHIFT OUT
|
||||
0x000f: 0x0f, # SHIFT IN
|
||||
0x0010: 0x10, # DATA LINK ESCAPE
|
||||
0x0011: 0x11, # DEVICE CONTROL ONE
|
||||
0x0012: 0x12, # DEVICE CONTROL TWO
|
||||
0x0013: 0x13, # DEVICE CONTROL THREE
|
||||
0x0014: 0x14, # DEVICE CONTROL FOUR
|
||||
0x0015: 0x15, # NEGATIVE ACKNOWLEDGE
|
||||
0x0016: 0x16, # SYNCHRONOUS IDLE
|
||||
0x0017: 0x17, # END OF TRANSMISSION BLOCK
|
||||
0x0018: 0x18, # CANCEL
|
||||
0x0019: 0x19, # END OF MEDIUM
|
||||
0x001a: 0x1a, # SUBSTITUTE
|
||||
0x001b: 0x1b, # ESCAPE
|
||||
0x001c: 0x1c, # FILE SEPARATOR
|
||||
0x001d: 0x1d, # GROUP SEPARATOR
|
||||
0x001e: 0x1e, # RECORD SEPARATOR
|
||||
0x001f: 0x1f, # UNIT SEPARATOR
|
||||
0x0020: 0x20, # SPACE
|
||||
0x0021: 0x21, # EXCLAMATION MARK
|
||||
0x0022: 0x22, # QUOTATION MARK
|
||||
0x0023: 0x23, # NUMBER SIGN
|
||||
0x0024: 0x24, # DOLLAR SIGN
|
||||
0x0025: 0x25, # PERCENT SIGN
|
||||
0x0026: 0x26, # AMPERSAND
|
||||
0x0027: 0x27, # APOSTROPHE
|
||||
0x0028: 0x28, # LEFT PARENTHESIS
|
||||
0x0029: 0x29, # RIGHT PARENTHESIS
|
||||
0x002a: 0x2a, # ASTERISK
|
||||
0x002b: 0x2b, # PLUS SIGN
|
||||
0x002c: 0x2c, # COMMA
|
||||
0x002d: 0x2d, # HYPHEN-MINUS
|
||||
0x002e: 0x2e, # FULL STOP
|
||||
0x002f: 0x2f, # SOLIDUS
|
||||
0x0030: 0x30, # DIGIT ZERO
|
||||
0x0031: 0x31, # DIGIT ONE
|
||||
0x0032: 0x32, # DIGIT TWO
|
||||
0x0033: 0x33, # DIGIT THREE
|
||||
0x0034: 0x34, # DIGIT FOUR
|
||||
0x0035: 0x35, # DIGIT FIVE
|
||||
0x0036: 0x36, # DIGIT SIX
|
||||
0x0037: 0x37, # DIGIT SEVEN
|
||||
0x0038: 0x38, # DIGIT EIGHT
|
||||
0x0039: 0x39, # DIGIT NINE
|
||||
0x003a: 0x3a, # COLON
|
||||
0x003b: 0x3b, # SEMICOLON
|
||||
0x003c: 0x3c, # LESS-THAN SIGN
|
||||
0x003d: 0x3d, # EQUALS SIGN
|
||||
0x003e: 0x3e, # GREATER-THAN SIGN
|
||||
0x003f: 0x3f, # QUESTION MARK
|
||||
0x0040: 0x40, # COMMERCIAL AT
|
||||
0x0041: 0x41, # LATIN CAPITAL LETTER A
|
||||
0x0042: 0x42, # LATIN CAPITAL LETTER B
|
||||
0x0043: 0x43, # LATIN CAPITAL LETTER C
|
||||
0x0044: 0x44, # LATIN CAPITAL LETTER D
|
||||
0x0045: 0x45, # LATIN CAPITAL LETTER E
|
||||
0x0046: 0x46, # LATIN CAPITAL LETTER F
|
||||
0x0047: 0x47, # LATIN CAPITAL LETTER G
|
||||
0x0048: 0x48, # LATIN CAPITAL LETTER H
|
||||
0x0049: 0x49, # LATIN CAPITAL LETTER I
|
||||
0x004a: 0x4a, # LATIN CAPITAL LETTER J
|
||||
0x004b: 0x4b, # LATIN CAPITAL LETTER K
|
||||
0x004c: 0x4c, # LATIN CAPITAL LETTER L
|
||||
0x004d: 0x4d, # LATIN CAPITAL LETTER M
|
||||
0x004e: 0x4e, # LATIN CAPITAL LETTER N
|
||||
0x004f: 0x4f, # LATIN CAPITAL LETTER O
|
||||
0x0050: 0x50, # LATIN CAPITAL LETTER P
|
||||
0x0051: 0x51, # LATIN CAPITAL LETTER Q
|
||||
0x0052: 0x52, # LATIN CAPITAL LETTER R
|
||||
0x0053: 0x53, # LATIN CAPITAL LETTER S
|
||||
0x0054: 0x54, # LATIN CAPITAL LETTER T
|
||||
0x0055: 0x55, # LATIN CAPITAL LETTER U
|
||||
0x0056: 0x56, # LATIN CAPITAL LETTER V
|
||||
0x0057: 0x57, # LATIN CAPITAL LETTER W
|
||||
0x0058: 0x58, # LATIN CAPITAL LETTER X
|
||||
0x0059: 0x59, # LATIN CAPITAL LETTER Y
|
||||
0x005a: 0x5a, # LATIN CAPITAL LETTER Z
|
||||
0x005b: 0x5b, # LEFT SQUARE BRACKET
|
||||
0x005c: 0x5c, # REVERSE SOLIDUS
|
||||
0x005d: 0x5d, # RIGHT SQUARE BRACKET
|
||||
0x005e: 0x5e, # CIRCUMFLEX ACCENT
|
||||
0x005f: 0x5f, # LOW LINE
|
||||
0x0060: 0x60, # GRAVE ACCENT
|
||||
0x0061: 0x61, # LATIN SMALL LETTER A
|
||||
0x0062: 0x62, # LATIN SMALL LETTER B
|
||||
0x0063: 0x63, # LATIN SMALL LETTER C
|
||||
0x0064: 0x64, # LATIN SMALL LETTER D
|
||||
0x0065: 0x65, # LATIN SMALL LETTER E
|
||||
0x0066: 0x66, # LATIN SMALL LETTER F
|
||||
0x0067: 0x67, # LATIN SMALL LETTER G
|
||||
0x0068: 0x68, # LATIN SMALL LETTER H
|
||||
0x0069: 0x69, # LATIN SMALL LETTER I
|
||||
0x006a: 0x6a, # LATIN SMALL LETTER J
|
||||
0x006b: 0x6b, # LATIN SMALL LETTER K
|
||||
0x006c: 0x6c, # LATIN SMALL LETTER L
|
||||
0x006d: 0x6d, # LATIN SMALL LETTER M
|
||||
0x006e: 0x6e, # LATIN SMALL LETTER N
|
||||
0x006f: 0x6f, # LATIN SMALL LETTER O
|
||||
0x0070: 0x70, # LATIN SMALL LETTER P
|
||||
0x0071: 0x71, # LATIN SMALL LETTER Q
|
||||
0x0072: 0x72, # LATIN SMALL LETTER R
|
||||
0x0073: 0x73, # LATIN SMALL LETTER S
|
||||
0x0074: 0x74, # LATIN SMALL LETTER T
|
||||
0x0075: 0x75, # LATIN SMALL LETTER U
|
||||
0x0076: 0x76, # LATIN SMALL LETTER V
|
||||
0x0077: 0x77, # LATIN SMALL LETTER W
|
||||
0x0078: 0x78, # LATIN SMALL LETTER X
|
||||
0x0079: 0x79, # LATIN SMALL LETTER Y
|
||||
0x007a: 0x7a, # LATIN SMALL LETTER Z
|
||||
0x007b: 0x7b, # LEFT CURLY BRACKET
|
||||
0x007c: 0x7c, # VERTICAL LINE
|
||||
0x007d: 0x7d, # RIGHT CURLY BRACKET
|
||||
0x007e: 0x7e, # TILDE
|
||||
0x007f: 0x7f, # DELETE
|
||||
0x0080: 0x80, # <control>
|
||||
0x0081: 0x81, # <control>
|
||||
0x0082: 0x82, # <control>
|
||||
0x0083: 0x83, # <control>
|
||||
0x0084: 0x84, # <control>
|
||||
0x0085: 0x85, # <control>
|
||||
0x0086: 0x86, # <control>
|
||||
0x0087: 0x87, # <control>
|
||||
0x0088: 0x88, # <control>
|
||||
0x0089: 0x89, # <control>
|
||||
0x008a: 0x8a, # <control>
|
||||
0x008b: 0x8b, # <control>
|
||||
0x008c: 0x8c, # <control>
|
||||
0x008d: 0x8d, # <control>
|
||||
0x008e: 0x8e, # <control>
|
||||
0x008f: 0x8f, # <control>
|
||||
0x0090: 0x90, # <control>
|
||||
0x0091: 0x91, # <control>
|
||||
0x0092: 0x92, # <control>
|
||||
0x0093: 0x93, # <control>
|
||||
0x0094: 0x94, # <control>
|
||||
0x0095: 0x95, # <control>
|
||||
0x0096: 0x96, # <control>
|
||||
0x0097: 0x97, # <control>
|
||||
0x0098: 0x98, # <control>
|
||||
0x0099: 0x99, # <control>
|
||||
0x009a: 0x9a, # <control>
|
||||
0x009b: 0x9b, # <control>
|
||||
0x009c: 0x9c, # <control>
|
||||
0x009d: 0x9d, # <control>
|
||||
0x009e: 0x9e, # <control>
|
||||
0x009f: 0x9f, # <control>
|
||||
0x0e01: 0xa1, # THAI CHARACTER KO KAI
|
||||
0x0e02: 0xa2, # THAI CHARACTER KHO KHAI
|
||||
0x0e03: 0xa3, # THAI CHARACTER KHO KHUAT
|
||||
0x0e04: 0xa4, # THAI CHARACTER KHO KHWAI
|
||||
0x0e05: 0xa5, # THAI CHARACTER KHO KHON
|
||||
0x0e06: 0xa6, # THAI CHARACTER KHO RAKHANG
|
||||
0x0e07: 0xa7, # THAI CHARACTER NGO NGU
|
||||
0x0e08: 0xa8, # THAI CHARACTER CHO CHAN
|
||||
0x0e09: 0xa9, # THAI CHARACTER CHO CHING
|
||||
0x0e0a: 0xaa, # THAI CHARACTER CHO CHANG
|
||||
0x0e0b: 0xab, # THAI CHARACTER SO SO
|
||||
0x0e0c: 0xac, # THAI CHARACTER CHO CHOE
|
||||
0x0e0d: 0xad, # THAI CHARACTER YO YING
|
||||
0x0e0e: 0xae, # THAI CHARACTER DO CHADA
|
||||
0x0e0f: 0xaf, # THAI CHARACTER TO PATAK
|
||||
0x0e10: 0xb0, # THAI CHARACTER THO THAN
|
||||
0x0e11: 0xb1, # THAI CHARACTER THO NANGMONTHO
|
||||
0x0e12: 0xb2, # THAI CHARACTER THO PHUTHAO
|
||||
0x0e13: 0xb3, # THAI CHARACTER NO NEN
|
||||
0x0e14: 0xb4, # THAI CHARACTER DO DEK
|
||||
0x0e15: 0xb5, # THAI CHARACTER TO TAO
|
||||
0x0e16: 0xb6, # THAI CHARACTER THO THUNG
|
||||
0x0e17: 0xb7, # THAI CHARACTER THO THAHAN
|
||||
0x0e18: 0xb8, # THAI CHARACTER THO THONG
|
||||
0x0e19: 0xb9, # THAI CHARACTER NO NU
|
||||
0x0e1a: 0xba, # THAI CHARACTER BO BAIMAI
|
||||
0x0e1b: 0xbb, # THAI CHARACTER PO PLA
|
||||
0x0e1c: 0xbc, # THAI CHARACTER PHO PHUNG
|
||||
0x0e1d: 0xbd, # THAI CHARACTER FO FA
|
||||
0x0e1e: 0xbe, # THAI CHARACTER PHO PHAN
|
||||
0x0e1f: 0xbf, # THAI CHARACTER FO FAN
|
||||
0x0e20: 0xc0, # THAI CHARACTER PHO SAMPHAO
|
||||
0x0e21: 0xc1, # THAI CHARACTER MO MA
|
||||
0x0e22: 0xc2, # THAI CHARACTER YO YAK
|
||||
0x0e23: 0xc3, # THAI CHARACTER RO RUA
|
||||
0x0e24: 0xc4, # THAI CHARACTER RU
|
||||
0x0e25: 0xc5, # THAI CHARACTER LO LING
|
||||
0x0e26: 0xc6, # THAI CHARACTER LU
|
||||
0x0e27: 0xc7, # THAI CHARACTER WO WAEN
|
||||
0x0e28: 0xc8, # THAI CHARACTER SO SALA
|
||||
0x0e29: 0xc9, # THAI CHARACTER SO RUSI
|
||||
0x0e2a: 0xca, # THAI CHARACTER SO SUA
|
||||
0x0e2b: 0xcb, # THAI CHARACTER HO HIP
|
||||
0x0e2c: 0xcc, # THAI CHARACTER LO CHULA
|
||||
0x0e2d: 0xcd, # THAI CHARACTER O ANG
|
||||
0x0e2e: 0xce, # THAI CHARACTER HO NOKHUK
|
||||
0x0e2f: 0xcf, # THAI CHARACTER PAIYANNOI
|
||||
0x0e30: 0xd0, # THAI CHARACTER SARA A
|
||||
0x0e31: 0xd1, # THAI CHARACTER MAI HAN-AKAT
|
||||
0x0e32: 0xd2, # THAI CHARACTER SARA AA
|
||||
0x0e33: 0xd3, # THAI CHARACTER SARA AM
|
||||
0x0e34: 0xd4, # THAI CHARACTER SARA I
|
||||
0x0e35: 0xd5, # THAI CHARACTER SARA II
|
||||
0x0e36: 0xd6, # THAI CHARACTER SARA UE
|
||||
0x0e37: 0xd7, # THAI CHARACTER SARA UEE
|
||||
0x0e38: 0xd8, # THAI CHARACTER SARA U
|
||||
0x0e39: 0xd9, # THAI CHARACTER SARA UU
|
||||
0x0e3a: 0xda, # THAI CHARACTER PHINTHU
|
||||
0x0e3f: 0xdf, # THAI CURRENCY SYMBOL BAHT
|
||||
0x0e40: 0xe0, # THAI CHARACTER SARA E
|
||||
0x0e41: 0xe1, # THAI CHARACTER SARA AE
|
||||
0x0e42: 0xe2, # THAI CHARACTER SARA O
|
||||
0x0e43: 0xe3, # THAI CHARACTER SARA AI MAIMUAN
|
||||
0x0e44: 0xe4, # THAI CHARACTER SARA AI MAIMALAI
|
||||
0x0e45: 0xe5, # THAI CHARACTER LAKKHANGYAO
|
||||
0x0e46: 0xe6, # THAI CHARACTER MAIYAMOK
|
||||
0x0e47: 0xe7, # THAI CHARACTER MAITAIKHU
|
||||
0x0e48: 0xe8, # THAI CHARACTER MAI EK
|
||||
0x0e49: 0xe9, # THAI CHARACTER MAI THO
|
||||
0x0e4a: 0xea, # THAI CHARACTER MAI TRI
|
||||
0x0e4b: 0xeb, # THAI CHARACTER MAI CHATTAWA
|
||||
0x0e4c: 0xec, # THAI CHARACTER THANTHAKHAT
|
||||
0x0e4d: 0xed, # THAI CHARACTER NIKHAHIT
|
||||
0x0e4e: 0xee, # THAI CHARACTER YAMAKKAN
|
||||
0x0e4f: 0xef, # THAI CHARACTER FONGMAN
|
||||
0x0e50: 0xf0, # THAI DIGIT ZERO
|
||||
0x0e51: 0xf1, # THAI DIGIT ONE
|
||||
0x0e52: 0xf2, # THAI DIGIT TWO
|
||||
0x0e53: 0xf3, # THAI DIGIT THREE
|
||||
0x0e54: 0xf4, # THAI DIGIT FOUR
|
||||
0x0e55: 0xf5, # THAI DIGIT FIVE
|
||||
0x0e56: 0xf6, # THAI DIGIT SIX
|
||||
0x0e57: 0xf7, # THAI DIGIT SEVEN
|
||||
0x0e58: 0xf8, # THAI DIGIT EIGHT
|
||||
0x0e59: 0xf9, # THAI DIGIT NINE
|
||||
0x0e5a: 0xfa, # THAI CHARACTER ANGKHANKHU
|
||||
0x0e5b: 0xfb, # THAI CHARACTER KHOMUT
|
||||
}
|
Loading…
Reference in New Issue