pokecrystal/textpre.py

380 lines
6.3 KiB
Python
Raw Normal View History

2012-03-05 07:05:36 +00:00
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
# Lookup table used by quote_translator: maps a character (or an
# apostrophe+letter pair such as "'s") found inside a quoted string to the
# single byte value that is written out in its place.
#
# NOTE(review): many keys below are empty strings. The glyphs that belong
# there appear to have been lost from this copy of the file. Because a dict
# display keeps only the last value for a repeated key, every "" entry
# collapses into the single mapping "" -> 0xF5. The entries are kept in their
# original order so the missing glyphs can be restored from an intact copy.
chars = {
    "": 0x05,
    "": 0x06,
    "": 0x07,
    "": 0x08,
    "": 0x09,
    "": 0x0A,
    "": 0x0B,
    "": 0x0C,
    "": 0x0D,
    "": 0x0E,
    "": 0x0F,
    "": 0x10,
    "": 0x11,
    "": 0x12,
    "": 0x13,
    "": 0x19,
    "": 0x1A,
    "": 0x1B,
    "": 0x1C,
    "": 0x26,
    "": 0x27,
    "": 0x28,
    "": 0x29,
    "": 0x2A,
    "": 0x2B,
    "": 0x2C,
    "": 0x2D,
    "": 0x2E,
    "": 0x2F,
    "": 0x30,
    "": 0x31,
    "": 0x32,
    "": 0x33,
    "": 0x34,
    "": 0x3A,
    "": 0x3B,
    "": 0x3C,
    "": 0x3D,
    "": 0x3E,
    "": 0x40,
    "": 0x41,
    "": 0x42,
    "": 0x43,
    "": 0x44,
    "": 0x45,
    "": 0x46,
    "": 0x47,
    "": 0x48,
    "": 0x80,
    "": 0x81,
    "": 0x82,
    "": 0x83,
    "": 0x84,
    "": 0x85,
    "": 0x86,
    "": 0x87,
    "": 0x88,
    "": 0x89,
    "": 0x8A,
    "": 0x8B,
    "": 0x8C,
    "": 0x8D,
    "": 0x8E,
    "": 0x8F,
    "": 0x90,
    "": 0x91,
    "": 0x92,
    "": 0x93,
    "": 0x94,
    "": 0x95,
    "": 0x96,
    "": 0x97,
    "": 0x98,
    "": 0x99,
    "": 0x9A,
    "": 0x9B,
    "": 0x9C,
    "": 0x9D,
    "": 0x9E,
    "": 0x9F,
    "": 0xA0,
    "": 0xA1,
    "": 0xA2,
    "": 0xA3,
    "": 0xA4,
    "": 0xA5,
    "": 0xA6,
    "": 0xA7,
    "": 0xA8,
    "": 0xA9,
    "": 0xAA,
    "": 0xAB,
    "": 0xAC,
    "": 0xAD,
    "": 0xAE,
    "": 0xAF,
    "": 0xB0,
    "": 0xB1,
    "": 0xB2,
    "": 0xB3,
    "": 0xB4,
    "": 0xB5,
    "": 0xB6,
    "": 0xB7,
    "": 0xB8,
    "": 0xB9,
    "": 0xBA,
    "": 0xBB,
    "": 0xBC,
    "": 0xBD,
    "": 0xBE,
    "": 0xBF,
    "": 0xC0,
    "": 0xC1,
    "": 0xC2,
    "": 0xC3,
    "": 0xC4,
    "": 0xC5,
    "": 0xC6,
    "": 0xC7,
    "": 0xC8,
    "": 0xC9,
    "": 0xCA,
    "": 0xCB,
    "": 0xCC,
    "": 0xCD,
    "": 0xCE,
    "": 0xCF,
    "": 0xD0,
    "": 0xD1,
    "": 0xD2,
    "": 0xD3,
    "": 0xD4,
    "": 0xD5,
    "": 0xD6,
    "": 0xD7,
    "": 0xD8,
    "": 0xD9,
    "": 0xDA,
    "": 0xDB,
    "": 0xDC,
    "": 0xDD,
    "": 0xDE,
    "": 0xDF,
    "": 0xE0,
    "": 0xE1,
    "": 0xE2,
    "": 0xE3,

    "@": 0x50,
    "#": 0x54,
    "": 0x75,

    "": 0x79,
    "": 0x7A,
    "": 0x7B,
    "": 0x7C,
    "": 0x7D,
    "": 0x7E,

    "": 0x74,

    " ": 0x7F,
    "A": 0x80,
    "B": 0x81,
    "C": 0x82,
    "D": 0x83,
    "E": 0x84,
    "F": 0x85,
    "G": 0x86,
    "H": 0x87,
    "I": 0x88,
    "J": 0x89,
    "K": 0x8A,
    "L": 0x8B,
    "M": 0x8C,
    "N": 0x8D,
    "O": 0x8E,
    "P": 0x8F,
    "Q": 0x90,
    "R": 0x91,
    "S": 0x92,
    "T": 0x93,
    "U": 0x94,
    "V": 0x95,
    "W": 0x96,
    "X": 0x97,
    "Y": 0x98,
    "Z": 0x99,
    "(": 0x9A,
    ")": 0x9B,
    ":": 0x9C,
    ";": 0x9D,
    "[": 0x9E,
    "]": 0x9F,
    "a": 0xA0,
    "b": 0xA1,
    "c": 0xA2,
    "d": 0xA3,
    "e": 0xA4,
    "f": 0xA5,
    "g": 0xA6,
    "h": 0xA7,
    "i": 0xA8,
    "j": 0xA9,
    "k": 0xAA,
    "l": 0xAB,
    "m": 0xAC,
    "n": 0xAD,
    "o": 0xAE,
    "p": 0xAF,
    "q": 0xB0,
    "r": 0xB1,
    "s": 0xB2,
    "t": 0xB3,
    "u": 0xB4,
    "v": 0xB5,
    "w": 0xB6,
    "x": 0xB7,
    "y": 0xB8,
    "z": 0xB9,
    "Ä": 0xC0,
    "Ö": 0xC1,
    "Ü": 0xC2,
    "ä": 0xC3,
    "ö": 0xC4,
    "ü": 0xC5,
    # apostrophe+letter pairs that are written as a single byte
    "'d": 0xD0,
    "'l": 0xD1,
    "'m": 0xD2,
    "'r": 0xD3,
    "'s": 0xD4,
    "'t": 0xD5,
    "'v": 0xD6,
    "'": 0xE0,
    "-": 0xE3,
    "?": 0xE6,
    "!": 0xE7,
    ".": 0xE8,
    "&": 0xE9,
    "é": 0xEA,
    "": 0xEB,
    "": 0xEF,
    "¥": 0xF0,
    "×": 0xF1,
    "/": 0xF3,
    ",": 0xF4,
    "": 0xF5,
    "0": 0xF6,
    "1": 0xF7,
    "2": 0xF8,
    "3": 0xF9,
    "4": 0xFA,
    "5": 0xFB,
    "6": 0xFC,
    "7": 0xFD,
    "8": 0xFE,
    "9": 0xFF,
}
def separate_comment(l):
    """ Separates asm and comments on a single line.

    Scans the line for the first ";" that is not enclosed in double quotes.

    Returns a tuple (asm, comment): asm is everything before that ";" and
    comment is everything from the ";" to the end of the line (a trailing
    newline, if any, therefore stays with the comment). When there is no
    unquoted ";", asm is the whole line and comment is None.
    """
    in_quotes = False
    for position, token in enumerate(l):
        if token == "\"":
            # every double quote toggles the quoted state
            in_quotes = not in_quotes
        elif token == ";" and not in_quotes:
            # everything from here on belongs to the comment
            return l[:position], l[position:]
    return l, None
def _next_char(token):
    """ Splits the next lookup key off the front of a non-empty token.

    Returns a tuple (char, rest): char is the key to look up in the
    character table and rest is the remainder of the token.
    """
    if isinstance(token, bytes):
        # byte string (Python 2 str): the lead byte tells how many bytes
        # make up this UTF-8 sequence
        lead = ord(token[0])
        if lead >= 0xFC:
            width = 6
        elif lead >= 0xF8:
            width = 5
        elif lead >= 0xF0:
            width = 4
        elif lead >= 0xE0:
            width = 3
        elif lead >= 0xC0:
            width = 2
        else:
            width = 1
    else:
        # decoded text: each element is already one whole code point
        width = 1

    char = token[:width]
    rest = token[width:]

    # certain apostrophe-letter pairs are only a single byte; rest[:1] is
    # empty (and matches nothing) when the apostrophe ends the string
    if char == "'" and rest[:1] in ("d", "l", "m", "r", "s", "t", "v"):
        char = char + rest[0]
        rest = rest[1:]
    return char, rest


def quote_translator(asm):
    """ Writes asm with quoted text translated into bytes.

    The line is split on double quotes. Pieces outside quotes are written to
    stdout unchanged; each character of a quoted piece is written as "$XX"
    (its value in the chars table, two upper-case hex digits), with ", "
    between consecutive values of the same piece. The quote characters
    themselves are not written.

    Lines whose text before the first quote contains "section", "include"
    or "incbin" (case-insensitive) are written unchanged.

    Raises KeyError when a quoted character has no entry in chars.
    """
    # split by quotes
    asms = asm.split("\"")

    # skip asm that actually does use ASCII in quotes
    lowasm = asms[0].lower()
    if "section" in lowasm \
            or "include" in lowasm \
            or "incbin" in lowasm:
        sys.stdout.write(asm)
        return

    # pieces alternate: outside quotes, inside quotes, outside quotes, ...
    for index, token in enumerate(asms):
        if index % 2 == 0:
            sys.stdout.write(token)
            continue

        # token is a string to convert to byte values
        while token:
            char, token = _next_char(token)
            sys.stdout.write("${0:02X}".format(chars[char]))
            if token:
                sys.stdout.write(", ")
# Filter stdin to stdout one line at a time, translating quoted text into
# byte values while leaving comments untouched.
for l in sys.stdin:
    # strip and store any comment on this line
    if ";" in l:
        asm, comment = separate_comment(l)
    else:
        asm = l
        comment = None

    # convert text to bytes when a quote appears (not in a comment)
    if "\"" in asm:
        quote_translator(asm)
    else:
        sys.stdout.write(asm)

    # show line comment
    if comment is not None:
        sys.stdout.write(comment)