pokecrystal/preprocessor.py

627 lines
14 KiB
Python
Raw Normal View History

2012-03-05 07:05:36 +00:00
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
2013-01-10 21:07:36 +00:00
from extras.crystal import (
command_classes,
Warp,
XYTrigger,
Signpost,
PeopleEvent,
DataByteWordMacro,
PointerLabelBeforeBank,
PointerLabelAfterBank,
MoneyByteParam,
ItemFragment,
TextEndingCommand,
text_command_classes,
movement_command_classes,
music_classes
)
2012-04-26 19:14:46 +00:00
# every macro class this preprocessor knows how to expand, gathered into a
# single list; text_command_classes entries contribute their second element
macros = (
    command_classes
    + [Warp, XYTrigger, Signpost, PeopleEvent, DataByteWordMacro, ItemFragment]
    + [x[1] for x in text_command_classes]
    + movement_command_classes
    + music_classes
)
2012-04-26 19:14:46 +00:00
2013-01-10 21:42:43 +00:00
# when True, macro_translator first echoes every macro line it expands as a
# "; original_line: ..." comment on stdout (shows lines before preprocessing)
show_original_lines = False
# when True, macro_translator asserts that the number of comma-separated
# params on a line matches what the macro's param_types call for
# (helpful for debugging macros)
do_macro_sanity_check = False
# Maps a character (or an apostrophe-letter pair such as "'s") to the single
# byte value that quote_translator writes out for it.
#
# NOTE(review): many keys below are the empty string. They are presumably
# non-ASCII characters that were lost when this copy of the file was
# produced -- restore them from the original source before relying on this
# table. As written, every later "" key overwrites the earlier ones, so only
# the final "" entry (0xF5) is actually present in the dict.
chars = {
    "": 0x05,
    "": 0x06,
    "": 0x07,
    "": 0x08,
    "": 0x09,
    "": 0x0A,
    "": 0x0B,
    "": 0x0C,
    "": 0x0D,
    "": 0x0E,
    "": 0x0F,
    "": 0x10,
    "": 0x11,
    "": 0x12,
    "": 0x13,
    "": 0x19,
    "": 0x1A,
    "": 0x1B,
    "": 0x1C,
    "": 0x26,
    "": 0x27,
    "": 0x28,
    "": 0x29,
    "": 0x2A,
    "": 0x2B,
    "": 0x2C,
    "": 0x2D,
    "": 0x2E,
    "": 0x2F,
    "": 0x30,
    "": 0x31,
    "": 0x32,
    "": 0x33,
    "": 0x34,
    "": 0x3A,
    "": 0x3B,
    "": 0x3C,
    "": 0x3D,
    "": 0x3E,
    "": 0x40,
    "": 0x41,
    "": 0x42,
    "": 0x43,
    "": 0x44,
    "": 0x45,
    "": 0x46,
    "": 0x47,
    "": 0x48,
    "": 0x80,
    "": 0x81,
    "": 0x82,
    "": 0x83,
    "": 0x84,
    "": 0x85,
    "": 0x86,
    "": 0x87,
    "": 0x88,
    "": 0x89,
    "": 0x8A,
    "": 0x8B,
    "": 0x8C,
    "": 0x8D,
    "": 0x8E,
    "": 0x8F,
    "": 0x90,
    "": 0x91,
    "": 0x92,
    "": 0x93,
    "": 0x94,
    "": 0x95,
    "": 0x96,
    "": 0x97,
    "": 0x98,
    "": 0x99,
    "": 0x9A,
    "": 0x9B,
    "": 0x9C,
    "": 0x9D,
    "": 0x9E,
    "": 0x9F,
    "": 0xA0,
    "": 0xA1,
    "": 0xA2,
    "": 0xA3,
    "": 0xA4,
    "": 0xA5,
    "": 0xA6,
    "": 0xA7,
    "": 0xA8,
    "": 0xA9,
    "": 0xAA,
    "": 0xAB,
    "": 0xAC,
    "": 0xAD,
    "": 0xAE,
    "": 0xAF,
    "": 0xB0,
    "": 0xB1,
    "": 0xB2,
    "": 0xB3,
    "": 0xB4,
    "": 0xB5,
    "": 0xB6,
    "": 0xB7,
    "": 0xB8,
    "": 0xB9,
    "": 0xBA,
    "": 0xBB,
    "": 0xBC,
    "": 0xBD,
    "": 0xBE,
    "": 0xBF,
    "": 0xC0,
    "": 0xC1,
    "": 0xC2,
    "": 0xC3,
    "": 0xC4,
    "": 0xC5,
    "": 0xC6,
    "": 0xC7,
    "": 0xC8,
    "": 0xC9,
    "": 0xCA,
    "": 0xCB,
    "": 0xCC,
    "": 0xCD,
    "": 0xCE,
    "": 0xCF,
    "": 0xD0,
    "": 0xD1,
    "": 0xD2,
    "": 0xD3,
    "": 0xD4,
    "": 0xD5,
    "": 0xD6,
    "": 0xD7,
    "": 0xD8,
    "": 0xD9,
    "": 0xDA,
    "": 0xDB,
    "": 0xDC,
    "": 0xDD,
    "": 0xDE,
    "": 0xDF,
    "": 0xE0,
    "": 0xE1,
    "": 0xE2,
    "": 0xE3,

    "@": 0x50,
    "#": 0x54,
    "": 0x75,

    "": 0x79,
    "": 0x7A,
    "": 0x7B,
    "": 0x7C,
    "": 0x7D,
    "": 0x7E,

    "": 0x74,

    " ": 0x7F,
    "A": 0x80,
    "B": 0x81,
    "C": 0x82,
    "D": 0x83,
    "E": 0x84,
    "F": 0x85,
    "G": 0x86,
    "H": 0x87,
    "I": 0x88,
    "J": 0x89,
    "K": 0x8A,
    "L": 0x8B,
    "M": 0x8C,
    "N": 0x8D,
    "O": 0x8E,
    "P": 0x8F,
    "Q": 0x90,
    "R": 0x91,
    "S": 0x92,
    "T": 0x93,
    "U": 0x94,
    "V": 0x95,
    "W": 0x96,
    "X": 0x97,
    "Y": 0x98,
    "Z": 0x99,
    "(": 0x9A,
    ")": 0x9B,
    ":": 0x9C,
    ";": 0x9D,
    "[": 0x9E,
    "]": 0x9F,
    "a": 0xA0,
    "b": 0xA1,
    "c": 0xA2,
    "d": 0xA3,
    "e": 0xA4,
    "f": 0xA5,
    "g": 0xA6,
    "h": 0xA7,
    "i": 0xA8,
    "j": 0xA9,
    "k": 0xAA,
    "l": 0xAB,
    "m": 0xAC,
    "n": 0xAD,
    "o": 0xAE,
    "p": 0xAF,
    "q": 0xB0,
    "r": 0xB1,
    "s": 0xB2,
    "t": 0xB3,
    "u": 0xB4,
    "v": 0xB5,
    "w": 0xB6,
    "x": 0xB7,
    "y": 0xB8,
    "z": 0xB9,
    "Ä": 0xC0,
    "Ö": 0xC1,
    "Ü": 0xC2,
    "ä": 0xC3,
    "ö": 0xC4,
    "ü": 0xC5,
    # apostrophe-letter pairs that are stored as one byte
    "'d": 0xD0,
    "'l": 0xD1,
    "'m": 0xD2,
    "'r": 0xD3,
    "'s": 0xD4,
    "'t": 0xD5,
    "'v": 0xD6,
    "'": 0xE0,
    "-": 0xE3,
    "?": 0xE6,
    "!": 0xE7,
    ".": 0xE8,
    "&": 0xE9,
    "é": 0xEA,
    "": 0xEB,
    "": 0xED,
    "": 0xEF,
    "¥": 0xF0,
    "×": 0xF1,
    "/": 0xF3,
    ",": 0xF4,
    "": 0xF5,
    "0": 0xF6,
    "1": 0xF7,
    "2": 0xF8,
    "3": 0xF9,
    "4": 0xFA,
    "5": 0xFB,
    "6": 0xFC,
    "7": 0xFD,
    "8": 0xFE,
    "9": 0xFF,
}
2012-03-05 07:05:36 +00:00
def separate_comment(l):
    """ Separates asm and comments on a single line.

    l is one line of source text. Returns (asm, comment): asm is everything
    before the first ";" that is not inside double quotes, and comment is
    that ";" together with the rest of the line. comment is None when the
    line has no such ";".
    """
    in_quotes = False

    for position, token in enumerate(l):
        if token == "\"":
            # every double quote opens or closes a quoted string
            in_quotes = not in_quotes
        elif token == ";" and not in_quotes:
            # the first unquoted ";" starts the comment; everything from
            # here to the end of the line belongs to it
            return l[:position], l[position:]

    return l, None
2012-03-05 07:05:36 +00:00
def _char_width(text):
    """ Returns how many leading items of text make up one character.

    For a byte string the width is read off the first byte using the same
    UTF-8 lead-byte thresholds this preprocessor has always used. Anything
    else (already-decoded text) holds one character per item.
    """
    if not isinstance(text, bytes):
        return 1

    # slice rather than index so the result can always be passed to ord()
    lead = ord(text[0:1])
    if lead >= 0xFC:
        return 6
    if lead >= 0xF8:
        return 5
    if lead >= 0xF0:
        return 4
    if lead >= 0xE0:
        return 3
    if lead >= 0xC0:
        return 2
    return 1


def quote_translator(asm):
    """ Writes asm with quoted text translated into bytes.

    asm is split on double quotes; the pieces outside quotes are written to
    stdout unchanged and every character inside quotes is replaced by its
    value from chars, formatted as "$XX" and separated by ", ". The quote
    characters themselves are not written.

    A line whose text before the first quote contains "section" or "incbin"
    (case-insensitive) is written out untouched.

    Raises KeyError when a quoted character has no entry in chars.
    """
    # split by quotes
    asms = asm.split("\"")

    # skip asm that actually does use ASCII in quotes
    lowasm = asms[0].lower()
    if "section" in lowasm or "incbin" in lowasm:
        sys.stdout.write(asm)
        return

    pieces = []

    for position, token in enumerate(asms):
        # pieces at even positions lie outside the quotes
        if position % 2 == 0:
            pieces.append(token)
            continue

        # token is a string to convert to byte values
        while token:
            # read a single character
            width = _char_width(token)
            char = token[:width]
            token = token[width:]

            # certain apostrophe-letter pairs are only a single byte
            if char == "'" and \
                    token[:1] in ("d", "l", "m", "r", "s", "t", "v"):
                char = char + token[0]
                token = token[1:]

            pieces.append("${0:02X}".format(chars[char]))

            if token:
                pieces.append(", ")

    sys.stdout.write("".join(pieces))
    return
2012-04-26 19:14:46 +00:00
def extract_token(asm):
    """ Returns the first word of asm, used to look up a macro by name.

    Only a space ends the word. Tabs and newlines found in it are removed
    rather than treated as separators, so "a\\tb c" yields "ab" and a line
    that starts with a space yields "".
    """
    first_word = asm.partition(" ")[0]
    return first_word.replace("\t", "").replace("\n", "")
def make_macro_table():
    """ Returns a dict mapping each macro's macro_name to the macro itself.

    Built from the module-level macros list. When two macros share a
    macro_name, the one later in the list is the one kept.
    """
    return dict((macro.macro_name, macro) for macro in macros)


# built once at import time so macro_test can do a dict lookup per line
macro_table = make_macro_table()
2012-04-26 19:14:46 +00:00
def macro_test(asm):
    """ Returns a matching macro, or None/False.

    The result is always a pair: (macro, token) when the first word of asm
    is a key of macro_table, otherwise (None, None).
    """
    # macros are determined by the first symbol on the line
    token = extract_token(asm)

    # check against all names
    if token in macro_table:
        return (macro_table[token], token)

    return (None, None)
2012-04-26 19:14:46 +00:00
def _expected_param_count(macro, original_line):
    """ Counts the comma-separated values that macro's param_types call for.

    original_line is only used in error messages. Raises Exception for a
    param class this preprocessor does not know how to size.
    """
    allowed_length = 0

    for param_type in macro.param_types.values():
        param_klass = param_type["class"]

        if param_klass.byte_type == "db":
            allowed_length += 1 # just one value
        elif param_klass.byte_type == "dw":
            if param_klass.size == 2:
                allowed_length += 1 # just label
            elif param_klass == MoneyByteParam:
                allowed_length += 1
            elif param_klass.size == 3:
                allowed_length += 2 # bank and label
            else:
                raise Exception("dunno what to do with a macro param with a size > 3")
        else:
            raise Exception("dunno what to do with this non db/dw macro param: " + \
                            str(param_klass) + " in line: " + original_line)

    return allowed_length


def macro_translator(macro, token, line):
    """ Converts a line with a macro into a rgbasm-compatible line.

    macro is the macro class matched for this line, token is the macro name
    as it appears on the line, and line is the asm text with any comment
    already removed. The translated db/dw lines are written to stdout and
    nothing is returned.

    Raises AssertionError when macro.macro_name differs from token (and,
    with do_macro_sanity_check enabled, when the parameter count is wrong).
    Raises Exception for a parameter class that cannot be translated.
    """
    assert macro.macro_name == token, "macro/token mismatch"

    original_line = line

    # remove trailing newline (the echoed original always ends with one)
    if line.endswith("\n"):
        line = line[:-1]
    else:
        original_line += "\n"

    # remove duplicate whitespace (also leading tab and trailing whitespace)
    line = " ".join(line.split())

    params = []

    # check if the line has params
    if " " in line:
        # split the line into separate parameters; only the first occurrence
        # of the macro name is dropped so that a parameter which happens to
        # contain the name is left intact
        params = line.replace(token, "", 1).split(",")

        # check if there are no params (redundant)
        if len(params) == 1 and params[0] == "":
            raise Exception("macro has no params?")

    # write out a comment showing the original line
    if show_original_lines:
        sys.stdout.write("; original_line: " + original_line)

    # "db" is a macro because of TextEndingCommand
    # rgbasm can handle "db" so no preprocessing is required
    # (don't check its param count)
    if macro.macro_name == "db" and macro in [TextEndingCommand, ItemFragment]:
        sys.stdout.write(original_line)
        return

    # certain macros don't need an initial byte written
    # do: all scripting macros
    # don't: signpost, warp_def, person_event, xy_trigger
    if not macro.override_byte_check:
        sys.stdout.write("db ${0:02X}\n".format(macro.id))

    # --- long-winded sanity check goes here ---

    if do_macro_sanity_check:
        # comparing len(params) with len(macro.param_types) directly won't
        # work because PointerLabelBeforeBank shows up as two params, so
        # count how many values each param class takes up on the line
        allowed_length = _expected_param_count(macro, original_line)

        # sometimes the allowed length can vary
        if hasattr(macro, "allowed_lengths"):
            allowed_lengths = macro.allowed_lengths + [allowed_length]
        else:
            allowed_lengths = [allowed_length]

        assert len(params) in allowed_lengths, \
               "mismatched number of parameters on this line: " + \
               original_line

    # --- end of ridiculously long sanity check ---

    # number of extra values consumed so far by two-value params; keeps
    # the lookup into macro.param_types aligned with params
    correction = 0

    pieces = []

    index = 0
    while index < len(params):
        param_type = macro.param_types[index - correction]
        description = param_type["name"]
        param_klass = param_type["class"]
        byte_type = param_klass.byte_type # db or dw
        size = param_klass.size
        param = params[index].strip()

        # param_klass.to_asm() won't work here because it doesn't
        # include db/dw.

        # some parameters are really multiple types of bytes
        if (byte_type == "dw" and size != 2) or \
           (byte_type == "db" and size != 1):

            pieces.append("; " + description + "\n")

            if size == 3 and issubclass(param_klass, PointerLabelBeforeBank):
                # write the bank first
                pieces.append("db " + param + "\n")
                # write the pointer second
                pieces.append("dw " + params[index+1].strip() + "\n")
                index += 2
                correction += 1
            elif size == 3 and issubclass(param_klass, PointerLabelAfterBank):
                # write the pointer first
                pieces.append("dw " + param + "\n")
                # write the bank second
                pieces.append("db " + params[index+1].strip() + "\n")
                index += 2
                correction += 1
            elif size == 3 and issubclass(param_klass, MoneyByteParam):
                pieces.append("db " + MoneyByteParam.from_asm(param) + "\n")
                index += 1
            else:
                raise Exception("dunno what to do with this macro " + \
                                "param (" + str(param_klass) + ") " + "on this line: " + \
                                original_line)

        # or just print out the byte
        else:
            pieces.append(byte_type + " " + param + " ; " + description + "\n")
            index += 1

    sys.stdout.write("".join(pieces))
def include_file(asm):
    """This is more reliable than rgbasm/rgbds including files on its own.

    asm is a line containing a double-quoted filename. That file is read and
    each of its lines is passed through read_line, so the included file is
    preprocessed in place of the INCLUDE line.
    """
    filename = asm.split("\"")[1]

    # read everything up front and close the file before recursing, so
    # nested includes do not keep a chain of files open
    with open(filename, "r") as included:
        lines = included.readlines()

    for line in lines:
        read_line(line)
def read_line(l):
    """Preprocesses a given line of asm.

    The line is written to stdout after INCLUDE expansion, quoted-text
    translation or macro translation, whichever applies first. Any comment
    on the line is written after the translated asm.
    """
    # strip and store any comment on this line
    if ";" in l:
        asm, comment = separate_comment(l)
    else:
        asm = l
        comment = None

    # handle INCLUDE as a special case either at the start of the line or
    # after the first character in the line (like a tab); both slices must
    # be 9 characters long to be able to match
    if "INCLUDE \"" in [asm[0:9], asm[1:10]]:
        include_file(asm)

    # convert text to bytes when a quote appears (not in a comment)
    elif "\"" in asm:
        quote_translator(asm)

    # check against other preprocessor features
    else:
        macro, token = macro_test(asm)

        if macro:
            macro_translator(macro, token, asm)
        else:
            sys.stdout.write(asm)

    # show line comment
    if comment is not None:
        sys.stdout.write(comment)
2012-04-26 15:51:37 +00:00
def preprocess(lines=None):
    """Main entry point for the preprocessor.

    lines may be None (each line is read from stdin), a list of lines, or
    any other object with a split method, which is split on "\\n". Every
    line is passed to read_line, which writes the result to stdout.

    NOTE(review): splitting on "\\n" drops the newline from each line, and
    read_line writes plain lines back without adding one -- confirm whether
    callers passing a single string expect that.
    """
    if lines is None:
        # read each line from stdin; an empty list or string means "no
        # input" and must not fall through to stdin
        lines = sys.stdin
    elif not isinstance(lines, list):
        # split up the input into individual lines
        lines = lines.split("\n")

    for l in lines:
        read_line(l)
# only run against stdin when executed as a script, not when this file is
# imported as a module
if __name__ == "__main__":
    preprocess()