pokecrystal/preprocessor.py

635 lines
15 KiB
Python
Raw Normal View History

2012-03-05 07:05:36 +00:00
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
from extras.pokemontools.crystal import (
2013-01-10 21:07:36 +00:00
command_classes,
Warp,
XYTrigger,
Signpost,
PeopleEvent,
DataByteWordMacro,
PointerLabelBeforeBank,
PointerLabelAfterBank,
ItemFragment,
TextEndingCommand,
text_command_classes,
movement_command_classes,
2013-01-27 23:45:23 +00:00
music_classes,
2013-03-29 22:54:13 +00:00
effect_classes,
2013-01-10 21:07:36 +00:00
)
2012-04-26 19:14:46 +00:00
2013-01-27 23:45:23 +00:00
# Macro classes handled by the preprocessor in addition to command_classes.
even_more_macros = [
    Warp,
    XYTrigger,
    Signpost,
    PeopleEvent,
    DataByteWordMacro,
    ItemFragment,
]

# Every class the preprocessor treats as a macro.  Start from a copy: the
# in-place "+=" extensions below would otherwise also grow the
# `command_classes` list owned by the module it was imported from.
macros = list(command_classes)
macros += even_more_macros
# each entry of text_command_classes is a sequence; the class is element 1
macros += [each[1] for each in text_command_classes]
macros += movement_command_classes
macros += music_classes
macros += effect_classes
2012-04-26 19:14:46 +00:00
2013-01-10 21:42:43 +00:00
# show lines before preprocessing in stdout: when True, macro_translator
# writes each macro line as a "; original_line: ..." comment ahead of its
# expansion
show_original_lines = False
# helpful for debugging macros: when True, macro_translator asserts that the
# number of parameters on a macro line is one the macro's param_types allow
do_macro_sanity_check = False
# Maps a text character (or an apostrophe-letter pair such as "'d") to the
# byte value quote_translator emits for it.
#
# NOTE(review): many keys below are empty strings.  They look like non-ASCII
# characters that were lost before this file reached review -- TODO confirm
# against the upstream charmap and restore them.  As written, every "" entry
# collapses into a single "" key whose value is the last one listed (0xF5).
# The entries are kept, in order, so the affected byte values stay visible.
chars = {
    "": 0x05,
    "": 0x06,
    "": 0x07,
    "": 0x08,
    "": 0x09,
    "": 0x0A,
    "": 0x0B,
    "": 0x0C,
    "": 0x0D,
    "": 0x0E,
    "": 0x0F,
    "": 0x10,
    "": 0x11,
    "": 0x12,
    "": 0x13,
    "": 0x19,
    "": 0x1A,
    "": 0x1B,
    "": 0x1C,
    "": 0x26,
    "": 0x27,
    "": 0x28,
    "": 0x29,
    "": 0x2A,
    "": 0x2B,
    "": 0x2C,
    "": 0x2D,
    "": 0x2E,
    "": 0x2F,
    "": 0x30,
    "": 0x31,
    "": 0x32,
    "": 0x33,
    "": 0x34,
    "": 0x3A,
    "": 0x3B,
    "": 0x3C,
    "": 0x3D,
    "": 0x3E,
    "": 0x40,
    "": 0x41,
    "": 0x42,
    "": 0x43,
    "": 0x44,
    "": 0x45,
    "": 0x46,
    "": 0x47,
    "": 0x48,
    "": 0x80,
    "": 0x81,
    "": 0x82,
    "": 0x83,
    "": 0x84,
    "": 0x85,
    "": 0x86,
    "": 0x87,
    "": 0x88,
    "": 0x89,
    "": 0x8A,
    "": 0x8B,
    "": 0x8C,
    "": 0x8D,
    "": 0x8E,
    "": 0x8F,
    "": 0x90,
    "": 0x91,
    "": 0x92,
    "": 0x93,
    "": 0x94,
    "": 0x95,
    "": 0x96,
    "": 0x97,
    "": 0x98,
    "": 0x99,
    "": 0x9A,
    "": 0x9B,
    "": 0x9C,
    "": 0x9D,
    "": 0x9E,
    "": 0x9F,
    "": 0xA0,
    "": 0xA1,
    "": 0xA2,
    "": 0xA3,
    "": 0xA4,
    "": 0xA5,
    "": 0xA6,
    "": 0xA7,
    "": 0xA8,
    "": 0xA9,
    "": 0xAA,
    "": 0xAB,
    "": 0xAC,
    "": 0xAD,
    "": 0xAE,
    "": 0xAF,
    "": 0xB0,
    "": 0xB1,
    "": 0xB2,
    "": 0xB3,
    "": 0xB4,
    "": 0xB5,
    "": 0xB6,
    "": 0xB7,
    "": 0xB8,
    "": 0xB9,
    "": 0xBA,
    "": 0xBB,
    "": 0xBC,
    "": 0xBD,
    "": 0xBE,
    "": 0xBF,
    "": 0xC0,
    "": 0xC1,
    "": 0xC2,
    "": 0xC3,
    "": 0xC4,
    "": 0xC5,
    "": 0xC6,
    "": 0xC7,
    "": 0xC8,
    "": 0xC9,
    "": 0xCA,
    "": 0xCB,
    "": 0xCC,
    "": 0xCD,
    "": 0xCE,
    "": 0xCF,
    "": 0xD0,
    "": 0xD1,
    "": 0xD2,
    "": 0xD3,
    "": 0xD4,
    "": 0xD5,
    "": 0xD6,
    "": 0xD7,
    "": 0xD8,
    "": 0xD9,
    "": 0xDA,
    "": 0xDB,
    "": 0xDC,
    "": 0xDD,
    "": 0xDE,
    "": 0xDF,
    "": 0xE0,
    "": 0xE1,
    "": 0xE2,
    "": 0xE3,

    "": 0xE9,

    "@": 0x50,
    "#": 0x54,
    "": 0x75,

    "": 0x79,
    "": 0x7A,
    "": 0x7B,
    "": 0x7C,
    "": 0x7D,
    "": 0x7E,

    "": 0x74,

    " ": 0x7F,
    "A": 0x80,
    "B": 0x81,
    "C": 0x82,
    "D": 0x83,
    "E": 0x84,
    "F": 0x85,
    "G": 0x86,
    "H": 0x87,
    "I": 0x88,
    "J": 0x89,
    "K": 0x8A,
    "L": 0x8B,
    "M": 0x8C,
    "N": 0x8D,
    "O": 0x8E,
    "P": 0x8F,
    "Q": 0x90,
    "R": 0x91,
    "S": 0x92,
    "T": 0x93,
    "U": 0x94,
    "V": 0x95,
    "W": 0x96,
    "X": 0x97,
    "Y": 0x98,
    "Z": 0x99,
    "(": 0x9A,
    ")": 0x9B,
    ":": 0x9C,
    ";": 0x9D,
    "[": 0x9E,
    "]": 0x9F,
    "a": 0xA0,
    "b": 0xA1,
    "c": 0xA2,
    "d": 0xA3,
    "e": 0xA4,
    "f": 0xA5,
    "g": 0xA6,
    "h": 0xA7,
    "i": 0xA8,
    "j": 0xA9,
    "k": 0xAA,
    "l": 0xAB,
    "m": 0xAC,
    "n": 0xAD,
    "o": 0xAE,
    "p": 0xAF,
    "q": 0xB0,
    "r": 0xB1,
    "s": 0xB2,
    "t": 0xB3,
    "u": 0xB4,
    "v": 0xB5,
    "w": 0xB6,
    "x": 0xB7,
    "y": 0xB8,
    "z": 0xB9,
    "Ä": 0xC0,
    "Ö": 0xC1,
    "Ü": 0xC2,
    "ä": 0xC3,
    "ö": 0xC4,
    "ü": 0xC5,
    "'d": 0xD0,
    "'l": 0xD1,
    "'m": 0xD2,
    "'r": 0xD3,
    "'s": 0xD4,
    "'t": 0xD5,
    "'v": 0xD6,
    "'": 0xE0,
    "-": 0xE3,
    "?": 0xE6,
    "!": 0xE7,
    ".": 0xE8,
    "&": 0xE9,
    "é": 0xEA,
    "": 0xEB,
    "": 0xEC,
    "": 0xED,
    "": 0xEE,
    "": 0xEF,
    "¥": 0xF0,
    "×": 0xF1,
    "/": 0xF3,
    ",": 0xF4,
    "": 0xF5,
    "0": 0xF6,
    "1": 0xF7,
    "2": 0xF8,
    "3": 0xF9,
    "4": 0xFA,
    "5": 0xFB,
    "6": 0xFC,
    "7": 0xFD,
    "8": 0xFE,
    "9": 0xFF,
}
2012-03-05 07:05:36 +00:00
def separate_comment(l):
    """
    Separates asm and comments on a single line.

    A ";" starts a comment unless it sits inside a double-quoted string.

    Returns a tuple (asm, comment):
      * with a comment, `asm` is the text before the ";" and `comment` is
        everything from the ";" to the end of the line;
      * without a comment, a trailing newline is split off and returned as
        `comment`, so a caller can write it back after the asm;
      * with neither, `comment` is None and `asm` is the whole line.
    """
    in_quotes = False
    for i, char in enumerate(l):
        if char == ";" and not in_quotes:
            return l[:i], l[i:]
        if char == "\"":
            in_quotes = not in_quotes

    # No comment found.  Only a real trailing newline is split off: cutting
    # at the last index unconditionally used to move the final character of
    # a newline-less line into the "comment", and an empty line had no last
    # index at all.
    if l.endswith("\n"):
        return l[:-1], "\n"
    return l, None
2012-03-05 07:05:36 +00:00
def _utf8_char_width(lead):
    """Returns the byte length of the UTF-8 sequence started by `lead`."""
    code = ord(lead)
    if code < 0xc0:
        return 1
    if code < 0xe0:
        return 2
    if code < 0xf0:
        return 3
    if code < 0xf8:
        return 4
    if code < 0xfc:
        return 5
    return 6


def _split_characters(token):
    """Splits quoted text into the character keys used by `chars`."""
    # A byte string (the Python 2 `str` read from a file) holds UTF-8, so a
    # character may span several bytes.  A unicode string already has one
    # element per character.
    is_utf8_bytes = isinstance(token, bytes)
    characters = []
    while token:
        width = _utf8_char_width(token[0]) if is_utf8_bytes else 1
        char, token = token[:width], token[width:]
        # certain apostrophe-letter pairs are considered a single character
        if char == "'" and token and token[0] in 'dlmrstv':
            char += token[0]
            token = token[1:]
        characters.append(char)
    return characters


def _wrap_print_text(characters):
    """Returns the byte values for a `print` string, wrapped into lines."""
    values = []
    line = 0
    while characters:
        last_char = 1
        if len(characters) > 18 and characters[-1] != '@':
            # break at the last space within the first 18 characters, or
            # after the first character when there is no such space
            for i, _ in enumerate(characters):
                last_char = i + 1
                if ' ' not in characters[i + 1:18]:
                    break
            values += [chars[char] for char in characters[:last_char - 1]]
            # the space a line breaks on is dropped; any other character
            # is kept (look up its byte value, not the character itself)
            if characters[last_char - 1] != " ":
                values.append(chars[characters[last_char - 1]])
            # line endings alternate, starting with 0x4f
            values.append(0x51 if line & 1 else 0x4f)
            line += 1
        else:
            values += [chars[char] for char in characters[:last_char]]
        characters = characters[last_char:]
    # end text
    values.append(0x57)
    return values


def quote_translator(asm):
    """
    Writes asm with quoted text translated into bytes.

    The line is split on double quotes; text outside quotes is copied
    through and text inside quotes is replaced by a comma-separated list of
    "$XX" byte values looked up in `chars`.

    Lines whose leading part contains SECTION, INCBIN or INCLUDE are
    returned unchanged.  A line whose leading part is exactly "print"
    becomes "db 0," followed by the text wrapped into lines and terminated
    with 0x57.

    Raises KeyError for a quoted character that has no entry in `chars`.
    """
    # split by quotes: odd-numbered pieces were inside quotes
    asms = asm.split('"')

    # skip asm that actually does use ASCII in quotes
    if "SECTION" in asms[0] \
            or "INCBIN" in asms[0] \
            or "INCLUDE" in asms[0]:
        return asm

    print_macro = False
    if asms[0].strip() == 'print':
        asms[0] = asms[0].replace('print', 'db 0,')
        print_macro = True

    output = ''
    for index, token in enumerate(asms):
        if index % 2 == 0:
            output += token
            continue

        # token is a string to convert to byte values
        characters = _split_characters(token)
        if print_macro:
            values = _wrap_print_text(characters)
        else:
            values = [chars[char] for char in characters]
        # joining once keeps the separators right even when a piece of the
        # text turns out empty
        output += ", ".join("${0:02X}".format(value) for value in values)

    return output
2012-04-26 19:14:46 +00:00
def extract_token(asm):
    """Returns the text before the first space of asm, whitespace-stripped."""
    head, _, _ = asm.partition(" ")
    return head.strip()
2012-04-26 19:14:46 +00:00
def make_macro_table():
    """Builds a dict of the classes in `macros`, keyed by their macro_name."""
    table = {}
    for macro in macros:
        # a later class replaces an earlier one with the same macro_name
        table[macro.macro_name] = macro
    return table
# macro name -> macro class, built once at import time; read by macro_test
macro_table = make_macro_table()
2012-04-26 19:14:46 +00:00
def macro_test(asm):
    """
    Looks up the macro named by the first token of a line.

    Returns a (macro, token) tuple when the token is a key of macro_table,
    otherwise (None, None).
    """
    # macros are determined by the first symbol on the line
    name = extract_token(asm)

    # check against all names
    try:
        return (macro_table[name], name)
    except KeyError:
        return (None, None)
2012-04-26 19:14:46 +00:00
def is_based_on(something, base):
    """
    Checks whether or not 'something' is a class that is, or is a subclass
    of, a class with the given name. This is a terrible hack but it removes
    a direct dependency on existing macros.

    The whole inheritance chain is searched, so a class several levels below
    the named base is recognised too (only the direct bases used to be
    looked at).

    'something' is the class to inspect and 'base' is the class name as a
    string. Returns True or False.

    Used by macro_translator.
    """
    # imported here so the function stands on its own; getmro also handles
    # Python 2 old-style classes, which have no __mro__ attribute
    import inspect

    return any(klass.__name__ == base for klass in inspect.getmro(something))
2012-04-26 19:14:46 +00:00
def _check_macro_param_count(macro, params, original_line):
    """Asserts that len(params) is a count the macro's param_types allow."""
    # The raw counts cannot be compared directly because a three-byte "dw"
    # param is written as two params (bank and label) on the line.
    allowed_length = 0
    for param_type in macro.param_types.values():
        param_klass = param_type["class"]
        if param_klass.byte_type == "db":
            allowed_length += 1  # just one value
        elif param_klass.byte_type == "dw":
            if param_klass.size == 2:
                allowed_length += 1  # just label
            elif param_klass.size == 3:
                allowed_length += 2  # bank and label
            else:
                raise Exception(
                    "dunno what to do with a macro param with a size > 3")
        else:
            raise Exception(
                "dunno what to do with this non db/dw macro param: " +
                str(param_klass) + " in line: " + original_line)

    # sometimes the allowed length can vary
    if hasattr(macro, "allowed_lengths"):
        allowed_lengths = macro.allowed_lengths + [allowed_length]
    else:
        allowed_lengths = [allowed_length]

    assert len(params) in allowed_lengths, \
        "mismatched number of parameters on this line: " + \
        original_line


def macro_translator(macro, token, line):
    """
    Converts a line with a macro into a rgbasm-compatible line.

    macro -- the macro class; its macro_name, id, override_byte_check and
             param_types attributes are read here
    token -- the macro name found on the line; must equal macro.macro_name
    line  -- the asm line, with or without a trailing newline

    The translated lines are written to stdout; nothing is returned.

    Raises AssertionError on a macro/token mismatch (and, when
    do_macro_sanity_check is set, on a bad parameter count), and Exception
    for a parameter this function does not know how to write.
    """
    assert macro.macro_name == token, "macro/token mismatch"

    original_line = line

    # remove trailing newline
    if line.endswith("\n"):
        line = line[:-1]
    else:
        original_line += "\n"

    # remove first tab
    if line.startswith("\t"):
        line = line[1:]

    # remove duplicate whitespace (also trailing)
    line = " ".join(line.split())

    params = []

    # check if the line has params
    if " " in line:
        # split the line into separate parameters.  Only the first
        # occurrence of the macro name is dropped, so a parameter that
        # happens to contain the name (a label, say) is left intact.
        params = line.replace(token, "", 1).split(",")

        # check if there are no params (redundant)
        if len(params) == 1 and params[0] == "":
            raise Exception("macro has no params?")

    # write out a comment showing the original line
    if show_original_lines:
        sys.stdout.write("; original_line: " + original_line)

    # "db" is a macro because of TextEndingCommand
    # rgbasm can handle "db" so no preprocessing is required
    # (don't check its param count)
    if macro.macro_name == "db" and macro in [TextEndingCommand, ItemFragment]:
        sys.stdout.write(original_line)
        return

    # certain macros don't need an initial byte written
    # do: all scripting macros
    # don't: signpost, warp_def, person_event, xy_trigger
    if not macro.override_byte_check:
        sys.stdout.write("db ${0:02X}\n".format(macro.id))

    if do_macro_sanity_check:
        _check_macro_param_count(macro, params, original_line)

    # used for storetext: counts the extra params taken by bank+label pairs
    # so that param_types can still be indexed by parameter position
    correction = 0

    output = ""

    index = 0
    while index < len(params):
        param_type = macro.param_types[index - correction]
        description = param_type["name"]
        param_klass = param_type["class"]
        byte_type = param_klass.byte_type  # db or dw
        size = param_klass.size
        param = params[index].strip()

        # param_klass.to_asm() won't work here because it doesn't
        # include db/dw.

        # some parameters are really multiple types of bytes
        if (byte_type == "dw" and size != 2) or \
           (byte_type == "db" and size != 1):

            output += ("; " + description + "\n")

            if size == 3 and is_based_on(param_klass, "PointerLabelBeforeBank"):
                # write the bank first
                output += ("db " + param + "\n")
                # write the pointer second
                output += ("dw " + params[index + 1].strip() + "\n")
                index += 2
                correction += 1
            elif size == 3 and is_based_on(param_klass, "PointerLabelAfterBank"):
                # write the pointer first
                output += ("dw " + param + "\n")
                # write the bank second
                output += ("db " + params[index + 1].strip() + "\n")
                index += 2
                correction += 1
            elif size == 3 and hasattr(param_klass, "from_asm"):
                output += ("db " + param_klass.from_asm(param) + "\n")
                index += 1
            else:
                raise Exception(
                    "dunno what to do with this macro " +
                    "param (" + str(param_klass) + ") " + "on this line: " +
                    original_line)

        # or just print out the byte
        else:
            output += (byte_type + " " + param + " ; " + description + "\n")
            index += 1

    sys.stdout.write(output)
def read_line(l):
    """
    Preprocesses a given line of asm.

    The result is written to stdout; nothing is returned.  In order:
      * a "GLOBAL <label>" line is written when a ':' appears before the
        first double quote;
      * INCLUDE lines have '.asm' replaced by '.tx';
      * lines starting with "ascii " (optionally after one tab) become "db"
        lines with their quoted text untouched;
      * other lines containing a double quote go through quote_translator;
      * remaining lines go through macro_translator when their first token
        is a known macro, and are copied unchanged otherwise.
    Whatever separate_comment split off as the comment is written last.
    """
    # strip comments from asm
    asm, comment = separate_comment(l)

    # export all labels.  Only the text before the first quote is searched;
    # without a quote that is the whole line.  (Slicing with str.find's -1
    # result cut off the last character, so a ':' ending the line was missed.)
    if ':' in asm.split('"', 1)[0]:
        sys.stdout.write('GLOBAL ' + asm.split(':')[0] + '\n')

    # expect preprocessed .asm files
    if "INCLUDE" in asm:
        asm = asm.replace('.asm', '.tx')
        sys.stdout.write(asm)

    # ascii string macro preserves the bytes as ascii (skip the translator)
    elif (len(asm) > 6 and asm[:6] == "ascii ") or asm[:7] == "\tascii ":
        asm = asm.replace("ascii", "db", 1)
        sys.stdout.write(asm)

    # convert text to bytes when a quote appears (not in a comment)
    elif "\"" in asm:
        sys.stdout.write(quote_translator(asm))

    # check against other preprocessor features
    else:
        macro, token = macro_test(asm)
        if macro:
            macro_translator(macro, token, asm)
        else:
            sys.stdout.write(asm)

    if comment:
        sys.stdout.write(comment)
2012-04-26 15:51:37 +00:00
def preprocess(lines=None):
    """
    Main entry point for the preprocessor.

    lines -- a list of lines, or a single string holding the whole input.
             When omitted (or empty), the input is read from stdin.

    Every line is passed to read_line, which writes the result to stdout.
    """
    if not lines:
        # read each line from stdin
        lines = sys.stdin.readlines()
    elif not isinstance(lines, list):
        # split up the input into individual lines, keeping each newline as
        # readlines() does: the newline is what ends a line in the output,
        # and a bare split would also leave a spurious empty last line
        pieces = lines.split("\n")
        lines = [piece + "\n" for piece in pieces[:-1]]
        if pieces[-1]:
            lines.append(pieces[-1])

    for l in lines:
        read_line(l)
# only run against stdin when not included as a module: executed as a
# script, preprocess() gets no argument and so reads its input from stdin
if __name__ == "__main__":
    preprocess()