mirror of https://github.com/pret/pokecrystal.git
move main preprocessor into pokemontools
Only pokecrystal-specific preprocessor stuff lives in preprocessor.py now. The other stuff lives in pokemontools.
This commit is contained in:
parent
4f685791c1
commit
c32cc3dcb8
2
extras
2
extras
|
@ -1 +1 @@
|
|||
Subproject commit 016f0206b5029fc83a6200be29b0f980c76dfd90
|
||||
Subproject commit 0856dee10ad124a6f313492561ed1c2a2df74abd
|
607
preprocessor.py
607
preprocessor.py
|
@ -3,6 +3,8 @@
|
|||
|
||||
import sys
|
||||
|
||||
import extras.pokemontools.preprocessor as preprocessor
|
||||
|
||||
from extras.pokemontools.crystal import (
|
||||
command_classes,
|
||||
Warp,
|
||||
|
@ -33,610 +35,11 @@ macros += movement_command_classes
|
|||
macros += music_classes
|
||||
macros += effect_classes
|
||||
|
||||
# show lines before preprocessing in stdout
|
||||
show_original_lines = False
|
||||
|
||||
# helpful for debugging macros
|
||||
do_macro_sanity_check = False
|
||||
|
||||
class SkippableMacro(object):
    """
    Stand-in macro for plain "db" lines.

    Its only attribute is the macro name that gets registered in the macro
    table.
    """

    macro_name = "db"
|
||||
|
||||
chars = {
|
||||
"ガ": 0x05,
|
||||
"ギ": 0x06,
|
||||
"グ": 0x07,
|
||||
"ゲ": 0x08,
|
||||
"ゴ": 0x09,
|
||||
"ザ": 0x0A,
|
||||
"ジ": 0x0B,
|
||||
"ズ": 0x0C,
|
||||
"ゼ": 0x0D,
|
||||
"ゾ": 0x0E,
|
||||
"ダ": 0x0F,
|
||||
"ヂ": 0x10,
|
||||
"ヅ": 0x11,
|
||||
"デ": 0x12,
|
||||
"ド": 0x13,
|
||||
"バ": 0x19,
|
||||
"ビ": 0x1A,
|
||||
"ブ": 0x1B,
|
||||
"ボ": 0x1C,
|
||||
"が": 0x26,
|
||||
"ぎ": 0x27,
|
||||
"ぐ": 0x28,
|
||||
"げ": 0x29,
|
||||
"ご": 0x2A,
|
||||
"ざ": 0x2B,
|
||||
"じ": 0x2C,
|
||||
"ず": 0x2D,
|
||||
"ぜ": 0x2E,
|
||||
"ぞ": 0x2F,
|
||||
"だ": 0x30,
|
||||
"ぢ": 0x31,
|
||||
"づ": 0x32,
|
||||
"で": 0x33,
|
||||
"ど": 0x34,
|
||||
"ば": 0x3A,
|
||||
"び": 0x3B,
|
||||
"ぶ": 0x3C,
|
||||
"べ": 0x3D,
|
||||
"ぼ": 0x3E,
|
||||
"パ": 0x40,
|
||||
"ピ": 0x41,
|
||||
"プ": 0x42,
|
||||
"ポ": 0x43,
|
||||
"ぱ": 0x44,
|
||||
"ぴ": 0x45,
|
||||
"ぷ": 0x46,
|
||||
"ぺ": 0x47,
|
||||
"ぽ": 0x48,
|
||||
"ア": 0x80,
|
||||
"イ": 0x81,
|
||||
"ウ": 0x82,
|
||||
"エ": 0x83,
|
||||
"ォ": 0x84,
|
||||
"カ": 0x85,
|
||||
"キ": 0x86,
|
||||
"ク": 0x87,
|
||||
"ケ": 0x88,
|
||||
"コ": 0x89,
|
||||
"サ": 0x8A,
|
||||
"シ": 0x8B,
|
||||
"ス": 0x8C,
|
||||
"セ": 0x8D,
|
||||
"ソ": 0x8E,
|
||||
"タ": 0x8F,
|
||||
"チ": 0x90,
|
||||
"ツ": 0x91,
|
||||
"テ": 0x92,
|
||||
"ト": 0x93,
|
||||
"ナ": 0x94,
|
||||
"ニ": 0x95,
|
||||
"ヌ": 0x96,
|
||||
"ネ": 0x97,
|
||||
"ノ": 0x98,
|
||||
"ハ": 0x99,
|
||||
"ヒ": 0x9A,
|
||||
"フ": 0x9B,
|
||||
"ホ": 0x9C,
|
||||
"マ": 0x9D,
|
||||
"ミ": 0x9E,
|
||||
"ム": 0x9F,
|
||||
"メ": 0xA0,
|
||||
"モ": 0xA1,
|
||||
"ヤ": 0xA2,
|
||||
"ユ": 0xA3,
|
||||
"ヨ": 0xA4,
|
||||
"ラ": 0xA5,
|
||||
"ル": 0xA6,
|
||||
"レ": 0xA7,
|
||||
"ロ": 0xA8,
|
||||
"ワ": 0xA9,
|
||||
"ヲ": 0xAA,
|
||||
"ン": 0xAB,
|
||||
"ッ": 0xAC,
|
||||
"ャ": 0xAD,
|
||||
"ュ": 0xAE,
|
||||
"ョ": 0xAF,
|
||||
"ィ": 0xB0,
|
||||
"あ": 0xB1,
|
||||
"い": 0xB2,
|
||||
"う": 0xB3,
|
||||
"え": 0xB4,
|
||||
"お": 0xB5,
|
||||
"か": 0xB6,
|
||||
"き": 0xB7,
|
||||
"く": 0xB8,
|
||||
"け": 0xB9,
|
||||
"こ": 0xBA,
|
||||
"さ": 0xBB,
|
||||
"し": 0xBC,
|
||||
"す": 0xBD,
|
||||
"せ": 0xBE,
|
||||
"そ": 0xBF,
|
||||
"た": 0xC0,
|
||||
"ち": 0xC1,
|
||||
"つ": 0xC2,
|
||||
"て": 0xC3,
|
||||
"と": 0xC4,
|
||||
"な": 0xC5,
|
||||
"に": 0xC6,
|
||||
"ぬ": 0xC7,
|
||||
"ね": 0xC8,
|
||||
"の": 0xC9,
|
||||
"は": 0xCA,
|
||||
"ひ": 0xCB,
|
||||
"ふ": 0xCC,
|
||||
"へ": 0xCD,
|
||||
"ほ": 0xCE,
|
||||
"ま": 0xCF,
|
||||
"み": 0xD0,
|
||||
"む": 0xD1,
|
||||
"め": 0xD2,
|
||||
"も": 0xD3,
|
||||
"や": 0xD4,
|
||||
"ゆ": 0xD5,
|
||||
"よ": 0xD6,
|
||||
"ら": 0xD7,
|
||||
"り": 0xD8,
|
||||
"る": 0xD9,
|
||||
"れ": 0xDA,
|
||||
"ろ": 0xDB,
|
||||
"わ": 0xDC,
|
||||
"を": 0xDD,
|
||||
"ん": 0xDE,
|
||||
"っ": 0xDF,
|
||||
"ゃ": 0xE0,
|
||||
"ゅ": 0xE1,
|
||||
"ょ": 0xE2,
|
||||
"ー": 0xE3,
|
||||
"ァ": 0xE9,
|
||||
|
||||
"@": 0x50,
|
||||
"#": 0x54,
|
||||
"…": 0x75,
|
||||
|
||||
"┌": 0x79,
|
||||
"─": 0x7A,
|
||||
"┐": 0x7B,
|
||||
"│": 0x7C,
|
||||
"└": 0x7D,
|
||||
"┘": 0x7E,
|
||||
|
||||
"№": 0x74,
|
||||
|
||||
" ": 0x7F,
|
||||
"A": 0x80,
|
||||
"B": 0x81,
|
||||
"C": 0x82,
|
||||
"D": 0x83,
|
||||
"E": 0x84,
|
||||
"F": 0x85,
|
||||
"G": 0x86,
|
||||
"H": 0x87,
|
||||
"I": 0x88,
|
||||
"J": 0x89,
|
||||
"K": 0x8A,
|
||||
"L": 0x8B,
|
||||
"M": 0x8C,
|
||||
"N": 0x8D,
|
||||
"O": 0x8E,
|
||||
"P": 0x8F,
|
||||
"Q": 0x90,
|
||||
"R": 0x91,
|
||||
"S": 0x92,
|
||||
"T": 0x93,
|
||||
"U": 0x94,
|
||||
"V": 0x95,
|
||||
"W": 0x96,
|
||||
"X": 0x97,
|
||||
"Y": 0x98,
|
||||
"Z": 0x99,
|
||||
"(": 0x9A,
|
||||
")": 0x9B,
|
||||
":": 0x9C,
|
||||
";": 0x9D,
|
||||
"[": 0x9E,
|
||||
"]": 0x9F,
|
||||
"a": 0xA0,
|
||||
"b": 0xA1,
|
||||
"c": 0xA2,
|
||||
"d": 0xA3,
|
||||
"e": 0xA4,
|
||||
"f": 0xA5,
|
||||
"g": 0xA6,
|
||||
"h": 0xA7,
|
||||
"i": 0xA8,
|
||||
"j": 0xA9,
|
||||
"k": 0xAA,
|
||||
"l": 0xAB,
|
||||
"m": 0xAC,
|
||||
"n": 0xAD,
|
||||
"o": 0xAE,
|
||||
"p": 0xAF,
|
||||
"q": 0xB0,
|
||||
"r": 0xB1,
|
||||
"s": 0xB2,
|
||||
"t": 0xB3,
|
||||
"u": 0xB4,
|
||||
"v": 0xB5,
|
||||
"w": 0xB6,
|
||||
"x": 0xB7,
|
||||
"y": 0xB8,
|
||||
"z": 0xB9,
|
||||
"Ä": 0xC0,
|
||||
"Ö": 0xC1,
|
||||
"Ü": 0xC2,
|
||||
"ä": 0xC3,
|
||||
"ö": 0xC4,
|
||||
"ü": 0xC5,
|
||||
"'d": 0xD0,
|
||||
"'l": 0xD1,
|
||||
"'m": 0xD2,
|
||||
"'r": 0xD3,
|
||||
"'s": 0xD4,
|
||||
"'t": 0xD5,
|
||||
"'v": 0xD6,
|
||||
"'": 0xE0,
|
||||
"-": 0xE3,
|
||||
"?": 0xE6,
|
||||
"!": 0xE7,
|
||||
".": 0xE8,
|
||||
"&": 0xE9,
|
||||
"é": 0xEA,
|
||||
"→": 0xEB,
|
||||
"▷": 0xEC,
|
||||
"▶": 0xED,
|
||||
"▼": 0xEE,
|
||||
"♂": 0xEF,
|
||||
"¥": 0xF0,
|
||||
"×": 0xF1,
|
||||
"/": 0xF3,
|
||||
",": 0xF4,
|
||||
"♀": 0xF5,
|
||||
"0": 0xF6,
|
||||
"1": 0xF7,
|
||||
"2": 0xF8,
|
||||
"3": 0xF9,
|
||||
"4": 0xFA,
|
||||
"5": 0xFB,
|
||||
"6": 0xFC,
|
||||
"7": 0xFD,
|
||||
"8": 0xFE,
|
||||
"9": 0xFF
|
||||
}
|
||||
|
||||
def separate_comment(l):
    """
    Separates asm and comments on a single line.

    Returns a tuple (asm, comment). The comment starts at the first ";" that
    is not inside a double-quoted string and runs to the end of the line
    (including a trailing newline, if any). When the line has no comment the
    whole line is returned as asm and the comment is None; this includes the
    empty line.
    """
    in_quotes = False
    for i, char in enumerate(l):
        # a ";" only starts a comment outside of quoted text
        if char == ";" and not in_quotes:
            return l[:i], l[i:]
        if char == "\"":
            in_quotes = not in_quotes
    # no comment found: do not split off the last character of the line
    return l, None
|
||||
|
||||
def _split_characters(token):
    """Splits quoted text into the units that are looked up in chars."""
    characters = []
    # byte strings hold UTF-8 and need their multi-byte sequences regrouped;
    # already-decoded text has exactly one element per codepoint
    is_byte_string = isinstance(token, bytes)
    while token:
        length = 1
        if is_byte_string:
            # the lead byte tells how long the UTF-8 sequence is
            lead = ord(token[0:1])
            if lead < 0xc0:
                length = 1
            elif lead < 0xe0:
                length = 2
            elif lead < 0xf0:
                length = 3
            elif lead < 0xf8:
                length = 4
            elif lead < 0xfc:
                length = 5
            else:
                length = 6
        char = token[:length]
        token = token[length:]
        # certain apostrophe-letter pairs are considered a single character
        if char == "'" and token and token[0:1] in 'dlmrstv':
            char += token[0:1]
            token = token[1:]
        characters.append(char)
    return characters


def _print_macro_bytes(characters):
    """Returns the byte values for the text of a 'print' line."""
    byte_values = []
    line = 0
    while characters:
        last_char = 1
        if len(characters) > 18 and characters[-1] != '@':
            # find the last space among the first 18 characters
            for i in range(len(characters)):
                last_char = i + 1
                if ' ' not in characters[i+1:18]:
                    break
            byte_values += [chars[char] for char in characters[:last_char-1]]
            # a space at the break position is dropped in favour of the
            # line ending; any other character is kept
            if characters[last_char-1] != " ":
                byte_values.append(chars[characters[last_char-1]])
            # line endings alternate between $4F and $51
            if not line & 1:
                byte_values.append(0x4f)
            else:
                byte_values.append(0x51)
            line += 1
        else:
            byte_values += [chars[char] for char in characters[:last_char]]
        characters = characters[last_char:]
    # end text
    byte_values.append(0x57)
    return byte_values


def quote_translator(asm):
    """
    Writes asm with quoted text translated into bytes.

    Every double-quoted string on the line is replaced by a comma-separated
    list of "$XX" byte values taken from the chars table. Lines whose text
    before the first quote contains SECTION, INCBIN or INCLUDE are returned
    unchanged. A line starting with 'print' becomes 'db 0,' followed by the
    text with line endings inserted and a final $57 byte.

    Returns the translated line. Raises KeyError for a character that is not
    in chars.
    """

    # split by quotes
    asms = asm.split('"')

    # skip asm that actually does use ASCII in quotes
    if "SECTION" in asms[0] \
    or "INCBIN" in asms[0] \
    or "INCLUDE" in asms[0]:
        return asm

    print_macro = False
    if asms[0].strip() == 'print':
        asms[0] = asms[0].replace('print', 'db 0,')
        print_macro = True

    output = ''
    for index, token in enumerate(asms):
        # even pieces are outside of quotes and are copied as they are
        if index % 2 == 0:
            output += token
            continue

        # token is a string to convert to byte values
        characters = _split_characters(token)
        if print_macro:
            byte_values = _print_macro_bytes(characters)
        else:
            byte_values = [chars[char] for char in characters]

        # joining once keeps the commas well-formed even for empty text
        output += ", ".join("${0:02X}".format(value) for value in byte_values)

    return output
|
||||
|
||||
def extract_token(asm):
    """
    Returns the first whitespace-delimited word of a line of asm.

    Leading spaces or tabs are ignored and any whitespace (not only a single
    space) ends the word. Returns an empty string for an empty or
    whitespace-only line.
    """
    words = asm.split()
    if not words:
        return ""
    return words[0]
|
||||
|
||||
def make_macro_table(macros):
    """
    Builds a dictionary that maps each macro's macro_name to the macro.

    When two macros share a name, the one that comes later in macros wins.
    """
    table = {}
    for entry in macros:
        table[entry.macro_name] = entry
    return table
|
||||
|
||||
def macro_test(asm, macro_table):
    """
    Looks up the macro named by the first symbol on the line.

    Returns (macro, token) when that symbol is a key of macro_table and
    (None, None) otherwise.
    """
    # macros are determined by the first symbol on the line
    name = extract_token(asm)

    # no macro goes by that name
    if name not in macro_table:
        return (None, None)

    return (macro_table[name], name)
|
||||
|
||||
def is_based_on(something, base):
    """
    Tells whether the class 'something' is named 'base' or has a direct base
    class with that name.

    The comparison is done on class names so that this module does not need
    to import the macro classes themselves. Only the direct bases are
    inspected.

    Used by macro_translator.
    """
    names = [str(parent.__name__) for parent in something.__bases__]
    names.append(something.__name__)
    return base in names
|
||||
|
||||
def macro_translator(macro, token, line, skippable_macros):
    """
    Converts a line with a macro into a rgbasm-compatible line.

    The result is written to sys.stdout; nothing is returned.

    macro            -- macro class; reads macro_name, __name__,
                        override_byte_check, id and param_types
    token            -- first word of the line, must equal macro.macro_name
    line             -- the line of asm, with or without a trailing newline
    skippable_macros -- collection of macro class names and/or macro classes
                        whose lines are written back unchanged

    Raises AssertionError when macro and token do not match (or when the
    optional sanity check fails) and Exception when a parameter cannot be
    translated.
    """

    assert macro.macro_name == token, "macro/token mismatch"

    original_line = line

    # remove trailing newline
    if line.endswith("\n"):
        line = line[:-1]
    else:
        original_line += "\n"

    # remove duplicate whitespace (also the leading tab and anything trailing)
    line = " ".join(line.split())

    params = []

    # check if the line has params
    if " " in line:
        # split the line into separate parameters; only the leading macro
        # name is removed, because the same text may be part of a parameter
        params = line.replace(token, "", 1).split(",")

        # check if there are no params (redundant)
        if len(params) == 1 and params[0] == "":
            raise Exception("macro has no params?")

    # write out a comment showing the original line
    if show_original_lines:
        sys.stdout.write("; original_line: " + original_line)

    # "db" is a macro because of SkippableMacro
    # rgbasm can handle "db" so no preprocessing is required
    # (don't check its param count)
    if macro.__name__ in skippable_macros or (macro.macro_name == "db" and macro in skippable_macros):
        sys.stdout.write(original_line)
        return

    # certain macros don't need an initial byte written
    # do: all scripting macros
    # don't: signpost, warp_def, person_event, xy_trigger
    if not macro.override_byte_check:
        sys.stdout.write("db ${0:02X}\n".format(macro.id))

    # --- long-winded sanity check goes here ---

    if do_macro_sanity_check:

        # Comparing len(params) with len(macro.param_types) directly won't
        # work because PointerLabelBeforeBank shows up as two params, so
        # these two lengths will always be different. Count the expected
        # number of comma-separated values instead.
        allowed_length = 0
        for (index, param_type) in macro.param_types.items():
            param_klass = param_type["class"]

            if param_klass.byte_type == "db":
                allowed_length += 1 # just one value
            elif param_klass.byte_type == "dw":
                if param_klass.size == 2:
                    allowed_length += 1 # just label
                elif param_klass.size == 3:
                    allowed_length += 2 # bank and label
                else:
                    raise Exception("dunno what to do with a macro param with a size > 3")
            else:
                raise Exception("dunno what to do with this non db/dw macro param: " +
                                str(param_klass) + " in line: " + original_line)

        # sometimes the allowed length can vary
        if hasattr(macro, "allowed_lengths"):
            allowed_lengths = macro.allowed_lengths + [allowed_length]
        else:
            allowed_lengths = [allowed_length]

        assert len(params) in allowed_lengths, \
               "mismatched number of parameters on this line: " + \
               original_line

    # --- end of ridiculously long sanity check ---

    # Number of extra values consumed so far by parameters that take two
    # entries of params (bank and pointer); subtracting it maps an index
    # into params back to an index into macro.param_types.
    correction = 0

    output = ""

    index = 0
    while index < len(params):
        try:
            param_type = macro.param_types[index - correction]
        except KeyError:
            raise Exception("line is: " + str(line) + " and macro is: " + str(macro))
        description = param_type["name"]
        param_klass = param_type["class"]
        byte_type = param_klass.byte_type # db or dw
        size = param_klass.size
        param = params[index].strip()

        # param_klass.to_asm() won't work here because it doesn't
        # include db/dw.

        # some parameters are really multiple types of bytes
        if (byte_type == "dw" and size != 2) or \
           (byte_type == "db" and size != 1):

            output += ("; " + description + "\n")

            if size == 3 and is_based_on(param_klass, "PointerLabelBeforeBank"):
                # write the bank first
                output += ("db " + param + "\n")
                # write the pointer second
                output += ("dw " + params[index+1].strip() + "\n")
                index += 2
                correction += 1
            elif size == 3 and is_based_on(param_klass, "PointerLabelAfterBank"):
                # write the pointer first
                output += ("dw " + param + "\n")
                # write the bank second
                output += ("db " + params[index+1].strip() + "\n")
                index += 2
                correction += 1
            elif size == 3 and "from_asm" in dir(param_klass):
                output += ("db " + param_klass.from_asm(param) + "\n")
                index += 1
            else:
                raise Exception("dunno what to do with this macro " +
                                "param (" + str(param_klass) + ") " + "on this line: " +
                                original_line)

        # or just print out the byte
        else:
            output += (byte_type + " " + param + " ; " + description + "\n")

            index += 1

    sys.stdout.write(output)
|
||||
|
||||
def read_line(l, skippable_macros, macro_table):
    """
    Preprocesses a given line of asm.

    The translated line is written to sys.stdout, followed by the line's
    comment (if it has one). Nothing is returned.

    l                -- the line to process
    skippable_macros -- passed through to macro_translator
    macro_table      -- dictionary of macro name -> macro, used to recognize
                        macro lines
    """

    # an empty line has nothing to translate or to write
    if not l:
        return

    # strip comments from asm
    asm, comment = separate_comment(l)

    # export all labels
    # (look at everything before the first quote; when there is no quote
    # that is the whole line)
    if ':' in asm.split('"')[0]:
        sys.stdout.write('GLOBAL ' + asm.split(':')[0] + '\n')

    # expect preprocessed .asm files
    if "INCLUDE" in asm:
        asm = asm.replace('.asm', '.tx')
        sys.stdout.write(asm)

    # ascii string macro preserves the bytes as ascii (skip the translator)
    elif (len(asm) > 6 and asm.startswith("ascii ")) or asm.startswith("\tascii "):
        asm = asm.replace("ascii", "db", 1)
        sys.stdout.write(asm)

    # convert text to bytes when a quote appears (not in a comment)
    elif "\"" in asm:
        sys.stdout.write(quote_translator(asm))

    # check against other preprocessor features
    else:
        macro, token = macro_test(asm, macro_table)
        if macro:
            macro_translator(macro, token, asm, skippable_macros)
        else:
            sys.stdout.write(asm)

    if comment:
        sys.stdout.write(comment)
|
||||
|
||||
def preprocess(macros, skippable_macros=None, lines=None):
    """
    Main entry point for the preprocessor.

    Every line is passed to read_line, which writes its output to
    sys.stdout.

    macros           -- list of macro classes to recognize
    skippable_macros -- list of macros whose lines are written back
                        unchanged; defaults to [SkippableMacro]. The list
                        passed in is not modified.
    lines            -- a list of lines, or a single string that is split on
                        newlines. When it is missing or empty, the lines are
                        read from sys.stdin.
    """
    if skippable_macros is None:
        skippable_macros = [SkippableMacro]

    macro_table = make_macro_table(list(set(macros + skippable_macros)))

    # HACK for pokecrystal. Must be after make_macro_table call.
    # A new list is built so the caller's list does not grow on every call.
    skippable_macros = skippable_macros + ["TextEndingCommand"]

    if not lines:
        # read each line from stdin
        lines = sys.stdin.readlines()
    elif not isinstance(lines, list):
        # split up the input into individual lines
        lines = lines.split("\n")

    for l in lines:
        read_line(l, skippable_macros, macro_table)
|
||||
return preprocessor.preprocess(macros)
|
||||
|
||||
# only run against stdin when not included as a module
|
||||
if __name__ == "__main__":
|
||||
|
|
Loading…
Reference in New Issue