diff --git a/extras b/extras index 016f0206b..0856dee10 160000 --- a/extras +++ b/extras @@ -1 +1 @@ -Subproject commit 016f0206b5029fc83a6200be29b0f980c76dfd90 +Subproject commit 0856dee10ad124a6f313492561ed1c2a2df74abd diff --git a/preprocessor.py b/preprocessor.py index e529fe043..acf296e91 100644 --- a/preprocessor.py +++ b/preprocessor.py @@ -3,6 +3,8 @@ import sys +import extras.pokemontools.preprocessor as preprocessor + from extras.pokemontools.crystal import ( command_classes, Warp, @@ -33,610 +35,11 @@ macros += movement_command_classes macros += music_classes macros += effect_classes -# show lines before preprocessing in stdout -show_original_lines = False - -# helpful for debugging macros -do_macro_sanity_check = False - -class SkippableMacro(object): - macro_name = "db" - -chars = { -"ガ": 0x05, -"ギ": 0x06, -"グ": 0x07, -"ゲ": 0x08, -"ゴ": 0x09, -"ザ": 0x0A, -"ジ": 0x0B, -"ズ": 0x0C, -"ゼ": 0x0D, -"ゾ": 0x0E, -"ダ": 0x0F, -"ヂ": 0x10, -"ヅ": 0x11, -"デ": 0x12, -"ド": 0x13, -"バ": 0x19, -"ビ": 0x1A, -"ブ": 0x1B, -"ボ": 0x1C, -"が": 0x26, -"ぎ": 0x27, -"ぐ": 0x28, -"げ": 0x29, -"ご": 0x2A, -"ざ": 0x2B, -"じ": 0x2C, -"ず": 0x2D, -"ぜ": 0x2E, -"ぞ": 0x2F, -"だ": 0x30, -"ぢ": 0x31, -"づ": 0x32, -"で": 0x33, -"ど": 0x34, -"ば": 0x3A, -"び": 0x3B, -"ぶ": 0x3C, -"べ": 0x3D, -"ぼ": 0x3E, -"パ": 0x40, -"ピ": 0x41, -"プ": 0x42, -"ポ": 0x43, -"ぱ": 0x44, -"ぴ": 0x45, -"ぷ": 0x46, -"ぺ": 0x47, -"ぽ": 0x48, -"ア": 0x80, -"イ": 0x81, -"ウ": 0x82, -"エ": 0x83, -"ォ": 0x84, -"カ": 0x85, -"キ": 0x86, -"ク": 0x87, -"ケ": 0x88, -"コ": 0x89, -"サ": 0x8A, -"シ": 0x8B, -"ス": 0x8C, -"セ": 0x8D, -"ソ": 0x8E, -"タ": 0x8F, -"チ": 0x90, -"ツ": 0x91, -"テ": 0x92, -"ト": 0x93, -"ナ": 0x94, -"ニ": 0x95, -"ヌ": 0x96, -"ネ": 0x97, -"ノ": 0x98, -"ハ": 0x99, -"ヒ": 0x9A, -"フ": 0x9B, -"ホ": 0x9C, -"マ": 0x9D, -"ミ": 0x9E, -"ム": 0x9F, -"メ": 0xA0, -"モ": 0xA1, -"ヤ": 0xA2, -"ユ": 0xA3, -"ヨ": 0xA4, -"ラ": 0xA5, -"ル": 0xA6, -"レ": 0xA7, -"ロ": 0xA8, -"ワ": 0xA9, -"ヲ": 0xAA, -"ン": 0xAB, -"ッ": 0xAC, -"ャ": 0xAD, -"ュ": 0xAE, -"ョ": 0xAF, -"ィ": 0xB0, -"あ": 0xB1, -"い": 0xB2, -"う": 0xB3, -"え": 0xB4, -"お": 0xB5, -"か": 0xB6, -"き": 0xB7, -"く": 0xB8, -"け": 0xB9, -"こ": 0xBA, -"さ": 0xBB, -"し": 0xBC, -"す": 0xBD, -"せ": 0xBE, -"そ": 0xBF, -"た": 0xC0, -"ち": 0xC1, -"つ": 0xC2, -"て": 0xC3, -"と": 0xC4, -"な": 0xC5, -"に": 0xC6, -"ぬ": 0xC7, -"ね": 0xC8, -"の": 0xC9, -"は": 0xCA, -"ひ": 0xCB, -"ふ": 0xCC, -"へ": 0xCD, -"ほ": 0xCE, -"ま": 0xCF, -"み": 0xD0, -"む": 0xD1, -"め": 0xD2, -"も": 0xD3, -"や": 0xD4, -"ゆ": 0xD5, -"よ": 0xD6, -"ら": 0xD7, -"り": 0xD8, -"る": 0xD9, -"れ": 0xDA, -"ろ": 0xDB, -"わ": 0xDC, -"を": 0xDD, -"ん": 0xDE, -"っ": 0xDF, -"ゃ": 0xE0, -"ゅ": 0xE1, -"ょ": 0xE2, -"ー": 0xE3, -"ァ": 0xE9, - -"@": 0x50, -"#": 0x54, -"…": 0x75, - -"┌": 0x79, -"─": 0x7A, -"┐": 0x7B, -"│": 0x7C, -"└": 0x7D, -"┘": 0x7E, - -"№": 0x74, - -" ": 0x7F, -"A": 0x80, -"B": 0x81, -"C": 0x82, -"D": 0x83, -"E": 0x84, -"F": 0x85, -"G": 0x86, -"H": 0x87, -"I": 0x88, -"J": 0x89, -"K": 0x8A, -"L": 0x8B, -"M": 0x8C, -"N": 0x8D, -"O": 0x8E, -"P": 0x8F, -"Q": 0x90, -"R": 0x91, -"S": 0x92, -"T": 0x93, -"U": 0x94, -"V": 0x95, -"W": 0x96, -"X": 0x97, -"Y": 0x98, -"Z": 0x99, -"(": 0x9A, -")": 0x9B, -":": 0x9C, -";": 0x9D, -"[": 0x9E, -"]": 0x9F, -"a": 0xA0, -"b": 0xA1, -"c": 0xA2, -"d": 0xA3, -"e": 0xA4, -"f": 0xA5, -"g": 0xA6, -"h": 0xA7, -"i": 0xA8, -"j": 0xA9, -"k": 0xAA, -"l": 0xAB, -"m": 0xAC, -"n": 0xAD, -"o": 0xAE, -"p": 0xAF, -"q": 0xB0, -"r": 0xB1, -"s": 0xB2, -"t": 0xB3, -"u": 0xB4, -"v": 0xB5, -"w": 0xB6, -"x": 0xB7, -"y": 0xB8, -"z": 0xB9, -"Ä": 0xC0, -"Ö": 0xC1, -"Ü": 0xC2, -"ä": 0xC3, -"ö": 0xC4, -"ü": 0xC5, -"'d": 0xD0, -"'l": 0xD1, -"'m": 0xD2, -"'r": 0xD3, -"'s": 0xD4, -"'t": 0xD5, -"'v": 0xD6, -"'": 0xE0, -"-": 0xE3, -"?": 0xE6, -"!": 0xE7, -".": 0xE8, -"&": 0xE9, -"é": 0xEA, -"→": 0xEB, -"▷": 0xEC, -"▶": 0xED, -"▼": 0xEE, -"♂": 0xEF, -"¥": 0xF0, -"×": 0xF1, -"/": 0xF3, -",": 0xF4, -"♀": 0xF5, -"0": 0xF6, -"1": 0xF7, -"2": 0xF8, -"3": 0xF9, -"4": 0xFA, -"5": 0xFB, -"6": 0xFC, -"7": 0xFD, -"8": 0xFE, -"9": 0xFF -} - -def separate_comment(l): +def preprocess(macros): """ - Separates asm and comments on a single line. + Entry point for the preprocessor. """ - in_quotes = False - for i in xrange(len(l)): - if not in_quotes: - if l[i] == ";": - break - if l[i] == "\"": - in_quotes = not in_quotes - return l[:i], l[i:] or None - -def quote_translator(asm): - """ - Writes asm with quoted text translated into bytes. - """ - - # split by quotes - asms = asm.split('"') - - # skip asm that actually does use ASCII in quotes - if "SECTION" in asms[0]\ - or "INCBIN" in asms[0]\ - or "INCLUDE" in asms[0]: - return asm - - print_macro = False - if asms[0].strip() == 'print': - asms[0] = asms[0].replace('print','db 0,') - print_macro = True - - output = '' - even = False - for token in asms: - if even: - characters = [] - # token is a string to convert to byte values - while len(token): - # read a single UTF-8 codepoint - char = token[0] - if ord(char) < 0xc0: - token = token[1:] - # certain apostrophe-letter pairs are considered a single character - if char == "'" and token: - if token[0] in 'dlmrstv': - char += token[0] - token = token[1:] - elif ord(char) < 0xe0: - char = char + token[1:2] - token = token[2:] - elif ord(char) < 0xf0: - char = char + token[1:3] - token = token[3:] - elif ord(char) < 0xf8: - char = char + token[1:4] - token = token[4:] - elif ord(char) < 0xfc: - char = char + token[1:5] - token = token[5:] - else: - char = char + token[1:6] - token = token[6:] - characters += [char] - - if print_macro: - line = 0 - while len(characters): - last_char = 1 - if len(characters) > 18 and characters[-1] != '@': - for i, char in enumerate(characters): - last_char = i + 1 - if ' ' not in characters[i+1:18]: break - output += ", ".join("${0:02X}".format(chars[char]) for char in characters[:last_char-1]) - if characters[last_char-1] != " ": - output += ", ${0:02X}".format(characters[last_char-1]) - if not line & 1: - line_ending = 0x4f - else: - line_ending = 0x51 - output += ", ${0:02X}".format(line_ending) - line += 1 - else: - output += ", ".join(["${0:02X}".format(chars[char]) for char in characters[:last_char]]) - characters = characters[last_char:] - if len(characters): output += ", " - # end text - line_ending = 0x57 - output += ", ${0:02X}".format(line_ending) - - output += ", ".join(["${0:02X}".format(chars[char]) for char in characters]) - - else: - output += token - - even = not even - - return output - -def extract_token(asm): - return asm.split(" ")[0].strip() - -def make_macro_table(macros): - return dict(((macro.macro_name, macro) for macro in macros)) - -def macro_test(asm, macro_table): - """ - Returns a matching macro, or None/False. - """ - # macros are determined by the first symbol on the line - token = extract_token(asm) - # check against all names - if token in macro_table: - return (macro_table[token], token) - else: - return (None, None) - -def is_based_on(something, base): - """ - Checks whether or not 'something' is a class that is a subclass of a class - by name. This is a terrible hack but it removes a direct dependency on - existing macros. - - Used by macro_translator. - """ - options = [str(klass.__name__) for klass in something.__bases__] - options += [something.__name__] - return (base in options) - -def macro_translator(macro, token, line, skippable_macros): - """ - Converts a line with a macro into a rgbasm-compatible line. - """ - - assert macro.macro_name == token, "macro/token mismatch" - - original_line = line - - # remove trailing newline - if line[-1] == "\n": - line = line[:-1] - else: - original_line += "\n" - - # remove first tab - has_tab = False - if line[0] == "\t": - has_tab = True - line = line[1:] - - # remove duplicate whitespace (also trailing) - line = " ".join(line.split()) - - params = [] - - # check if the line has params - if " " in line: - # split the line into separate parameters - params = line.replace(token, "").split(",") - - # check if there are no params (redundant) - if len(params) == 1 and params[0] == "": - raise Exception, "macro has no params?" - - # write out a comment showing the original line - if show_original_lines: - sys.stdout.write("; original_line: " + original_line) - - # "db" is a macro because of SkippableMacro - # rgbasm can handle "db" so no preprocessing is required - # (don't check its param count) - if macro.__name__ in skippable_macros or (macro.macro_name == "db" and macro in skippable_macros): - sys.stdout.write(original_line) - return - - # certain macros don't need an initial byte written - # do: all scripting macros - # don't: signpost, warp_def, person_event, xy_trigger - if not macro.override_byte_check: - sys.stdout.write("db ${0:02X}\n".format(macro.id)) - - # --- long-winded sanity check goes here --- - - if do_macro_sanity_check: - - # sanity check... this won't work because PointerLabelBeforeBank shows - # up as two params, so these two lengths will always be different. - #assert len(params) == len(macro.param_types), \ - # "mismatched number of parameters on this line: " + \ - # original_line - - # v2 sanity check :) although it sorta sucks that this loop happens twice? - allowed_length = 0 - for (index, param_type) in macro.param_types.items(): - param_klass = param_type["class"] - - if param_klass.byte_type == "db": - allowed_length += 1 # just one value - elif param_klass.byte_type == "dw": - if param_klass.size == 2: - allowed_length += 1 # just label - elif param_klass.size == 3: - allowed_length += 2 # bank and label - else: - raise Exception, "dunno what to do with a macro param with a size > 3" - else: - raise Exception, "dunno what to do with this non db/dw macro param: " + \ - str(param_klass) + " in line: " + original_line - - # sometimes the allowed length can vary - if hasattr(macro, "allowed_lengths"): - allowed_lengths = macro.allowed_lengths + [allowed_length] - else: - allowed_lengths = [allowed_length] - - assert len(params) in allowed_lengths, \ - "mismatched number of parameters on this line: " + \ - original_line - - # --- end of ridiculously long sanity check --- - - # used for storetext - correction = 0 - - output = "" - - index = 0 - while index < len(params): - try: - param_type = macro.param_types[index - correction] - except KeyError as exception: - raise Exception("line is: " + str(line) + " and macro is: " + str(macro)) - description = param_type["name"] - param_klass = param_type["class"] - byte_type = param_klass.byte_type # db or dw - size = param_klass.size - param = params[index].strip() - - # param_klass.to_asm() won't work here because it doesn't - # include db/dw. - - # some parameters are really multiple types of bytes - if (byte_type == "dw" and size != 2) or \ - (byte_type == "db" and size != 1): - - output += ("; " + description + "\n") - - if size == 3 and is_based_on(param_klass, "PointerLabelBeforeBank"): - # write the bank first - output += ("db " + param + "\n") - # write the pointer second - output += ("dw " + params[index+1].strip() + "\n") - index += 2 - correction += 1 - elif size == 3 and is_based_on(param_klass, "PointerLabelAfterBank"): - # write the pointer first - output += ("dw " + param + "\n") - # write the bank second - output += ("db " + params[index+1].strip() + "\n") - index += 2 - correction += 1 - elif size == 3 and "from_asm" in dir(param_klass): - output += ("db " + param_klass.from_asm(param) + "\n") - index += 1 - else: - raise Exception, "dunno what to do with this macro " + \ - "param (" + str(param_klass) + ") " + "on this line: " + \ - original_line - - # or just print out the byte - else: - output += (byte_type + " " + param + " ; " + description + "\n") - - index += 1 - - sys.stdout.write(output) - -def read_line(l, skippable_macros, macro_table): - """Preprocesses a given line of asm.""" - - # strip comments from asm - asm, comment = separate_comment(l) - - # export all labels - if ':' in asm[:asm.find('"')]: - sys.stdout.write('GLOBAL ' + asm.split(':')[0] + '\n') - - # expect preprocessed .asm files - if "INCLUDE" in asm: - asm = asm.replace('.asm','.tx') - sys.stdout.write(asm) - - # ascii string macro preserves the bytes as ascii (skip the translator) - elif len(asm) > 6 and "ascii " == asm[:6] or "\tascii " == asm[:7]: - asm = asm.replace("ascii", "db", 1) - sys.stdout.write(asm) - - # convert text to bytes when a quote appears (not in a comment) - elif "\"" in asm: - sys.stdout.write(quote_translator(asm)) - - # check against other preprocessor features - else: - macro, token = macro_test(asm, macro_table) - if macro: - macro_translator(macro, token, asm, skippable_macros) - else: - sys.stdout.write(asm) - - if comment: sys.stdout.write(comment) - -def preprocess(macros, skippable_macros=None, lines=None): - """Main entry point for the preprocessor.""" - if skippable_macros == None: - skippable_macros = [SkippableMacro] - - macro_table = make_macro_table(list(set(macros + skippable_macros))) - - # HACK for pokecrystal. Must be after make_macro_table call. - skippable_macros += ["TextEndingCommand"] - - if not lines: - # read each line from stdin - lines = (sys.stdin.readlines()) - elif not isinstance(lines, list): - # split up the input into individual lines - lines = lines.split("\n") - - for l in lines: - read_line(l, skippable_macros, macro_table) + return preprocessor.preprocess(macros) # only run against stdin when not included as a module if __name__ == "__main__":