mirror of https://github.com/pret/pokecrystal.git
515 lines
19 KiB
Python
515 lines
19 KiB
Python
import sys, os, time, datetime, json
|
|
from gbz80disasm import opt_table
|
|
from ctypes import c_int8
|
|
from copy import copy, deepcopy
|
|
from labels import get_label_from_line, get_address_from_line_comment
|
|
|
|
# Opcodes that parse() treats as jumps when it substitutes labels and counts
# references. Mnemonics below follow the Game Boy CPU opcode table.
# NOTE(review): despite the name, this list also holds the absolute jumps
# 0xc3/0xda/0xc2 and 0x32 (not a jump opcode) -- confirm whether intended.
relative_jumps = [
    0x38, # jr c
    0x30, # jr nc
    0x20, # jr nz
    0x28, # jr z
    0x18, # jr
    0xc3, # jp
    0xda, # jp c
    0xc2, # jp nz
    0x32, # ld [hl-], a
]

# Jumps that are always taken.
relative_unconditional_jumps = [
    0xc3, # jp
    0x18, # jr
]

# Opcodes whose target_address is reported by DisAsm.used_addresses().
call_commands = [
    0xdc, # call c
    0xd4, # call nc
    0xc4, # call nz
    0xcc, # call z
    0xcd, # call
]

# Opcodes at which DisAsm.parse() stops reading.
end_08_scripts_with = [
    0xe9, # jp hl
    0xc9, # ret
]
# Possibly also:
#   0xc3 (jp), 0x18 (jr),
#   0xda, 0xe9, 0xd2, 0xc2, 0xca, 0x38, 0x30, 0x20, 0x28, 0x18, 0xd8,
#   0xd0, 0xc0, 0xc8, 0xc9

# Indentation placed in front of every instruction in the asm output.
spacing = "\t"
|
|
|
|
class RomStr(str):
    """ Simple wrapper to prevent a giant rom from being shown on screen.

    The rom's bytes are the characters of the string itself, so
    ord(rom[offset]) is the byte value stored at offset.
    """

    def __new__(cls, *args, **kwargs):
        """ Builds the underlying string.

        The "labels" keyword is meant for __init__ only. str.__new__ rejects
        unknown keyword arguments, so it has to be stripped here; without
        this, RomStr(data, labels=True) could never be constructed.
        """
        kwargs.pop("labels", None)
        return str.__new__(cls, *args, **kwargs)

    def __init__(self, *args, **kwargs):
        """ Sets up the label table.

        Pass labels=True to populate self.labels through load_labels().
        Otherwise self.labels is an empty dict, so that code reading the
        table never hits a missing attribute.
        """
        self.labels = {}
        if kwargs.get("labels") == True:
            self.load_labels()
        str.__init__(self)

    def __repr__(self):
        """ Simplifies this object so that the output doesn't overflow stdout.
        """
        return "RomStr(too long)"

    @classmethod
    def load(cls, crystal=True, red=False):
        """ Loads a ROM into a RomStr.

        Exactly one of crystal/red must be truthy: crystal reads
        "../baserom.gbc" and red reads "../pokered-baserom.gbc". Any other
        combination raises an Exception.
        """
        if crystal and not red:
            path = "../baserom.gbc"
        elif red and not crystal:
            path = "../pokered-baserom.gbc"
        else:
            raise Exception("not sure which rom to load?")

        # A rom is binary data: text mode can translate bytes on some
        # platforms. The with-block also closes the file if read() fails.
        with open(path, "rb") as file_handler:
            data = file_handler.read()

        # When read() hands back bytes rather than str, map every byte to
        # the code point of the same value so ord(rom[i]) keeps working.
        if not isinstance(data, str):
            data = data.decode("latin-1")
        return RomStr(data)

    def load_labels(self, filename="labels.json"):
        """ Loads labels from labels.json, or parses the source code file and
        generates new labels.

        filename is resolved relative to this module's directory. The file is
        regenerated from "../main.asm" when it is missing or was modified more
        than three days ago. self.labels always ends up being what json.loads
        returns for that file.
        """
        base_directory = os.path.dirname(__file__)
        filename = os.path.join(base_directory, filename)

        # blank out the hash
        self.labels = {}

        # a missing file is always regenerated, an existing one only when
        # it is older than three days
        generate_labels = True
        if os.path.exists(filename):
            modified = datetime.datetime.fromtimestamp(os.path.getmtime(filename))
            current = datetime.datetime.fromtimestamp(time.time())
            generate_labels = (current - modified) > datetime.timedelta(days=3)

        # scan the asm source code for labels
        if generate_labels:
            with open(os.path.join(base_directory, "../main.asm"), "r") as asm_file:
                asm = asm_file.read().split("\n")

            for line in asm:
                label = get_label_from_line(line)
                if label:
                    address = get_address_from_line_comment(line)
                    self.labels[address] = label

            with open(filename, "w") as file_handler:
                file_handler.write(json.dumps(self.labels))

        # load the labels from the file
        with open(filename, "r") as file_handler:
            self.labels = json.loads(file_handler.read())

    def get_address_for(self, label):
        """ Returns the address of a label. This is slow and could be improved
        dramatically.

        The address is returned exactly as it is keyed in self.labels, or
        None when no entry has that label.
        """
        label = str(label)
        for (address, known_label) in self.labels.items():
            if known_label == label:
                return address
        return None

    def length(self):
        """ len(self)
        """
        return len(self)

    def len(self):
        """ len(self)
        """
        return self.length()

    def interval(self, offset, length, strings=True, debug=True):
        """ returns hex values for the rom starting at offset until
        offset+length

        With strings=True each byte is rendered with hex(), otherwise the
        plain integer is returned. debug is accepted but not used.
        """
        values = [ord(byte) for byte in self[offset:offset + length]]
        if strings:
            return [hex(value) for value in values]
        return values

    def until(self, offset, byte, strings=True, debug=False):
        """ Returns hex values from rom starting at offset until the given
        byte.

        The terminating byte itself is not included. debug is accepted but
        not used.

        NOTE(review): when byte does not occur at or after offset, find()
        returns -1 and the slice becomes self[offset:-1], i.e. everything up
        to the second to last byte. Confirm whether callers rely on this.
        """
        stop = self.find(chr(byte), offset)
        return self.interval(offset, stop - offset, strings=strings)

    def to_asm(self, address, end_address=None, size=None, max_size=0x4000, debug=None):
        """ Disassembles ASM at some address. This will stop disassembling when
        either the end_address or size is met. Also, there's a maximum size
        that will be parsed, so that large patches of data aren't parsed as
        code.

        address may be an int, or a string containing "0x" which is parsed as
        hexadecimal. When debug is None, self.debug is used if it exists and
        False otherwise. Returns a DisAsm for the requested range. Raises an
        Exception when address is None or the bounds exceed max_size.
        """
        if isinstance(address, str) and "0x" in address:
            address = int(address, 16)

        start_address = address

        if start_address is None:
            raise Exception("address must be given")

        if debug is None:
            debug = getattr(self, "debug", False)

        # Reconcile end_address and size. Only the first matching case
        # applies, exactly like the elif chain this replaces.
        if size is not None and max_size < size:
            raise Exception("max_size must be greater than or equal to size")
        elif end_address is not None and (end_address - start_address) > max_size:
            raise Exception("end_address is out of bounds")
        elif end_address is not None and size is not None:
            # both given: the larger of the two ranges wins
            if (end_address - start_address) >= size:
                size = end_address - start_address
            else:
                end_address = start_address + size
        elif end_address is None and size is not None:
            end_address = start_address + size
        elif end_address is not None and size is None:
            size = end_address - start_address

        return DisAsm(start_address=start_address, end_address=end_address, size=size, max_size=max_size, debug=debug, rom=self)
|
|
|
|
class DisAsm:
    """ z80 disassembler

    Constructing an instance immediately disassembles the rom starting at
    start_address (see parse). The decoded commands end up in
    self.asm_commands, keyed by address, and str() renders them as asm.
    """

    def __init__(self, start_address=None, end_address=None, size=None, max_size=0x4000, debug=True, rom=None):
        """ Validates the requested range and runs the disassembly.

        start_address is required. end_address and size are brought in sync
        with each other; when both are given the larger range wins. rom
        defaults to the contents of "../baserom.gbc". debug must be None,
        True or False, and None is treated as False.

        Raises AssertionError when start_address is None, and Exception when
        debug is invalid or the bounds are negative, empty or larger than
        max_size.
        """
        assert start_address is not None, "start_address must be given"

        if rom is None:
            # A rom is binary data, so read it as such. The with-block
            # closes the file even if read() fails.
            with open("../baserom.gbc", "rb") as file_handler:
                data = file_handler.read()
            # When read() hands back bytes rather than str, map every byte
            # to the code point of the same value so ord(rom[i]) still works.
            if not isinstance(data, str):
                data = data.decode("latin-1")
            rom = RomStr(data)

        if debug not in [None, True, False]:
            raise Exception("debug param is invalid")
        if debug is None:
            debug = False

        # get end_address and size in sync with each other
        if end_address is None and size is not None:
            end_address = start_address + size
        elif end_address is not None and size is None:
            size = end_address - start_address
        elif end_address is not None and size is not None:
            size = max(end_address - start_address, size)
            end_address = start_address + size

        # check that the bounds make sense
        if end_address is not None:
            if end_address <= start_address:
                raise Exception("end_address is out of bounds")
            elif (end_address - start_address) > max_size:
                raise Exception("end_address goes beyond max_size")

        # check more edge cases
        if not start_address >= 0:
            raise Exception("start_address must be at least 0")
        elif end_address is not None and not end_address >= 0:
            raise Exception("end_address must be at least 0")

        self.rom = rom
        self.start_address = start_address
        self.end_address = end_address
        self.size = size
        self.max_size = max_size
        self.debug = debug

        self.parse()

    @staticmethod
    def _substitute(template, placeholder, insertion):
        """ Replaces the first placeholder in template with insertion and
        lowercases the text on both sides of it.
        """
        position = template.find(placeholder)
        return template[:position].lower() + insertion + template[position + 1:].lower()

    def parse(self):
        """ Disassembles stuff and things.

        Walks the rom from self.start_address and stores one dict per decoded
        command in self.asm_commands, keyed by address:

            {"address": 0x5040, "type": "op", "id": 0x20,
             "format": "jr nz, x", "formatted": "jr nz, $5",
             "references": 0, ...}

        Reading stops at an opcode listed in end_08_scripts_with, or at a
        byte that is not a known opcode once offset is past end_address.
        Afterwards self.end_address and self.last_address are set to
        offset + 1.
        """
        rom = self.rom
        end_address = self.end_address

        # A rom without a label table (a plain str, or a RomStr whose labels
        # were never loaded) simply has no names to substitute.
        labels = getattr(rom, "labels", None) or {}

        asm_commands = {}
        offset = self.start_address
        keep_reading = True

        # Compare against end_address only when one was given; ordering an
        # int against None is not portable.
        while (end_address is not None and end_address != 0 and offset <= end_address) or keep_reading:
            # read the current opcode byte
            current_byte = ord(rom[offset])

            # setup this next/upcoming command; an entry already exists when
            # an earlier jump targeted this address
            asm_command = asm_commands.get(offset, {})
            asm_command["address"] = offset

            # This counts how many times relative jumps reference this
            # byte. This is used to determine whether or not to print out a
            # label later.
            asm_command.setdefault("references", 0)

            # some commands have two opcodes
            next_byte = ord(rom[offset + 1])

            if self.debug:
                print("offset: \t\t" + hex(offset))
                print("current_byte: \t\t" + hex(current_byte))
                print("next_byte: \t\t" + hex(next_byte))

            # this might be a two-byte opcode
            possible_opcode = current_byte + (next_byte << 8)

            # all two-byte opcodes also have their first byte in there somewhere
            if current_byte in opt_table or possible_opcode in opt_table:
                # check if this is a two-byte opcode
                # NOTE(review): a two-byte opcode without operands still only
                # advances offset by one below, so its second byte is decoded
                # again on the next iteration -- confirm whether intended.
                if possible_opcode in opt_table:
                    op_code = possible_opcode
                else:
                    op_code = current_byte

                op = opt_table[op_code]
                opstr = op[0].lower()
                optype = op[1]

                if self.debug:
                    print("opstr: " + opstr)

                asm_command["type"] = "op"
                asm_command["id"] = op_code
                asm_command["format"] = opstr
                asm_command["opnumberthing"] = optype

                base_opstr = copy(opstr)

                # "x" marks a one-byte operand
                if "x" in opstr:
                    for _ in range(0, opstr.count("x")):
                        operand = ord(rom[offset + 1])

                        # Certain opcodes will have a local relative jump label
                        # here instead of a raw hex value, but this is
                        # controlled through asm output.
                        opstr = self._substitute(opstr, "x", "$" + hex(operand)[2:])

                        if op_code in relative_jumps:
                            # the operand is a signed displacement from the
                            # end of this two-byte instruction
                            target_address = offset + 2 + c_int8(operand).value
                            insertion = "asm_" + hex(target_address)

                            if str(target_address) in labels:
                                insertion = labels[str(target_address)]

                            asm_command["formatted_with_labels"] = self._substitute(base_opstr, "x", insertion)
                            asm_command["target_address"] = target_address

                        offset += 1

                # "?" marks a two-byte, little-endian operand
                if "?" in opstr:
                    for _ in range(0, opstr.count("?")):
                        byte1 = ord(rom[offset + 1])
                        byte2 = ord(rom[offset + 2])
                        number = byte1 + (byte2 << 8)

                        # In most cases, you can use a label here. Labels will
                        # be shown during asm output.
                        opstr = self._substitute(opstr, "?", "$%.4x" % (number))

                        # This version of the formatted string has labels. In
                        # the future, the actual labels should be parsed
                        # straight out of the "main.asm" file.
                        target_address = number % 0x4000
                        insertion = "asm_" + hex(target_address)

                        if str(target_address) in labels:
                            insertion = labels[str(target_address)]

                        asm_command["formatted_with_labels"] = self._substitute(base_opstr, "?", insertion)
                        asm_command["target_address"] = target_address

                        offset += 2

                # Check for relative jumps, construct the formatted asm line.
                # Also set the usage of labels.
                if current_byte in [0x18, 0x20] + relative_jumps: # jr or jr nz
                    # generate a label for the byte we're jumping to; offset
                    # now sits on the last operand byte of this instruction
                    target_address = offset + 1 + c_int8(ord(rom[offset])).value
                    remote_label = "asm_" + hex(target_address)

                    if target_address in asm_commands:
                        asm_commands[target_address]["references"] += 1
                        asm_commands[target_address]["current_label"] = remote_label
                    else:
                        # This remote address might not be part of this
                        # function. Also, target_address can be negative
                        # (before the start_address that the user originally
                        # requested), and it shouldn't be shown on asm output
                        # because the intermediate bytes (between a negative
                        # target_address and start_address) won't be
                        # disassembled.
                        asm_commands[target_address] = {
                            "references": 1,
                            "current_label": remote_label,
                            "address": target_address,
                        }

                    # Not sure how to set this, can't be True because an
                    # address referenced multiple times will use a label
                    # despite the label not necessarily being used in the
                    # output. The "use_remote_label" values should be
                    # calculated when rendering the asm output, based on
                    # which addresses and which op codes will be displayed
                    # (within the range).
                    asm_command["use_remote_label"] = "unknown"
                    asm_command["remote_label"] = remote_label

                # store the formatted string for the output later
                asm_command["formatted"] = opstr

                # stop reading at a jump, relative jump or return
                if current_byte in end_08_scripts_with:
                    if not self.has_outstanding_labels(asm_commands, offset):
                        keep_reading = False
                        break
                    else:
                        keep_reading = True
                else:
                    keep_reading = True

            else:
                # This shouldn't really happen, and means that this area of the
                # ROM probably doesn't represent instructions.
                asm_command["type"] = "data" # db
                asm_command["value"] = current_byte
                keep_reading = False

            # save this new command in the list
            asm_commands[asm_command["address"]] = asm_command

            # jump forward by a byte
            offset += 1

        # The break above skips the save inside the loop, so store the last
        # command here. This is unconditional on purpose: guarding it on the
        # dict being non-empty lost the command whenever the very first
        # instruction already ended the routine.
        asm_commands[asm_command["address"]] = asm_command

        # store the set of commands on this object
        self.asm_commands = asm_commands

        self.end_address = offset + 1
        self.last_address = self.end_address

    def has_outstanding_labels(self, asm_commands, offset):
        """ Checks if there are any labels that haven't yet been created.

        Currently always returns False, which means parse() stops at the
        first ending opcode it meets.
        """ # is this really necessary??
        return False

    def used_addresses(self):
        """ Returns a list of unique addresses that this function will probably
        call.

        The result is a set holding the target_address of every command whose
        opcode is listed in call_commands.
        """
        return set(
            command["target_address"]
            for command in self.asm_commands.values()
            if "target_address" in command and command["id"] in call_commands
        )

    def __str__(self):
        """ ASM pretty printer.

        Commands are emitted in ascending address order. A dict gives no
        such ordering by itself, so the addresses are sorted explicitly.
        """
        pieces = []

        for key in sorted(self.asm_commands):
            line = self.asm_commands[key]

            # skip anything from before the beginning
            if key < self.start_address:
                continue

            # show a label
            if line["references"] > 0 and "current_label" in line:
                if line["address"] == self.start_address:
                    pieces.append("thing: ; " + hex(line["address"]) + "\n")
                else:
                    pieces.append("." + line["current_label"] + "\\@ ; " + hex(line["address"]) + "\n")

            # show the actual line, preferring the version with labels
            if "formatted_with_labels" in line:
                pieces.append(spacing + line["formatted_with_labels"])
            elif "formatted" in line:
                pieces.append(spacing + line["formatted"])
            pieces.append("\n")

        # show the next address after this chunk
        pieces.append("; " + hex(self.end_address))

        return "".join(pieces)
|
|
|
|
class AsmList(list):
    """ A list of asm lines whose repr stays short, so that printing it
    does not flood the screen.
    """

    def __repr__(self):
        """ Returns a fixed placeholder instead of the list's contents.
        """
        return "AsmList(too long)"

    def length(self):
        """ Number of items in the list, same as len(self).
        """
        item_count = len(self)
        return item_count
|
|
|
|
if __name__ == "__main__":
    # Command line use: disassemble "../pokecrystal.gbc" at the address
    # given as the first argument and print the resulting asm.
    if len(sys.argv) < 2:
        sys.exit("usage: " + sys.argv[0] + " <address>")

    # A rom is binary data, so read it as such. The with-block closes the
    # file even if read() fails.
    with open("../pokecrystal.gbc", "rb") as rom_file:
        rom_data = rom_file.read()

    # When read() hands back bytes rather than str, map every byte to the
    # code point of the same value so ord(rom[i]) still works.
    if not isinstance(rom_data, str):
        rom_data = rom_data.decode("latin-1")

    cryrom = RomStr(rom_data)
    asm = cryrom.to_asm(sys.argv[1])
    print(asm)
|