pokecrystal/extras/comparator.py

262 lines
8.0 KiB
Python

#!/usr/bin/python
# -*- coding: utf-8 -*-
# author: Bryan Bishop <kanzure@gmail.com>
# date: 2012-05-29
# purpose: find shared functions between red/crystal
from crystal import get_label_from_line, \
get_address_from_line_comment, \
AsmSection
from romstr import RomStr, AsmList
def load_rom(path):
    """ Loads a ROM file into an abbreviated RomStr object.

    path -- filesystem path of the ROM image to read

    Returns a RomStr built from the entire contents of the file.
    """
    # A ROM image is binary data, so open it in binary mode; the context
    # manager closes the handle even when read() or RomStr() raises.
    with open(path, "rb") as fh:
        return RomStr(fh.read())
def load_asm(path):
    """ Loads source ASM into an abbreviated AsmList object.

    path -- filesystem path of the assembly source to read

    Returns an AsmList holding one entry per line of the file. The text is
    split on the newline character, so a file ending in a newline produces a
    final empty entry.
    """
    # the context manager closes the handle even when read() raises
    with open(path, "r") as fh:
        lines = fh.read().split("\n")
    return AsmList(lines)
def findall_iter(sub, string):
    """ Yields the index of every non-overlapping occurrence of sub in string.

    sub    -- the sequence to look for
    string -- the sequence to search; it must provide a find(sub, start)
              method that returns -1 when there is no further match

    The search resumes len(sub) positions after each hit, so overlapping
    occurrences are not reported. An empty sub matches at every position
    from 0 through len(string).
    """
    # url: http://stackoverflow.com/a/3874760/687783
    # Always move forward by at least one position, otherwise an empty sub
    # would be found at the same index forever.
    step = len(sub) or 1
    index = string.find(sub)
    while index != -1:
        yield index
        index = string.find(sub, index + step)
class Address(int):
    """ A simple int wrapper to take 0xFFFF and $FFFF addresses.

    Strings are parsed as hexadecimal and must contain either "$" or "0x";
    every "$" is rewritten to "0x" before parsing. Any other string raises
    ValueError. Non-string values are handed to int unchanged, so the default
    x=None raises TypeError exactly as int(None) would.
    """

    def __new__(cls, x=None, *args, **kwargs):
        if isinstance(x, str):
            # normalise the "$FFFF" assembler notation to "0xFFFF"
            x = x.replace("$", "0x")
            if "0x" not in x:
                msg = "Address.__new__ doesn't know how to parse this string"
                raise ValueError(msg)
            return int.__new__(cls, int(x, base=16), *args, **kwargs)
        return int.__new__(cls, x, *args, **kwargs)
# Every BinaryBlob for which at least one candidate location was found; the
# find_* methods of BinaryBlob append to this list.
found_blobs = []


class BinaryBlob(object):
    """ Stores a label, line number, and addresses of a function from Pokémon
    Red. These details can be used to determine whether or not the function was
    copied into Pokémon Crystal.

    Constructing an instance immediately reads the blob's bytes from the
    module-level redrom and searches the module-level cryrom for them, so both
    must be loaded before any BinaryBlob is created.
    """

    # class-level defaults; __init__ replaces all of them except debug, which
    # is only replaced when a debug value is passed in
    start_address = None
    end_address = None
    label = None
    line_number = None
    bytes = None
    bank = None
    debug = False
    locations = None

    def __init__(self, start_address=None, end_address=None, label=None,
                 debug=None, line_number=None):
        """ Builds the blob, reads its bytes and searches for them.

        start_address -- Address, or anything Address() accepts
        end_address   -- Address, or anything Address() accepts
        label         -- name of the function; must be a str
        debug         -- when not None, overrides the class-level debug flag
        line_number   -- source line number associated with the blob; required

        Raises AssertionError when label or line_number is missing or label is
        not a str.
        """
        if not isinstance(start_address, Address):
            start_address = Address(start_address)
        if not isinstance(end_address, Address):
            end_address = Address(end_address)

        assert label is not None, "label can't be none"
        assert isinstance(label, str), "label must be a string"
        assert line_number is not None, "line_number must be provided"

        self.start_address = start_address
        self.end_address = end_address
        self.label = label
        self.line_number = line_number
        self.bytes = []
        self.locations = []

        # Floor division keeps the bank an integer even when "/" means true
        # division. 0x4000 is presumably the size of one ROM bank.
        self.bank = start_address // 0x4000

        if debug is not None:
            self.debug = debug

        self.parse_from_red()

        # The whole-blob search (find_in_crystal) is not run here; only the
        # leading bytes are used to look for candidates.
        self.find_by_first_bytes()

    def __repr__(self):
        """ A beautiful poem.

        Shows the label, start address and size. The "located" part is the
        single address when there is exactly one location, the list of
        addresses for zero or two to five locations, and only the count when
        there are more than five.
        """
        locnum = len(self.locations)
        if locnum == 1:
            located = hex(self.locations[0])
        elif locnum <= 5:
            located = str([hex(x) for x in self.locations])
        else:
            located = str(locnum)

        return "".join([
            "BinaryBlob(",
            "label=\"" + self.label + "\", ",
            "start_address=" + hex(self.start_address) + ", ",
            "size=" + str(self.end_address - self.start_address) + ", ",
            "located=" + located,
            ")",
        ])

    def __str__(self):
        return self.__repr__()

    def parse_from_red(self):
        """ Reads bytes from Pokémon Red and stores them.
        """
        # NOTE(review): this slice also takes the byte at end_address, while
        # __repr__ and pretty_bytes treat the size as end - start. Confirm
        # whether end_address is meant to be inclusive.
        self.bytes = redrom[self.start_address : self.end_address + 1]

    def pretty_bytes(self):
        """ Returns a better looking range of bytes.

        The result is whatever redrom.interval() produces for this blob's
        start address and size.
        """
        size = self.end_address - self.start_address
        return redrom.interval(self.start_address, size,
                               strings=False, debug=True)

    def _record_matches(self):
        """ Registers this blob in found_blobs when it has any location.
        """
        if len(self.locations) > 0:
            found_blobs.append(self)

            if self.debug:
                print(self.label + ": found " + str(len(self.locations)) + " matches.")

    def find_in_crystal(self):
        """ Checks whether or not the bytes appear in Pokémon Crystal.

        Stores every offset at which the complete byte string occurs in cryrom
        in self.locations.
        """
        self.locations = list(findall_iter(self.bytes, cryrom))
        self._record_matches()

    def find_by_first_bytes(self, first_n=3, max_distance=0x8000):
        """ Finds this blob in Crystal based on the first n bytes.

        first_n      -- how many leading bytes to match
        max_distance -- candidates further than this from start_address are
                        dropped

        Blobs that are not longer than first_n bytes are skipped and their
        locations are left untouched.
        """
        # no match
        if len(self.bytes) <= first_n:
            return

        candidates = findall_iter(self.bytes[0:first_n], cryrom)

        # filter out locations that suck
        self.locations = [i for i in candidates
                          if abs(self.start_address - i) <= max_distance]
        self._record_matches()
# Locations of the two ROM images and the two assembly sources. The paths are
# relative, so they resolve against the working directory of the process.
pokecrystal_rom_path = "../baserom.gbc"
pokecrystal_src_path = "../main.asm"
pokered_rom_path = "../pokered-baserom.gbc"
pokered_src_path = "../pokered-main.asm"
# All four files are read as soon as this module is executed. BinaryBlob reads
# redrom and cryrom as globals, and scan_red_asm iterates over redsrc.
cryrom = load_rom(pokecrystal_rom_path)
crysrc = load_asm(pokecrystal_src_path)
redrom = load_rom(pokered_rom_path)
redsrc = load_asm(pokered_src_path)
def scan_red_asm(bank_stop=3, debug=True):
    """ Scans the ASM from Pokémon Red. Finds labels and objects. Does things.
    Uses get_label_from_line and get_address_from_line_comment.

    bank_stop -- scanning ends after the first line processed while the
                 current bank equals this value
    debug     -- print progress messages

    Walks redsrc line by line. SECTION lines switch the current bank. On any
    other line that is not an INCBIN and contains a label, a BinaryBlob is
    built for the previously accepted label, spanning from its address up to
    the address of the label just read. Nothing is returned; the blobs
    register themselves in found_blobs when they locate candidates.
    """
    # whether or not to show the lines from redsrc
    show_lines = False

    # labels for which no blob is ever created
    skipped_labels = ("Char52", "PokeCenterSignText", "DefaultNamesPlayer", "Unnamed_6a12")

    current_bank = 0
    latest_label = "ignore me"
    latest_start_address = 0
    latest_line = ""

    for line_number, line in enumerate(redsrc):
        if debug and show_lines:
            print("processing a line from red: " + line)

        if line.startswith("SECTION"):
            current_bank = AsmSection(line).bank_id

            if debug:
                print("scan_red_asm: switching to bank " + str(current_bank))

        elif not line.startswith("INCBIN"):
            if ":" in line and ";XXX:" not in line and " ; XXX:" not in line:
                current_label = get_label_from_line(line)
                current_start_address = get_address_from_line_comment(
                    line, bank=current_bank)

                # Only accept a label that has a usable address lying more
                # than one byte past the previously accepted label.
                if current_label is not None \
                   and current_start_address is not None \
                   and latest_start_address is not None \
                   and current_start_address != 0 \
                   and current_start_address != latest_start_address \
                   and (current_start_address - latest_start_address) > 1:
                    if latest_label is not None and latest_label not in skipped_labels:
                        # NOTE(review): line_number is the index of the line
                        # holding the *next* label, not the line on which
                        # latest_label itself appeared.
                        blob = BinaryBlob(label=latest_label,
                                          start_address=latest_start_address,
                                          end_address=current_start_address,
                                          line_number=line_number)

                        if debug:
                            print("Created a new blob: " + str(blob) + " from line: " + str(latest_line))

                    # NOTE(review): the nesting of these three updates inside
                    # the acceptance check was reconstructed from a copy of
                    # the file that had lost its indentation; confirm.
                    latest_label = current_label
                    latest_start_address = current_start_address
                    latest_line = line

        if current_bank == bank_stop:
            if debug:
                print("scan_red_asm: stopping because current_bank == " +
                      str(bank_stop) + " (bank_stop)")
            break
# Run the scan as soon as the module is executed, then report the results.
scan_red_asm(bank_stop=3)

print("================================")

for blob in found_blobs:
    print(blob)

print("Found " + str(len(found_blobs)) + " possibly copied functions.")

# Show the candidate locations of one sample blob. The index is guarded so a
# scan that finds fewer than eleven blobs does not end in an IndexError.
if len(found_blobs) > 10:
    print([hex(x) for x in found_blobs[10].locations])