simple lexer

This commit is contained in:
Bryan Bishop 2012-04-26 00:31:53 -05:00
parent 2b159a5ebd
commit ad76f259ff
1 changed files with 494 additions and 0 deletions

494
extras/pkmnasm/asmlex.py Normal file
View File

@ -0,0 +1,494 @@
# -*- coding: utf-8 -*-
import ply.lex as lex
import sys, os
FILENAME = '' # Current filename
_tokens = ('STRING', 'NEWLINE', 'LABEL',
'ID', 'COMMA', 'PLUS', 'MINUS', 'LP', 'RP', 'MUL', 'DIV', 'POW',
'UMINUS', 'APO', 'INTEGER', 'ADDR', 'RB', 'LB',
'LOCALLABEL', 'LSHIFT', 'RSHIFT', 'BITWISE_OR', 'BITWISE_AND',
'LOGICAL_NOT', 'BITWISE_COMPLEMENT',
)
# Instruction mnemonics.  Keys are the lower-case spelling looked up by the
# identifier rule; each value is the token type, which is simply the
# mnemonic in upper case.
reserved_instructions = dict(
    (mnemonic, mnemonic.upper())
    for mnemonic in (
        'adc', 'add', 'and',
        'bit',
        'call', 'ccf', 'cp', 'cpd', 'cpdr', 'cpi', 'cpir', 'cpl',
        'daa', 'dec', 'di', 'djnz',
        'ei', 'ex', 'exx',
        'halt',
        'im', 'in', 'inc', 'ind', 'indr', 'ini', 'inir',
        'jp', 'jr',
        'ld', 'ldd', 'lddr', 'ldi', 'ldir',
        'neg', 'nop',
        'or', 'otdr', 'otir', 'out', 'outd', 'outi',
        'pop', 'push',
        'res', 'ret', 'reti', 'retn',
        'rl', 'rla', 'rlc', 'rlca', 'rld',
        'rr', 'rra', 'rrc', 'rrca', 'rrd',
        'rst',
        'sbc', 'scf', 'set', 'sla', 'sll', 'sra', 'srl', 'sub',
        'xor',
    )
)
# Pseudo ops (assembler directives).  Several spellings share one token
# type: defb/defm/db -> DEFB, defs/ds -> DEFS, defw/dw -> DEFW.
pseudo = dict(
    align='ALIGN',
    org='ORG',
    defb='DEFB', defm='DEFB', db='DEFB',
    defs='DEFS', ds='DEFS',
    defw='DEFW', dw='DEFW',
    equ='EQU',
    proc='PROC',
    endp='ENDP',
    local='LOCAL',
    end='END',
    incbin='INCBIN',
)
# 8-bit register names; the token type is the name in upper case.
regs8 = dict(
    (register, register.upper())
    for register in (
        'a',
        'b', 'c',
        'd', 'e',
        'h', 'l',
        'i', 'r',
        'ixh', 'ixl',
        'iyh', 'iyl',
    )
)
# 16-bit register names; the token type is the name in upper case.
regs16 = dict(
    af='AF',
    bc='BC',
    de='DE',
    hl='HL',
    ix='IX',
    iy='IY',
    sp='SP',
)
# Condition-flag names; the token type is the name in upper case.
# ('c' is not listed here -- it is tokenized through regs8.)
flags = dict(
    (condition, condition.upper())
    for condition in ('z', 'nz', 'nc', 'po', 'pe', 'p', 'm')
)
# Directive names recognised by the identifier rule of the 'preproc' lexer
# state, mapped to their token types.
preprocessor = dict(
    init='_INIT',
    line='_LINE',
)
# List of token names: the base tuple followed by the token types of every
# keyword table, in this fixed table order.  sum() with a tuple start value
# is plain left-to-right tuple concatenation.  Duplicates (aliases that share
# a token type) are still present at this point.
_tokens = sum(
    (
        tuple(table.values())
        for table in (
            reserved_instructions,
            pseudo,
            regs8,
            regs16,
            flags,
            preprocessor,
        )
    ),
    _tokens,
)
def get_uniques(l):
    ''' Returns a new list holding the elements of l without repetitions,
    keeping the position of each element's first occurrence.

    Membership is checked by equality against the list built so far, so
    the elements do not need to be hashable.
    '''
    uniques = []
    for element in l:
        if element in uniques:
            continue  # Already kept an equal element
        uniques.append(element)
    return uniques
# Final token-name list with duplicates removed (several keyword spellings
# map to the same token type).  Lexer.__init__ exposes it as self.tokens.
tokens = get_uniques(_tokens)
class Lexer(object):
    ''' PLY lexer rules for assembler source, wrapped in a class so that
    several independent lexers can exist.

    An instance is handed to lex.lex(object=...), which collects every
    t_* method as a token rule.  Two things about those methods are
    load-bearing and must not be changed casually:

    * the docstring of each rule IS its regular expression, so rule
      documentation lives in comments, never in the docstring;
    * function rules are tried in definition order, so reordering the
      methods changes which rule wins (e.g. t_HEXA before t_BIN).

    Rule names may carry state prefixes: t_INITIAL_preproc_X applies in
    both the INITIAL and the 'preproc' state, t_preproc_X only in
    'preproc', and an unprefixed t_X only in INITIAL.
    '''
    states = (
        ('preproc', 'exclusive'),
    )

    # -------------- TOKEN ACTIONS --------------

    @property
    def lineno(self):
        ''' Line number of the wrapped PLY lexer, or 0 while no lexer has
        been created yet (i.e. before input() was called).
        '''
        if self.lex is None:
            return 0
        return self.lex.lineno

    @lineno.setter
    def lineno(self, value):
        self.lex.lineno = value

    # Blanks and tabs are discarded (the rule returns nothing).
    def t_INITIAL_preproc_skip(self, t):
        r'[ \t]+'
        pass

    # A single quoted character becomes an INTEGER holding its code point.
    def t_CHAR(self, t):
        r"'.'"
        t.value = ord(t.value[1])
        t.type = 'INTEGER'
        return t

    # Hexadecimal literal, written either 0FFh / 0FFH or $FF.
    def t_HEXA(self, t):
        r'([0-9][0-9a-fA-F]*[hH])|(\$[0-9a-fA-F]+)'
        text = t.value
        # Drop the '$' prefix or the 'h' suffix, whichever form was used
        digits = text[1:] if text[0] == '$' else text[:-1]
        t.value = int(digits, 16)
        t.type = 'INTEGER'
        return t

    # Binary literal, written either %101 or 101b / 101B.
    # Note 00B is a 0 binary, but 00Bh is a 12 in hex, so this rule must
    # stay defined after t_HEXA.
    def t_BIN(self, t):
        r'(%[01]+)|([01]+[bB])'
        text = t.value
        # Drop the '%' prefix or the 'b' suffix, whichever form was used
        digits = text[1:] if text[0] == '%' else text[:-1]
        t.value = int(digits, 2)
        t.type = 'INTEGER'
        return t

    # Decimal integer literal.
    def t_INITIAL_preproc_INTEGER(self, t):
        r'[0-9]+'
        t.value = int(t.value)
        return t

    # Any identifier: label definition, local label, keyword or plain ID.
    def t_INITIAL_ID(self, t):
        r'[_a-zA-Z.]([.]?[_a-zA-Z0-9\\@\#]+)*[:]?(\\\W)?'
        text = t.value

        # A trailing colon marks a label definition; the colon is dropped.
        if text[-1] == ':':
            t.type = 'LABEL'
            t.value = text[:-1]
            return t

        # '.name\@' is a local label; the leading dot is dropped.  The
        # colon-terminated spelling never gets here: it has already been
        # returned as LABEL above.
        if text[0] == '.' and text[-2:] == '\\@':
            t.type = 'LOCALLABEL'
            t.value = text[1:]
            return t

        # Keywords are matched case-insensitively and carry their
        # upper-case spelling as value.  The lookup order of the tables
        # is significant and kept as it always was.
        key = text.lower()
        for table in (reserved_instructions, pseudo, regs8, flags, regs16):
            token_type = table.get(key)
            if token_type is not None:
                t.type = token_type
                t.value = text.upper()
                return t

        # Plain identifier: keeps the spelling found in the source.
        t.type = 'ID'
        return t

    # Identifier inside a preprocessor line: a directive name or an ID.
    def t_preproc_ID(self, t):
        r'[_a-zA-Z][_a-zA-Z0-9]*'
        t.type = preprocessor.get(t.value.lower(), 'ID')
        return t

    def t_COMMA(self, t):
        r','
        return t

    # A lone '$' ('$' followed by hex digits is consumed by t_HEXA first).
    def t_ADDR(self, t):
        r'\$'
        return t

    def t_LP(self, t):
        r'\('
        return t

    def t_RP(self, t):
        r'\)'
        return t

    # NOTE(review): RB matches '[' and LB matches ']', which looks swapped
    # relative to the names.  Left as is because the grammar consuming
    # these tokens is not visible here -- confirm before renaming.
    def t_RB(self, t):
        r'\['
        return t

    def t_LB(self, t):
        r'\]'
        return t

    def t_LSHIFT(self, t):
        r'<<'
        return t

    def t_RSHIFT(self, t):
        r'>>'
        return t

    def t_BITWISE_OR(self, t):
        r'\|'
        return t

    def t_BITWISE_AND(self, t):
        r'\&'
        return t

    def t_BITWISE_COMPLEMENT(self, t):
        r'~'
        return t

    def t_LOGICAL_NOT(self, t):
        r'\!'
        return t

    def t_PLUS(self, t):
        r'\+'
        return t

    def t_MINUS(self, t):
        r'\-'
        return t

    def t_MUL(self, t):
        r'\*'
        return t

    def t_DIV(self, t):
        r'\/'
        return t

    def t_POW(self, t):
        r'\^'
        return t

    # A lone apostrophe (a quoted character is consumed by t_CHAR first).
    def t_APO(self, t):
        r"'"
        return t

    # Double quoted string; the value is the text between the quotes.
    def t_INITIAL_preproc_STRING(self, t):
        r'"[^"]*"'
        t.value = t.value[1:-1]
        return t

    def t_INITIAL_preproc_error(self, t):
        ''' Error rule for both states: reports the offending character
        through self.error(), which terminates the program.
        '''
        self.error("illegal character '%s'" % t.value[0])

    # Backslash + end of line joins two physical lines: the line counter
    # advances but no NEWLINE token is produced.
    def t_INITIAL_preproc_CONTINUE(self, t):
        r'\\\r?\n'
        t.lexer.lineno += 1

    # Comments run from ';' up to, but not including, the end of line.
    def t_COMMENT(self, t):
        r';.*'

    # End of line: counts the line and always returns to the INITIAL
    # state, which is what terminates a preprocessor line.
    def t_INITIAL_preproc_NEWLINE(self, t):
        r'\r?\n'
        t.lexer.lineno += 1
        t.lexer.begin('INITIAL')
        return t

    # '#' opens a preprocessor line, but only in the first column;
    # anywhere else it is an illegal character.
    def t_INITIAL_SHARP(self, t):
        r'\#'
        if self.find_column(t) == 1:
            t.lexer.begin('preproc')
        else:
            self.error("illegal character '%s'" % t.value[0])

    def __init__(self):
        ''' Creates a new lexer instance.  No PLY lexer is built until
        input() is called.
        '''
        self.lex = None
        self.filestack = [] # Current filename, and line number being parsed
        self.input_data = ''
        self.tokens = tokens
        # NOTE(review): documented as "if set to something, this will be
        # returned once", but nothing in this class reads it.
        self.next_token = None

    def input(self, str):
        ''' Sets the text to tokenize, replacing any previous lexer with
        a freshly built one.
        '''
        self.input_data = str
        self.lex = lex.lex(object = self)
        self.lex.input(self.input_data)

    def token(self):
        ''' Returns the next token of the wrapped lexer.

        Returns None when no input has been set yet; what the wrapped
        lexer returns at end of input is passed through unchanged.
        '''
        if self.lex is None:
            return None
        return self.lex.token()

    def find_column(self, token):
        ''' Compute column:
            - token is a token instance (needs .lexpos)
        Returns the 1-based column of the token within its line.

        The text is taken from the lexer that produced the token
        (token.lexer.lexdata) when available, and from self.input_data
        otherwise.  The first source matters when this object only
        serves as the rule container of a lexer that was fed directly,
        in which case self.input_data is still empty.
        '''
        data = getattr(getattr(token, 'lexer', None), 'lexdata', None)
        if data is None:
            data = self.input_data
        # Index just past the last newline before the token (0 if none)
        line_start = data.rfind('\n', 0, token.lexpos) + 1
        return token.lexpos - line_start + 1

    def msg(self, str):
        ''' Prints a message prefixed with the current FILENAME.

        The line number is not reported; a literal '?' takes its place.
        '''
        # Single parenthesised argument: prints identically on Python 2
        # and Python 3.
        print('%s:%s %s' % (FILENAME, "?", str))

    def error(self, str):
        ''' Prints an error msg, and exits with status 1.
        '''
        self.msg('Error: %s' % str)
        sys.exit(1)

    def warning(self, str):
        ''' Emits a warning and continues execution.
        '''
        self.msg('Warning: %s' % str)
# Needed for states
# Module-level PLY lexer built at import time from a fresh Lexer instance
# acting as the rule container.  The __main__ driver feeds and reads this
# lexer directly.
tmp = lex.lex(object = Lexer(), lextab = 'zxbasmlextab')
if __name__ == '__main__':
    # Command-line driver: tokenize the file named by the first argument
    # with the module-level lexer and print every token, one per line.
    if len(sys.argv) < 2:
        # Exit with a usage line instead of an IndexError traceback
        sys.exit('usage: %s <source file>' % sys.argv[0])

    FILENAME = sys.argv[1]  # Prefix used by Lexer.msg() diagnostics

    # Read through a context manager so the handle is closed promptly
    with open(FILENAME) as source:
        tmp.input(source.read())

    tok = tmp.token()
    while tok:
        print(tok)  # Single argument: same output on Python 2 and 3
        tok = tmp.token()