2022-11-03 04:31:26 +00:00
|
|
|
import lexer as lx
|
2023-08-04 16:35:56 +00:00
|
|
|
|
2022-11-03 04:31:26 +00:00
|
|
|
Token = lx.Token
|
|
|
|
|
|
|
|
|
|
|
|
class PLexer:
|
2022-11-18 01:06:07 +00:00
|
|
|
def __init__(self, src: str, filename: str):
|
2022-11-03 04:31:26 +00:00
|
|
|
self.src = src
|
|
|
|
self.filename = filename
|
|
|
|
self.tokens = list(lx.tokenize(self.src, filename=filename))
|
|
|
|
self.pos = 0
|
|
|
|
|
|
|
|
def getpos(self) -> int:
|
|
|
|
# Current position
|
|
|
|
return self.pos
|
|
|
|
|
|
|
|
def eof(self) -> bool:
|
|
|
|
# Are we at EOF?
|
|
|
|
return self.pos >= len(self.tokens)
|
|
|
|
|
|
|
|
def setpos(self, pos: int) -> None:
|
|
|
|
# Reset position
|
|
|
|
assert 0 <= pos <= len(self.tokens), (pos, len(self.tokens))
|
|
|
|
self.pos = pos
|
|
|
|
|
|
|
|
def backup(self) -> None:
|
|
|
|
# Back up position by 1
|
|
|
|
assert self.pos > 0
|
|
|
|
self.pos -= 1
|
|
|
|
|
|
|
|
def next(self, raw: bool = False) -> Token | None:
|
|
|
|
# Return next token and advance position; None if at EOF
|
|
|
|
# TODO: Return synthetic EOF token instead of None?
|
|
|
|
while self.pos < len(self.tokens):
|
|
|
|
tok = self.tokens[self.pos]
|
|
|
|
self.pos += 1
|
|
|
|
if raw or tok.kind != "COMMENT":
|
|
|
|
return tok
|
|
|
|
return None
|
|
|
|
|
|
|
|
def peek(self, raw: bool = False) -> Token | None:
|
|
|
|
# Return next token without advancing position
|
|
|
|
tok = self.next(raw=raw)
|
|
|
|
self.backup()
|
|
|
|
return tok
|
|
|
|
|
|
|
|
def maybe(self, kind: str, raw: bool = False) -> Token | None:
|
|
|
|
# Return next token without advancing position if kind matches
|
|
|
|
tok = self.peek(raw=raw)
|
|
|
|
if tok and tok.kind == kind:
|
|
|
|
return tok
|
|
|
|
return None
|
|
|
|
|
|
|
|
def expect(self, kind: str) -> Token | None:
|
|
|
|
# Return next token and advance position if kind matches
|
|
|
|
tkn = self.next()
|
|
|
|
if tkn is not None:
|
|
|
|
if tkn.kind == kind:
|
|
|
|
return tkn
|
|
|
|
self.backup()
|
|
|
|
return None
|
|
|
|
|
|
|
|
def require(self, kind: str) -> Token:
|
|
|
|
# Return next token and advance position, requiring kind to match
|
|
|
|
tkn = self.next()
|
|
|
|
if tkn is not None and tkn.kind == kind:
|
|
|
|
return tkn
|
2023-08-04 16:35:56 +00:00
|
|
|
raise self.make_syntax_error(
|
|
|
|
f"Expected {kind!r} but got {tkn and tkn.text!r}", tkn
|
|
|
|
)
|
2022-11-03 04:31:26 +00:00
|
|
|
|
|
|
|
def extract_line(self, lineno: int) -> str:
|
|
|
|
# Return source line `lineno` (1-based)
|
|
|
|
lines = self.src.splitlines()
|
|
|
|
if lineno > len(lines):
|
|
|
|
return ""
|
|
|
|
return lines[lineno - 1]
|
|
|
|
|
2023-08-04 16:35:56 +00:00
|
|
|
def make_syntax_error(self, message: str, tkn: Token | None = None) -> SyntaxError:
|
2022-11-03 04:31:26 +00:00
|
|
|
# Construct a SyntaxError instance from message and token
|
|
|
|
if tkn is None:
|
|
|
|
tkn = self.peek()
|
|
|
|
if tkn is None:
|
|
|
|
tkn = self.tokens[-1]
|
2023-08-04 16:35:56 +00:00
|
|
|
return lx.make_syntax_error(
|
|
|
|
message, self.filename, tkn.line, tkn.column, self.extract_line(tkn.line)
|
|
|
|
)
|
2022-11-03 04:31:26 +00:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
import sys
|
2023-08-04 16:35:56 +00:00
|
|
|
|
2022-11-03 04:31:26 +00:00
|
|
|
if sys.argv[1:]:
|
|
|
|
filename = sys.argv[1]
|
|
|
|
if filename == "-c" and sys.argv[2:]:
|
|
|
|
src = sys.argv[2]
|
2022-11-18 01:06:07 +00:00
|
|
|
filename = "<string>"
|
2022-11-03 04:31:26 +00:00
|
|
|
else:
|
|
|
|
with open(filename) as f:
|
|
|
|
src = f.read()
|
|
|
|
else:
|
2022-11-18 01:06:07 +00:00
|
|
|
filename = "<default>"
|
2022-11-03 04:31:26 +00:00
|
|
|
src = "if (x) { x.foo; // comment\n}"
|
|
|
|
p = PLexer(src, filename)
|
|
|
|
while not p.eof():
|
|
|
|
tok = p.next(raw=True)
|
2022-11-18 01:06:07 +00:00
|
|
|
assert tok
|
2022-11-03 04:31:26 +00:00
|
|
|
left = repr(tok)
|
|
|
|
right = lx.to_text([tok]).rstrip()
|
|
|
|
print(f"{left:40.40} {right}")
|