2020-04-22 22:29:27 +00:00
|
|
|
#!/usr/bin/env python3.8
|
|
|
|
|
|
|
|
"""pegen -- PEG Generator.
|
|
|
|
|
|
|
|
Search the web for PEG Parsers for reference.
|
|
|
|
"""
|
|
|
|
|
|
|
|
import argparse
|
|
|
|
import sys
|
|
|
|
import time
|
|
|
|
import token
|
|
|
|
import traceback
|
2020-04-28 12:11:55 +00:00
|
|
|
from typing import Tuple
|
|
|
|
|
2023-08-29 11:23:22 +00:00
|
|
|
from pegen.grammar import Grammar
|
|
|
|
from pegen.parser import Parser
|
|
|
|
from pegen.parser_generator import ParserGenerator
|
|
|
|
from pegen.tokenizer import Tokenizer
|
2020-12-26 19:11:29 +00:00
|
|
|
from pegen.validator import validate_grammar
|
2020-04-28 12:11:55 +00:00
|
|
|
|
|
|
|
|
|
|
|
def generate_c_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Run the C back end for the parsed command-line arguments.

    Args:
        args: Namespace produced by the ``c`` sub-command. The attributes read
            here are ``grammar_filename``, ``tokens_filename``, ``output``,
            ``compile_extension``, ``optimized``, ``skip_actions`` and
            ``verbose``.

    Returns:
        The ``(grammar, parser, tokenizer, generator)`` 4-tuple handed back by
        ``build_c_parser_and_generator``.

    Raises:
        SystemExit: with status 1 when the build fails and ``-v`` was not
            given; only the exception summary is printed in that case.
        Exception: the original build error, re-raised unchanged when ``-v``
            was given so that the full traceback is shown.
    """
    # Imported lazily, inside the function, exactly as the rest of this
    # module does for pegen.build.
    from pegen.build import build_c_parser_and_generator

    verbosity = args.verbose
    # Two debug flags derived from the -v count: the first needs at least
    # three -v's, the second is on for exactly two or for four and more.
    trace_tokenizer = verbosity >= 3
    trace_parser = verbosity == 2 or verbosity >= 4

    try:
        grammar, parser, tokenizer, gen = build_c_parser_and_generator(
            args.grammar_filename,
            args.tokens_filename,
            args.output,
            args.compile_extension,
            trace_tokenizer,
            trace_parser,
            verbosity,
            keep_asserts_in_extension=not args.optimized,
            skip_actions=args.skip_actions,
        )
    except Exception as err:
        if not args.verbose:
            # Quiet mode: one-line summary (no traceback object is passed),
            # a hint on how to get more, then a failing exit status.
            traceback.print_exception(err.__class__, err, None)
            sys.stderr.write("For full traceback, use -v\n")
            sys.exit(1)
        raise  # Show traceback
    else:
        return grammar, parser, tokenizer, gen
|
|
|
|
|
|
|
|
|
|
|
|
def generate_python_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Run the Python back end for the parsed command-line arguments.

    Args:
        args: Namespace produced by the ``python`` sub-command. The attributes
            read here are ``grammar_filename``, ``output``, ``skip_actions``
            and ``verbose``.

    Returns:
        The ``(grammar, parser, tokenizer, generator)`` 4-tuple handed back by
        ``build_python_parser_and_generator``.

    Raises:
        SystemExit: with status 1 when the build fails and ``-v`` was not
            given; only the exception summary is printed in that case.
        Exception: the original build error, re-raised unchanged when ``-v``
            was given so that the full traceback is shown.
    """
    # Imported lazily, inside the function, exactly as the rest of this
    # module does for pegen.build.
    from pegen.build import build_python_parser_and_generator

    verbosity = args.verbose
    # Two debug flags derived from the -v count: the first needs at least
    # three -v's, the second is on for exactly two or for four and more.
    trace_tokenizer = verbosity >= 3
    trace_parser = verbosity == 2 or verbosity >= 4

    try:
        grammar, parser, tokenizer, gen = build_python_parser_and_generator(
            args.grammar_filename,
            args.output,
            trace_tokenizer,
            trace_parser,
            skip_actions=args.skip_actions,
        )
    except Exception as err:
        if not args.verbose:
            # Quiet mode: one-line summary (no traceback object is passed),
            # a hint on how to get more, then a failing exit status.
            traceback.print_exception(err.__class__, err, None)
            sys.stderr.write("For full traceback, use -v\n")
            sys.exit(1)
        raise  # Show traceback
    else:
        return grammar, parser, tokenizer, gen
|
|
|
|
|
2020-04-22 22:29:27 +00:00
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Command-line interface.
#
# Global options live on ``argparser``; each target language is a sub-command
# that records its generator function under ``func`` so that main() can
# dispatch on it.
# ---------------------------------------------------------------------------
argparser = argparse.ArgumentParser(
    prog="pegen",
    description="Experimental PEG-like parser generator",
)
argparser.add_argument(
    "-q",
    "--quiet",
    action="store_true",
    help="Don't print the parsed grammar",
)
argparser.add_argument(
    "-v",
    "--verbose",
    action="count",
    default=0,
    help="Print timing stats; repeat for more debug output",
)

subparsers = argparser.add_subparsers(help="target language for the generated code")

# "c" sub-command.
c_parser = subparsers.add_parser(
    "c",
    help="Generate C code for inclusion into CPython",
)
c_parser.set_defaults(func=generate_c_code)
c_parser.add_argument("grammar_filename", help="Grammar description")
c_parser.add_argument("tokens_filename", help="Tokens description")
c_parser.add_argument(
    "-o",
    "--output",
    metavar="OUT",
    default="parse.c",
    help="Where to write the generated parser",
)
c_parser.add_argument(
    "--compile-extension",
    action="store_true",
    help="Compile generated C code into an extension module",
)
c_parser.add_argument(
    "--optimized",
    action="store_true",
    help="Compile the extension in optimized mode",
)

# "python" sub-command.
python_parser = subparsers.add_parser(
    "python",
    help="Generate Python code, needs grammar definition with Python actions",
)
python_parser.set_defaults(func=generate_python_code)
python_parser.add_argument("grammar_filename", help="Grammar description")
python_parser.add_argument(
    "-o",
    "--output",
    metavar="OUT",
    default="parse.py",
    help="Where to write the generated parser",
)

# Both targets take the same --skip-actions switch; it is registered last on
# each sub-parser, so the per-command option order is unchanged.
for _target_parser in (c_parser, python_parser):
    _target_parser.add_argument(
        "--skip-actions",
        action="store_true",
        help="Suppress code emission for rule actions",
    )
del _target_parser
|
|
|
|
|
|
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point: generate a parser and report on the run.

    Parses ``sys.argv`` with the module-level ``argparser``, dispatches to the
    generator stored under ``func`` by the chosen sub-command, validates the
    resulting grammar and then prints a report:

    * unless ``-q`` is given, the grammar (``str(grammar)``), preceded by its
      ``repr`` when ``-v`` is also given;
    * with ``-v``, the generator's first graph and first SCCs, the elapsed
      generation time, the tokenizer/parser cache sizes and memory statistics.

    If no sub-command was selected, ``argparser.error`` is called with a
    message asking for the target language.
    """
    from pegen.testutil import print_memstats

    args = argparser.parse_args()
    if "func" not in args:
        argparser.error("Must specify the target language mode ('c' or 'python')")

    # Use perf_counter() rather than time(): it is the clock intended for
    # measuring intervals and cannot go backwards if the system clock is
    # adjusted while the generator runs.
    t0 = time.perf_counter()
    grammar, parser, tokenizer, gen = args.func(args)
    t1 = time.perf_counter()

    validate_grammar(grammar)

    if not args.quiet:
        if args.verbose:
            print("Raw Grammar:")
            for line in repr(grammar).splitlines():
                print(" ", line)

        print("Clean Grammar:")
        for line in str(grammar).splitlines():
            print(" ", line)

    # Everything below is diagnostic output that only -v asks for.
    if not args.verbose:
        return

    print("First Graph:")
    for src, dsts in gen.first_graph.items():
        print(f" {src} -> {', '.join(dsts)}")
    print("First SCCS:")
    for scc in gen.first_sccs:
        # The SCC and its annotation share one output line.
        print(" ", scc, end="")
        if len(scc) > 1:
            print(
                " # Indirectly left-recursive; leaders:",
                {name for name in scc if grammar.rules[name].leader},
            )
        else:
            name = next(iter(scc))
            # A single-rule SCC is flagged only when the rule appears among
            # its own first-graph successors.
            if name in gen.first_graph[name]:
                print(" # Left-recursive")
            else:
                print()

    dt = t1 - t0
    diag = tokenizer.diagnose()
    nlines = diag.end[0]
    # NOTE(review): presumably ENDMARKER is reported on the line after the
    # last real source line, hence the adjustment -- confirm with Tokenizer.
    if diag.type == token.ENDMARKER:
        nlines -= 1
    print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
    if dt:
        print(f"; {nlines / dt:.0f} lines/sec")
    else:
        # Zero elapsed time: skip the rate to avoid dividing by zero.
        print()
    print("Caches sizes:")
    print(f" token array : {len(tokenizer._tokens):10}")
    print(f" cache : {len(parser._cache):10}")
    if not print_memstats():
        print("(Can't find psutil; install it for memory stats.)")
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run main() on Python 3.8 or newer; on anything
    # older, report the requirement on stderr and exit with status 1.
    if sys.version_info >= (3, 8):
        main()
    else:
        sys.stderr.write("ERROR: using pegen requires at least Python 3.8!\n")
        sys.exit(1)
|