diff --git a/Tools/peg_generator/Makefile b/Tools/peg_generator/Makefile index fb727c048b3..6ad9c91b985 100644 --- a/Tools/peg_generator/Makefile +++ b/Tools/peg_generator/Makefile @@ -22,7 +22,7 @@ data/xxl.py: build: peg_extension/parse.c -peg_extension/parse.c: $(GRAMMAR) $(TOKENS) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen/pegen.c ../../Parser/pegen/parse_string.c ../../Parser/pegen/*.h pegen/grammar_parser.py +peg_extension/parse.c: $(GRAMMAR) $(TOKENS) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen.c ../../Parser/string_parser.c ../../Parser/*.h pegen/grammar_parser.py $(PYTHON) -m pegen -q c $(GRAMMAR) $(TOKENS) -o peg_extension/parse.c --compile-extension clean: @@ -70,18 +70,10 @@ stats: peg_extension/parse.c data/xxl.py time: time_compile time_compile: venv data/xxl.py - $(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl compile + $(VENVPYTHON) scripts/benchmark.py --target=xxl compile time_parse: venv data/xxl.py - $(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl parse - -time_old: time_old_compile - -time_old_compile: venv data/xxl.py - $(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl compile - -time_old_parse: venv data/xxl.py - $(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl parse + $(VENVPYTHON) scripts/benchmark.py --target=xxl parse time_peg_dir: venv $(VENVPYTHON) scripts/test_parse_directory.py \ diff --git a/Tools/peg_generator/pegen/keywordgen.py b/Tools/peg_generator/pegen/keywordgen.py index 279c34b6dae..86849440966 100644 --- a/Tools/peg_generator/pegen/keywordgen.py +++ b/Tools/peg_generator/pegen/keywordgen.py @@ -41,9 +41,7 @@ def main(): "grammar", type=str, help="The file with the grammar definition in PEG format" ) parser.add_argument( - "tokens_file", - type=argparse.FileType("r"), - help="The file with the token definitions" + "tokens_file", type=argparse.FileType("r"), help="The file with the token definitions" ) parser.add_argument( "keyword_file", @@ -61,9 +59,7 @@ def main(): gen.collect_todo() with args.keyword_file as thefile: - all_keywords = sorted( - list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS - ) + all_keywords = sorted(list(gen.callmakervisitor.keyword_cache.keys()) + EXTRA_KEYWORDS) keywords = ",\n ".join(map(repr, all_keywords)) thefile.write(TEMPLATE.format(keywords=keywords)) diff --git a/Tools/peg_generator/scripts/benchmark.py b/Tools/peg_generator/scripts/benchmark.py index af356bed783..5fbedaa3b0e 100644 --- a/Tools/peg_generator/scripts/benchmark.py +++ b/Tools/peg_generator/scripts/benchmark.py @@ -6,13 +6,13 @@ import os from time import time -import _peg_parser - try: import memory_profiler except ModuleNotFoundError: - print("Please run `make venv` to create a virtual environment and install" - " all the dependencies, before running this script.") + print( + "Please run `make venv` to create a virtual environment and install" + " all the dependencies, before running this script." + ) sys.exit(1) sys.path.insert(0, os.getcwd()) @@ -21,13 +21,6 @@ argparser = argparse.ArgumentParser( prog="benchmark", description="Reproduce the various pegen benchmarks" ) -argparser.add_argument( - "--parser", - action="store", - choices=["new", "old"], - default="pegen", - help="Which parser to benchmark (default is pegen)", -) argparser.add_argument( "--target", action="store", @@ -40,12 +33,7 @@ command_compile = subcommands.add_parser( "compile", help="Benchmark parsing and compiling to bytecode" ) -command_parse = subcommands.add_parser( - "parse", help="Benchmark parsing and generating an ast.AST" -) -command_notree = subcommands.add_parser( - "notree", help="Benchmark parsing and dumping the tree" -) +command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST") def benchmark(func): @@ -66,59 +54,37 @@ def wrapper(*args): @benchmark -def time_compile(source, parser): - if parser == "old": - return _peg_parser.compile_string( - source, - oldparser=True, - ) - else: - return _peg_parser.compile_string(source) +def time_compile(source): + return compile(source, "", "exec") @benchmark -def time_parse(source, parser): - if parser == "old": - return _peg_parser.parse_string(source, oldparser=True) - else: - return _peg_parser.parse_string(source) +def time_parse(source): + return ast.parse(source) -@benchmark -def time_notree(source, parser): - if parser == "old": - return _peg_parser.parse_string(source, oldparser=True, ast=False) - else: - return _peg_parser.parse_string(source, ast=False) - - -def run_benchmark_xxl(subcommand, parser, source): +def run_benchmark_xxl(subcommand, source): if subcommand == "compile": - time_compile(source, parser) + time_compile(source) elif subcommand == "parse": - time_parse(source, parser) - elif subcommand == "notree": - time_notree(source, parser) + time_parse(source) -def run_benchmark_stdlib(subcommand, parser): - modes = {"compile": 2, "parse": 1, "notree": 0} +def run_benchmark_stdlib(subcommand): + modes = {"compile": 2, "parse": 1} for _ in range(3): parse_directory( "../../Lib", verbose=False, excluded_files=["*/bad*", "*/lib2to3/tests/data/*",], - tree_arg=0, short=True, mode=modes[subcommand], - oldparser=(parser == "old"), ) def main(): args = argparser.parse_args() subcommand = args.subcommand - parser = args.parser target = args.target if subcommand is None: @@ -127,9 +93,9 @@ def main(): if target == "xxl": with open(os.path.join("data", "xxl.py"), "r") as f: source = f.read() - run_benchmark_xxl(subcommand, parser, source) + run_benchmark_xxl(subcommand, source) elif target == "stdlib": - run_benchmark_stdlib(subcommand, parser) + run_benchmark_stdlib(subcommand) if __name__ == "__main__": diff --git a/Tools/peg_generator/scripts/find_max_nesting.py b/Tools/peg_generator/scripts/find_max_nesting.py index f2fdd00bfb7..92045c93ff7 100755 --- a/Tools/peg_generator/scripts/find_max_nesting.py +++ b/Tools/peg_generator/scripts/find_max_nesting.py @@ -14,8 +14,7 @@ Usage: python -m scripts.find_max_nesting """ import sys - -from _peg_parser import parse_string +import ast GRAMMAR_FILE = "data/python.gram" INITIAL_NESTING_DEPTH = 10 @@ -28,9 +27,8 @@ def check_nested_expr(nesting_depth: int) -> bool: expr = f"{'(' * nesting_depth}0{')' * nesting_depth}" - try: - parse_string(expr) + ast.parse(expr) print(f"Nesting depth of {nesting_depth} is successful") return True except Exception as err: diff --git a/Tools/peg_generator/scripts/show_parse.py b/Tools/peg_generator/scripts/show_parse.py deleted file mode 100755 index b4ee5a1b357..00000000000 --- a/Tools/peg_generator/scripts/show_parse.py +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env python3.8 - -"""Show the parse tree for a given program, nicely formatted. - -Example: - -$ scripts/show_parse.py a+b -Module( - body=[ - Expr( - value=BinOp( - left=Name(id="a", ctx=Load()), op=Add(), right=Name(id="b", ctx=Load()) - ) - ) - ], - type_ignores=[], -) -$ - -Use -v to show line numbers and column offsets. - -The formatting is done using black. You can also import this module -and call one of its functions. -""" - -import argparse -import ast -import difflib -import os -import sys -import tempfile - -import _peg_parser - -from typing import List - -sys.path.insert(0, os.getcwd()) -from pegen.ast_dump import ast_dump - -parser = argparse.ArgumentParser() -parser.add_argument( - "-d", "--diff", action="store_true", help="show diff between grammar and ast (requires -g)" -) -parser.add_argument( - "-p", - "--parser", - choices=["new", "old"], - default="new", - help="choose the parser to use" -) -parser.add_argument( - "-m", - "--multiline", - action="store_true", - help="concatenate program arguments using newline instead of space", -) -parser.add_argument("-v", "--verbose", action="store_true", help="show line/column numbers") -parser.add_argument("program", nargs="+", help="program to parse (will be concatenated)") - - -def format_tree(tree: ast.AST, verbose: bool = False) -> str: - with tempfile.NamedTemporaryFile("w+") as tf: - tf.write(ast_dump(tree, include_attributes=verbose)) - tf.write("\n") - tf.flush() - cmd = f"black -q {tf.name}" - sts = os.system(cmd) - if sts: - raise RuntimeError(f"Command {cmd!r} failed with status 0x{sts:x}") - tf.seek(0) - return tf.read() - - -def diff_trees(a: ast.AST, b: ast.AST, verbose: bool = False) -> List[str]: - sa = format_tree(a, verbose) - sb = format_tree(b, verbose) - la = sa.splitlines() - lb = sb.splitlines() - return list(difflib.unified_diff(la, lb, "a", "b", lineterm="")) - - -def show_parse(source: str, verbose: bool = False) -> str: - tree = _peg_parser.parse_string(source, oldparser=True) - return format_tree(tree, verbose).rstrip("\n") - - -def print_parse(source: str, verbose: bool = False) -> None: - print(show_parse(source, verbose)) - - -def main() -> None: - args = parser.parse_args() - new_parser = args.parser == "new" - if args.multiline: - sep = "\n" - else: - sep = " " - program = sep.join(args.program) - if new_parser: - tree = _peg_parser.parse_string(program) - - if args.diff: - a = _peg_parser.parse_string(program, oldparser=True) - b = tree - diff = diff_trees(a, b, args.verbose) - if diff: - for line in diff: - print(line) - else: - print("# Trees are the same") - else: - print("# Parsed using the new parser") - print(format_tree(tree, args.verbose)) - else: - tree = _peg_parser.parse_string(program, oldparser=True) - print("# Parsed using the old parser") - print(format_tree(tree, args.verbose)) - - -if __name__ == "__main__": - main() diff --git a/Tools/peg_generator/scripts/test_parse_directory.py b/Tools/peg_generator/scripts/test_parse_directory.py index 63204ce9dc1..d8f4f0ecd3e 100755 --- a/Tools/peg_generator/scripts/test_parse_directory.py +++ b/Tools/peg_generator/scripts/test_parse_directory.py @@ -7,7 +7,6 @@ import time import traceback import tokenize -import _peg_parser from glob import glob from pathlib import PurePath @@ -16,7 +15,6 @@ sys.path.insert(0, os.getcwd()) from pegen.ast_dump import ast_dump from pegen.testutil import print_memstats -from scripts import show_parse SUCCESS = "\033[92m" FAIL = "\033[91m" @@ -40,9 +38,6 @@ argparser.add_argument( "-v", "--verbose", action="store_true", help="Display detailed errors for failures" ) -argparser.add_argument( - "-t", "--tree", action="count", help="Compare parse tree to official AST", default=0 -) def report_status( @@ -79,66 +74,13 @@ def report_status( print(f" {str(error.__class__.__name__)}: {error}") -def compare_trees( - actual_tree: ast.AST, file: str, verbose: bool, include_attributes: bool = False, -) -> int: - with open(file) as f: - expected_tree = _peg_parser.parse_string(f.read(), oldparser=True) - - expected_text = ast_dump(expected_tree, include_attributes=include_attributes) - actual_text = ast_dump(actual_tree, include_attributes=include_attributes) - if actual_text == expected_text: - if verbose: - print("Tree for {file}:") - print(show_parse.format_tree(actual_tree, include_attributes)) - return 0 - - print(f"Diffing ASTs for {file} ...") - - expected = show_parse.format_tree(expected_tree, include_attributes) - actual = show_parse.format_tree(actual_tree, include_attributes) - - if verbose: - print("Expected for {file}:") - print(expected) - print("Actual for {file}:") - print(actual) - print(f"Diff for {file}:") - - diff = show_parse.diff_trees(expected_tree, actual_tree, include_attributes) - for line in diff: - print(line) - - return 1 - - -def parse_file(source: str, file: str, mode: int, oldparser: bool) -> Tuple[Any, float]: +def parse_file(source: str, file: str) -> Tuple[Any, float]: t0 = time.time() - if mode == COMPILE: - result = _peg_parser.compile_string( - source, - filename=file, - oldparser=oldparser, - ) - else: - result = _peg_parser.parse_string( - source, - filename=file, - oldparser=oldparser, - ast=(mode == PARSE), - ) + result = ast.parse(source, filename=file) t1 = time.time() return result, t1 - t0 -def is_parsing_failure(source: str) -> bool: - try: - _peg_parser.parse_string(source, mode="exec", oldparser=True) - except SyntaxError: - return False - return True - - def generate_time_stats(files, total_seconds) -> None: total_files = len(files) total_bytes = 0 @@ -160,27 +102,11 @@ def generate_time_stats(files, total_seconds) -> None: ) -def parse_directory( - directory: str, - verbose: bool, - excluded_files: List[str], - tree_arg: int, - short: bool, - mode: int, - oldparser: bool, -) -> int: - if tree_arg: - assert mode == PARSE, "Mode should be 1 (parse), when comparing the generated trees" - - if oldparser and tree_arg: - print("Cannot specify tree argument with the cpython parser.", file=sys.stderr) - return 1 - +def parse_directory(directory: str, verbose: bool, excluded_files: List[str], short: bool) -> int: # For a given directory, traverse files and attempt to parse each one # - Output success/failure for each file errors = 0 files = [] - trees = {} # Trees to compare (after everything else is done) total_seconds = 0 for file in sorted(glob(f"{directory}/**/*.py", recursive=True)): @@ -192,39 +118,20 @@ def parse_directory( source = f.read() try: - result, dt = parse_file(source, file, mode, oldparser) + result, dt = parse_file(source, file) total_seconds += dt - if tree_arg: - trees[file] = result report_status(succeeded=True, file=file, verbose=verbose, short=short) except SyntaxError as error: - if is_parsing_failure(source): - print(f"File {file} cannot be parsed by either parser.") - else: - report_status( - succeeded=False, file=file, verbose=verbose, error=error, short=short - ) - errors += 1 + report_status(succeeded=False, file=file, verbose=verbose, error=error, short=short) + errors += 1 files.append(file) - t1 = time.time() - generate_time_stats(files, total_seconds) if short: print_memstats() if errors: print(f"Encountered {errors} failures.", file=sys.stderr) - - # Compare trees (the dict is empty unless -t is given) - compare_trees_errors = 0 - for file, tree in trees.items(): - if not short: - print("Comparing ASTs for", file) - if compare_trees(tree, file, verbose, tree_arg >= 2) == 1: - compare_trees_errors += 1 - - if errors or compare_trees_errors: return 1 return 0 @@ -235,20 +142,8 @@ def main() -> None: directory = args.directory verbose = args.verbose excluded_files = args.exclude - tree = args.tree short = args.short - mode = 1 if args.tree else 2 - sys.exit( - parse_directory( - directory, - verbose, - excluded_files, - tree, - short, - mode, - oldparser=False, - ) - ) + sys.exit(parse_directory(directory, verbose, excluded_files, short)) if __name__ == "__main__":