# Mirror of https://github.com/python/cpython.git (Python source, 257 lines, 8.8 KiB)
from __future__ import annotations
|
|
import collections
|
|
import dataclasses as dc
|
|
import re
|
|
import shlex
|
|
from typing import Any
|
|
|
|
import libclinic
|
|
from libclinic import fail, ClinicError
|
|
from libclinic.language import Language
|
|
from libclinic.function import (
|
|
Module, Class, Function)
|
|
|
|
|
|
@dc.dataclass(slots=True, repr=False)
|
|
class Block:
|
|
r"""
|
|
Represents a single block of text embedded in
|
|
another file. If dsl_name is None, the block represents
|
|
verbatim text, raw original text from the file, in
|
|
which case "input" will be the only non-false member.
|
|
If dsl_name is not None, the block represents a Clinic
|
|
block.
|
|
|
|
input is always str, with embedded \n characters.
|
|
input represents the original text from the file;
|
|
if it's a Clinic block, it is the original text with
|
|
the body_prefix and redundant leading whitespace removed.
|
|
|
|
dsl_name is either str or None. If str, it's the text
|
|
found on the start line of the block between the square
|
|
brackets.
|
|
|
|
signatures is a list.
|
|
It may only contain clinic.Module, clinic.Class, and
|
|
clinic.Function objects. At the moment it should
|
|
contain at most one of each.
|
|
|
|
output is either str or None. If str, it's the output
|
|
from this block, with embedded '\n' characters.
|
|
|
|
indent is a str. It's the leading whitespace
|
|
that was found on every line of input. (If body_prefix is
|
|
not empty, this is the indent *after* removing the
|
|
body_prefix.)
|
|
|
|
"indent" is different from the concept of "preindent"
|
|
(which is not stored as state on Block objects).
|
|
"preindent" is the whitespace that
|
|
was found in front of every line of input *before* the
|
|
"body_prefix" (see the Language object). If body_prefix
|
|
is empty, preindent must always be empty too.
|
|
|
|
To illustrate the difference between "indent" and "preindent":
|
|
|
|
Assume that '_' represents whitespace.
|
|
If the block processed was in a Python file, and looked like this:
|
|
____#/*[python]
|
|
____#__for a in range(20):
|
|
____#____print(a)
|
|
____#[python]*/
|
|
"preindent" would be "____" and "indent" would be "__".
|
|
|
|
"""
|
|
input: str
|
|
dsl_name: str | None = None
|
|
signatures: list[Module | Class | Function] = dc.field(default_factory=list)
|
|
output: Any = None # TODO: Very dynamic; probably untypeable in its current form?
|
|
indent: str = ''
|
|
|
|
def __repr__(self) -> str:
|
|
dsl_name = self.dsl_name or "text"
|
|
def summarize(s: object) -> str:
|
|
s = repr(s)
|
|
if len(s) > 30:
|
|
return s[:26] + "..." + s[0]
|
|
return s
|
|
parts = (
|
|
repr(dsl_name),
|
|
f"input={summarize(self.input)}",
|
|
f"output={summarize(self.output)}"
|
|
)
|
|
return f"<clinic.Block {' '.join(parts)}>"
|
|
|
|
|
|
class BlockParser:
    """
    Block-oriented parser for Argument Clinic.
    Iterator, yields Block objects.

    The parser alternates between verbatim blocks (raw text up to the
    next start line) and Clinic blocks (the text between a start line
    and its stop line, plus any previously generated output that is
    terminated by a checksum line).
    """

    def __init__(
            self,
            input: str,
            language: Language,
            *,
            verify: bool = True
    ) -> None:
        r"""
        "input" should be a str object
        with embedded \n characters.

        "language" should be a Language object.

        "verify" controls whether the checksum stored on a checksum
        line is compared against the checksum of the output found
        in the file.
        """
        language.validate()

        # The lines are stored reversed so that the next line to read
        # can be taken (and lines can be put back) with pop()/extend()
        # on the right-hand end of the deque.
        self.input = collections.deque(reversed(input.splitlines(keepends=True)))
        self.block_start_line_number = self.line_number = 0

        self.language = language
        before, _, after = language.start_line.partition('{dsl_name}')
        assert _ == '{dsl_name}'
        self.find_start_re = libclinic.create_regex(before, after,
                                                    whole_line=False)
        self.start_re = libclinic.create_regex(before, after)
        self.verify = verify
        # Cache of the most recently built checksum regex, keyed by DSL name.
        self.last_checksum_re: re.Pattern[str] | None = None
        self.last_dsl_name: str | None = None
        # DSL name of the Clinic block whose start line was just consumed,
        # or None when the next block to parse is verbatim text.
        self.dsl_name: str | None = None
        self.first_block = True

    def __iter__(self) -> BlockParser:
        """Return the parser itself; it is its own iterator."""
        return self

    def __next__(self) -> Block:
        """Return the next Block, or raise StopIteration at end of input.

        An empty verbatim block at the very beginning of the input is
        skipped rather than returned.  A ClinicError raised while parsing
        a Clinic block is annotated with the language's filename and the
        current line number before being re-raised.
        """
        while True:
            if not self.input:
                raise StopIteration

            if self.dsl_name:
                try:
                    return_value = self.parse_clinic_block(self.dsl_name)
                except ClinicError as exc:
                    exc.filename = self.language.filename
                    exc.lineno = self.line_number
                    raise
                self.dsl_name = None
                self.first_block = False
                return return_value
            block = self.parse_verbatim_block()
            if self.first_block and not block.input:
                continue
            self.first_block = False
            return block

    def is_start_line(self, line: str) -> str | None:
        """Return the DSL name if "line" is a block start line, else None.

        Leading whitespace on the line is ignored.
        """
        match = self.start_re.match(line.lstrip())
        return match.group(1) if match else None

    def _line(self, lookahead: bool = False) -> str:
        """Consume and return the next input line.

        Unless "lookahead" is true, the line is also passed to the
        language's parse_line() method.
        """
        self.line_number += 1
        line = self.input.pop()
        if not lookahead:
            self.language.parse_line(line)
        return line

    def parse_verbatim_block(self) -> Block:
        """Collect raw text up to (not including) the next start line.

        If a start line is found it is consumed and its DSL name is
        remembered in self.dsl_name so that the next block is parsed
        as a Clinic block.
        """
        lines = []
        self.block_start_line_number = self.line_number

        while self.input:
            line = self._line()
            dsl_name = self.is_start_line(line)
            if dsl_name:
                self.dsl_name = dsl_name
                break
            lines.append(line)

        return Block("".join(lines))

    def parse_clinic_block(self, dsl_name: str) -> Block:
        """Parse the Clinic block whose start line was just consumed.

        Reads the block body up to the stop line, then scans forward for
        a checksum line.  If one is found, the text in between becomes
        the block's output (and is verified against the stored checksum
        when self.verify is true).  If none is found, the scanned text
        is pushed back onto the input and the block's output is None.

        Malformed stop lines, malformed checksum marker lines and
        checksum mismatches are reported through fail().
        """
        in_lines = []
        self.block_start_line_number = self.line_number + 1
        stop_line = self.language.stop_line.format(dsl_name=dsl_name)
        body_prefix = self.language.body_prefix.format(dsl_name=dsl_name)

        def is_stop_line(line: str) -> bool:
            # make sure to recognize stop line even if it
            # doesn't end with EOL (it could be the very end of the file)
            if line.startswith(stop_line):
                remainder = line.removeprefix(stop_line)
                if remainder and not remainder.isspace():
                    fail(f"Garbage after stop line: {remainder!r}")
                return True
            else:
                # gh-92256: don't allow incorrectly formatted stop lines
                if line.lstrip().startswith(stop_line):
                    fail(f"Whitespace is not allowed before the stop line: {line!r}")
                return False

        # consume body of program
        while self.input:
            line = self._line()
            if is_stop_line(line) or self.is_start_line(line):
                break
            if body_prefix:
                line = line.lstrip()
                assert line.startswith(body_prefix)
                line = line.removeprefix(body_prefix)
            in_lines.append(line)

        # consume output and checksum line, if present.
        if self.last_dsl_name == dsl_name:
            # Same DSL as the previous Clinic block: reuse the cached regex.
            checksum_re = self.last_checksum_re
        else:
            checksum_line = self.language.checksum_line.format(
                dsl_name=dsl_name, arguments='{arguments}')
            before, _, after = checksum_line.partition('{arguments}')
            assert _ == '{arguments}'
            checksum_re = libclinic.create_regex(before, after, word=False)
            self.last_dsl_name = dsl_name
            self.last_checksum_re = checksum_re
        assert checksum_re is not None

        # scan forward for checksum line
        out_lines = []
        arguments = None
        while self.input:
            # lookahead=True: these lines may be pushed back below, so
            # they must not be reported to the language yet.
            line = self._line(lookahead=True)
            match = checksum_re.match(line.lstrip())
            arguments = match.group(1) if match else None
            if arguments:
                break
            out_lines.append(line)
            if self.is_start_line(line):
                break

        output: str | None
        output = "".join(out_lines)
        if arguments:
            # "line" is the checksum marker line here.
            try:
                fields = shlex.split(arguments)
            except ValueError:
                # e.g. unbalanced quotes in the marker line
                fail(f"Mangled Argument Clinic marker line: {line!r}")

            d = {}
            for field in fields:
                name, equals, value = field.partition('=')
                if not equals:
                    fail(f"Mangled Argument Clinic marker line: {line!r}")
                d[name.strip()] = value.strip()

            if self.verify:
                # A marker line with an "input" field stores the output
                # checksum under "output"; otherwise it is stored under
                # "checksum".
                key = 'output' if 'input' in d else 'checksum'
                if key not in d:
                    # Report a missing field as a Clinic error instead of
                    # letting a bare KeyError escape.
                    fail(f"Mangled Argument Clinic marker line: {line!r}")
                checksum = d[key]

                computed = libclinic.compute_checksum(output, len(checksum))
                if checksum != computed:
                    fail("Checksum mismatch! "
                         f"Expected {checksum!r}, computed {computed!r}. "
                         "Suggested fix: remove all generated code including "
                         "the end marker, or use the '-f' option.")
        else:
            # put back output
            output_lines = output.splitlines(keepends=True)
            self.line_number -= len(output_lines)
            self.input.extend(reversed(output_lines))
            output = None

        return Block("".join(in_lines), dsl_name, output=output)
|