cpython/Tools/scripts/freeze_modules.py

497 lines
16 KiB
Python
Raw Normal View History

"""Freeze modules and regen related files (e.g. Python/frozen.c).
See the notes at the top of Python/frozen.c for more info.
"""
import os
import os.path
import subprocess
import sys
import textwrap
from update_file import updating_file_with_tmpfile
# The repository root is found relative to this script: ROOT_DIR is two
# levels above the directory that holds this file.
SCRIPTS_DIR = os.path.abspath(os.path.dirname(__file__))
TOOLS_DIR = os.path.dirname(SCRIPTS_DIR)
ROOT_DIR = os.path.dirname(TOOLS_DIR)

# Where the source (.py) files of stdlib modules are looked up by default.
STDLIB_DIR = os.path.join(ROOT_DIR, 'Lib')
# If MODULES_DIR is changed then the .gitattributes file needs to be updated.
# The path is joined from separate components (like the other paths here)
# so that it never contains a hard-coded "/" separator.
MODULES_DIR = os.path.join(ROOT_DIR, 'Python', 'frozen_modules')
# The executable that is run to freeze a single module.
TOOL = os.path.join(ROOT_DIR, 'Programs', '_freeze_module')

# The files that are regenerated by this script.
FROZEN_FILE = os.path.join(ROOT_DIR, 'Python', 'frozen.c')
MAKEFILE = os.path.join(ROOT_DIR, 'Makefile.pre.in')
PCBUILD_PROJECT = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj')
PCBUILD_FILTERS = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj.filters')
# The modules that get frozen, as a list of (section name, raw specs) pairs.
FROZEN = [
    # The format of each raw spec is described in parse_frozen_spec().
    # When a frozenid shows up more than once, the first one is re-used.
    ('importlib', [
        'importlib._bootstrap : _frozen_importlib',
        'importlib._bootstrap_external : _frozen_importlib_external',
        'zipimport',
    ]),
    ('Test module', [
        f"hello : __hello__ = {os.path.join(TOOLS_DIR, 'freeze', 'flag.py')}",
        'hello : <__phello__>',
        'hello : __phello__.spam',
    ]),
]
#######################################
# specs
def parse_frozen_spec(rawspec, knownids=None, section=None):
    """Return (spec, submodules) for the given raw spec.

    "spec" is the tuple (frozenid, pyfile, modname, ispkg, section) for
    the module named by the raw spec; "pyfile" is None if no filename
    was provided or resolved.  "submodules" is None, or an iterator of
    tuples with the same layout (one per submodule matched by a pattern
    in the frozenid).

    "knownids" is an optional collection of the frozen IDs that were
    already parsed.  "section" is passed through to the resulting specs.

    Supported formats:

      frozenid
      frozenid : modname
      frozenid : modname = pyfile

    "frozenid" and "modname" must be valid module names (dot-separated
    identifiers).  If "modname" is not provided then "frozenid" is used.
    If "pyfile" is not provided then the filename of the module
    corresponding to "frozenid" is used.

    Angle brackets around a frozenid (e.g. "<encodings>") indicate
    it is a package.  This also means it must be an actual module
    (i.e. "pyfile" cannot have been provided).  Such values can have
    patterns to expand submodules:

      <encodings.*>    - also freeze all direct submodules
      <encodings.**.*> - also freeze the full submodule tree

    As with "frozenid", angle brackets around "modname" indicate
    it is a package.  However, patterns in "modname" are not supported
    and, if "modname" has brackets, then "frozenid" must not.
    In that case "pyfile" is optional (but must not be a directory);
    if it is omitted and the frozenid is not already known then the
    filename of the (non-package) module "frozenid" is used.
    """
    frozenid, _, remainder = rawspec.partition(':')
    modname, _, pyfile = remainder.partition('=')
    frozenid = frozenid.strip()
    modname = modname.strip()
    pyfile = pyfile.strip()

    submodules = None
    if modname.startswith('<') and modname.endswith('>'):
        # A regular module is exposed under a package name.
        assert check_modname(frozenid), rawspec
        modname = modname[1:-1]
        assert check_modname(modname), rawspec
        # "knownids" defaults to None, so guard the membership test
        # the same way the other branches do.
        if knownids and frozenid in knownids:
            pass
        elif pyfile:
            assert not os.path.isdir(pyfile), rawspec
        else:
            pyfile = _resolve_module(frozenid, ispkg=False)
        ispkg = True
    elif pyfile:
        # An explicit file was provided for a not-yet-seen frozenid.
        assert check_modname(frozenid), rawspec
        assert not knownids or frozenid not in knownids, rawspec
        assert check_modname(modname), rawspec
        assert not os.path.isdir(pyfile), rawspec
        ispkg = False
    elif knownids and frozenid in knownids:
        # The frozenid was already parsed, so this is just another name
        # for it and there is no file to resolve.
        assert check_modname(frozenid), rawspec
        assert check_modname(modname), rawspec
        ispkg = False
    else:
        assert not modname or check_modname(modname), rawspec
        # The first resolved item is the module itself; any remaining
        # items are the submodules matched by a pattern in the frozenid.
        resolved = iter(resolve_modules(frozenid))
        frozenid, pyfile, ispkg = next(resolved)
        if not modname:
            modname = frozenid
        if ispkg:
            pkgid = frozenid
            pkgname = modname
            def iter_subs():
                for frozenid, pyfile, ispkg in resolved:
                    assert not knownids or frozenid not in knownids, (frozenid, rawspec)
                    if pkgname:
                        # Submodules live under the package's module name.
                        modname = frozenid.replace(pkgid, pkgname, 1)
                    else:
                        modname = frozenid
                    yield frozenid, pyfile, modname, ispkg, section
            submodules = iter_subs()

    spec = (frozenid, pyfile or None, modname, ispkg, section)
    return spec, submodules
def parse_frozen_specs(rawspecs=FROZEN):
    """Yield the parsed spec for each raw spec (and its submodules).

    "rawspecs" is an iterable of (section, raw specs) pairs.  The frozen
    IDs that were already yielded are tracked so that later specs which
    re-use one of them can be recognized.
    """
    known = set()
    for section, section_specs in rawspecs:
        parsed = _parse_frozen_specs(section_specs, section, known)
        for info in parsed:
            yield info
            # The ID is only recorded once the consumer comes back for
            # the next spec, i.e. before the next raw spec gets parsed.
            known.add(info[0])
def _parse_frozen_specs(rawspecs, section, seen):
    """Yield the spec for each raw spec, each followed by its submodules."""
    for raw in rawspecs:
        info, submodules = parse_frozen_spec(raw, seen, section)
        yield info
        # "submodules" is None unless the raw spec expanded to submodules.
        if submodules:
            yield from submodules
def resolve_frozen_file(spec, destdir=MODULES_DIR):
    """Return the filename of the frozen .h file for the given module.

    "spec" is either a frozen ID (a str) or a 5-item spec tuple like the
    one parse_frozen_spec() builds, i.e.
    (frozenid, pyfile, modname, ispkg, section).
    """
    if isinstance(spec, str):
        frozenid = spec
    else:
        # The frozen ID is the first item of a spec tuple.
        frozenid, _, _, _, _ = spec
    # We use a consistent naming convention for all frozen modules.
    return os.path.join(destdir, frozenid.replace('.', '_')) + '.h'
def resolve_frozen_files(specs, destdir=MODULES_DIR):
    """Return (frozen, frozenids) for the given specs.

    "frozen" maps each distinct frozen ID to (pyfile, frozenfile), taken
    from the first spec with that ID.  "frozenids" lists those IDs in the
    order they were first encountered.
    """
    frozen = {}
    frozenids = []
    lastsection = None
    for frozenid, pyfile, *_, section in specs:
        if frozenid not in frozen:
            lastsection = section
            frozen[frozenid] = (pyfile, resolve_frozen_file(frozenid, destdir))
            frozenids.append(frozenid)
        elif section is None:
            lastsection = None
        else:
            # A re-used ID must be in the same section as the most
            # recently added frozen module.
            assert section == lastsection
    return frozen, frozenids
#######################################
# generic helpers
def resolve_modules(modname, pyfile=None):
    """Yield (modname, pyfile, ispkg) for the module and its submodules.

    Angle brackets around "modname" mark it as a package.  A bracketed
    name may end with a pattern ("<pkg.*>" or "<pkg.**.*>"), in which
    case the package itself is yielded first, followed by the matching
    submodules (see iter_submodules()).

    If "pyfile" is not provided then the module's file is looked up
    with _resolve_module().  If it is a directory then the module is
    looked up relative to that directory.

    Raises ValueError if the name (minus brackets and pattern) is not
    a valid module name.
    """
    if modname.startswith('<') and modname.endswith('>'):
        if pyfile:
            assert os.path.isdir(pyfile) or os.path.basename(pyfile) == '__init__.py', pyfile
        ispkg = True
        modname = modname[1:-1]
        rawname = modname
        # For now, we only expect match patterns at the end of the name.
        _modname, sep, match = modname.rpartition('.')
        if sep and _modname.endswith('.**'):
            modname = _modname[:-3]
            match = f'**.{match}'
        elif sep and match and not match.isidentifier():
            modname = _modname
        else:
            # It's a plain name (no pattern) so we leave it alone.
            # The last component must not be treated as a pattern;
            # otherwise "<a.b>" would try to match submodules with "b".
            match = None
    else:
        ispkg = False
        rawname = modname
        match = None

    if not check_modname(modname):
        raise ValueError(f'not a valid module name ({rawname})')

    if not pyfile:
        pyfile = _resolve_module(modname, ispkg=ispkg)
    elif os.path.isdir(pyfile):
        pyfile = _resolve_module(modname, pyfile, ispkg)
    yield modname, pyfile, ispkg

    if match:
        # The submodules live next to the package's __init__.py.
        pkgdir = os.path.dirname(pyfile)
        yield from iter_submodules(modname, pkgdir, match)
def check_modname(modname):
    """Return True if every dot-separated part of the name is an identifier."""
    for part in modname.split('.'):
        if not part.isidentifier():
            return False
    return True
def iter_submodules(pkgname, pkgdir=None, match='*'):
    """Return an iterator of (modname, pyfile, ispkg) for the submodules.

    "pkgdir" defaults to the package's directory under STDLIB_DIR.
    "match" is resolved with _resolve_modname_matcher(); a false value
    means the full submodule tree ("**.*").
    """
    if not pkgdir:
        pkgdir = os.path.join(STDLIB_DIR, *pkgname.split('.'))
    match_modname = _resolve_modname_matcher(match or '**.*', pkgdir)

    def _walk(parent, dirname):
        # Entries are visited in name order so the results are stable.
        entries = sorted(os.scandir(dirname), key=lambda e: e.name)
        for entry in entries:
            matched, recursive = match_modname(entry.name)
            if not matched:
                continue
            fullname = f'{parent}.{entry.name}'
            if fullname.endswith('.py'):
                yield fullname[:-3], entry.path, False
                continue
            if not entry.is_dir():
                continue
            initfile = os.path.join(entry.path, '__init__.py')
            # We ignore namespace packages.
            if not os.path.exists(initfile):
                continue
            yield fullname, initfile, True
            if recursive:
                yield from _walk(fullname, entry.path)

    return _walk(pkgname, pkgdir)
def _resolve_modname_matcher(match, rootdir=None):
if isinstance(match, str):
if match.startswith('**.'):
recursive = True
pat = match[3:]
assert match
else:
recursive = False
pat = match
if pat == '*':
def match_modname(modname):
return True, recursive
else:
raise NotImplementedError(match)
elif callable(match):
match_modname = match(rootdir)
else:
raise ValueError(f'unsupported matcher {match!r}')
return match_modname
def _resolve_module(modname, pathentry=STDLIB_DIR, ispkg=False):
    """Return the filename the module would have under "pathentry".

    "pathentry" must be an absolute directory.  For a package the result
    is the package's __init__.py; otherwise it is the module's .py file.
    The file is not checked for existence.
    """
    assert pathentry, pathentry
    pathentry = os.path.normpath(pathentry)
    assert os.path.isabs(pathentry)
    parts = modname.split('.')
    if not ispkg:
        return os.path.join(pathentry, *parts) + '.py'
    return os.path.join(pathentry, *parts, '__init__.py')
#######################################
# regenerating dependent files
def find_marker(lines, marker, file):
    """Return the index of the first line that contains the marker.

    "file" is only used for the error message.  Raises Exception if no
    line contains the marker.
    """
    hits = (index for index, text in enumerate(lines) if marker in text)
    found = next(hits, None)
    if found is None:
        raise Exception(f"Can't find {marker!r} in file {file}")
    return found
def replace_block(lines, start_marker, end_marker, replacements, file):
    """Return a copy of "lines" with the marked block replaced.

    The lines containing the two markers are kept; everything between
    them is replaced by "replacements", each stripped of trailing
    whitespace and terminated with os.linesep.  "file" is only used
    for error messages.  Raises Exception if a marker is missing or
    the end marker is not after the start marker.
    """
    first = find_marker(lines, start_marker, file)
    last = find_marker(lines, end_marker, file)
    if last <= first:
        raise Exception(f"End marker {end_marker!r} "
                        f"occurs before start marker {start_marker!r} "
                        f"in file {file}")
    body = [text.rstrip() + os.linesep for text in replacements]
    head = lines[:first + 1]
    tail = lines[last:]
    return head + body + tail
def regen_frozen(specs, dest=MODULES_DIR):
    """Regenerate the frozen-module blocks in FROZEN_FILE.

    Two blocks of the file are replaced: the "#include" lines for the
    frozen .h files and the entries of the _PyImport_FrozenModules table.

    "specs" is a sequence of (frozenid, pyfile, modname, ispkg, section)
    tuples.  "dest" is either the directory that holds the frozen files
    (a str) or an already resolved (frozenids, frozen) pair.  Note that
    the pair's order is the reverse of what resolve_frozen_files()
    returns.
    """
    if isinstance(dest, str):
        frozen, frozenids = resolve_frozen_files(specs, dest)
    else:
        frozenids, frozen = dest

    headerlines = []
    parentdir = os.path.dirname(FROZEN_FILE)
    for frozenid in frozenids:
        # Adding a comment to separate sections here doesn't add much,
        # so we don't.
        _, frozenfile = frozen[frozenid]
        header = os.path.relpath(frozenfile, parentdir)
        headerlines.append(f'#include "{header}"')

    deflines = []
    indent = ' '
    lastsection = None
    for spec in specs:
        frozenid, _, modname, ispkg, section = spec
        if section != lastsection:
            # Each section starts with a comment, separated from the
            # previous section by a blank line.
            if lastsection is not None:
                deflines.append('')
            deflines.append(f'/* {section} */')
        lastsection = section

        # This matches what we do in Programs/_freeze_module.c:
        name = frozenid.replace('.', '_')
        symbol = '_Py_M__' + name
        # The size is negated in the entry of a package.
        pkg = '-' if ispkg else ''
        line = ('{"%s", %s, %s(int)sizeof(%s)},'
                % (modname, symbol, pkg, symbol))
        # TODO: Consider not folding lines
        if len(line) < 80:
            deflines.append(line)
        else:
            line1, _, line2 = line.rpartition(' ')
            deflines.append(line1)
            deflines.append(indent + line2)

    # Guard against there being no specs at all.
    if deflines and not deflines[0]:
        del deflines[0]
    for i, line in enumerate(deflines):
        if line:
            deflines[i] = indent + line

    print(f'# Updating {os.path.relpath(FROZEN_FILE)}')
    with updating_file_with_tmpfile(FROZEN_FILE) as (infile, outfile):
        lines = infile.readlines()
        # TODO: Use more obvious markers, e.g.
        # $START GENERATED FOOBAR$ / $END GENERATED FOOBAR$
        lines = replace_block(
            lines,
            "/* Includes for frozen modules: */",
            "/* End includes */",
            headerlines,
            FROZEN_FILE,
        )
        lines = replace_block(
            lines,
            "static const struct _frozen _PyImport_FrozenModules[] =",
            "/* sentinel */",
            deflines,
            FROZEN_FILE,
        )
        outfile.writelines(lines)
def regen_makefile(frozenids, frozen):
    """Regenerate the frozen-module blocks in MAKEFILE.

    Two blocks of the file are replaced: the FROZEN_FILES list and the
    rules that freeze each module.

    "frozenids" is the ordered list of frozen IDs and "frozen" maps each
    of them to (pyfile, frozenfile).
    """
    frozenfiles = []
    rules = ['']
    for frozenid in frozenids:
        pyfile, frozenfile = frozen[frozenid]
        # Paths in the makefile always use forward slashes, regardless
        # of the platform this script runs on.
        header = os.path.relpath(frozenfile, ROOT_DIR).replace('\\', '/')
        frozenfiles.append(f'\t\t$(srcdir)/{header} \\')

        _pyfile = os.path.relpath(pyfile, ROOT_DIR).replace('\\', '/')
        # Note that we freeze the module to the target .h file
        # instead of going through an intermediate file like we used to.
        rules.append(f'{header}: $(srcdir)/Programs/_freeze_module $(srcdir)/{_pyfile}')
        rules.append(f'\t$(srcdir)/Programs/_freeze_module {frozenid} \\')
        rules.append(f'\t\t$(srcdir)/{_pyfile} \\')
        rules.append(f'\t\t$(srcdir)/{header}')
        rules.append('')

    # The last entry of the list must not end with a line continuation.
    if frozenfiles:
        frozenfiles[-1] = frozenfiles[-1].rstrip(" \\")

    print(f'# Updating {os.path.relpath(MAKEFILE)}')
    with updating_file_with_tmpfile(MAKEFILE) as (infile, outfile):
        lines = infile.readlines()
        lines = replace_block(
            lines,
            "FROZEN_FILES =",
            "# End FROZEN_FILES",
            frozenfiles,
            MAKEFILE,
        )
        lines = replace_block(
            lines,
            "# BEGIN: freezing modules",
            "# END: freezing modules",
            rules,
            MAKEFILE,
        )
        outfile.writelines(lines)
def regen_pcbuild(frozenids, frozen):
    """Regenerate the frozen-module blocks in the PCbuild project files.

    The block between the "BEGIN frozen modules" and "END frozen modules"
    markers is replaced in both PCBUILD_PROJECT and PCBUILD_FILTERS.

    "frozenids" is the ordered list of frozen IDs and "frozen" maps each
    of them to (pyfile, frozenfile).
    """
    projlines = []
    filterlines = []
    for frozenid in frozenids:
        pyfile, frozenfile = frozen[frozenid]

        # The project files always use backslashes in paths.
        _pyfile = os.path.relpath(pyfile, ROOT_DIR).replace('/', '\\')
        header = os.path.relpath(frozenfile, ROOT_DIR).replace('/', '\\')
        # Drop only the ".h" extension.  (str.strip('.h') would remove
        # any leading/trailing "." and "h" characters, turning
        # "hello.h" into "ello".)
        basename = header.split('\\')[-1]
        intfile = os.path.splitext(basename)[0] + '.g.h'
        projlines.append(f' <None Include="..\\{_pyfile}">')
        projlines.append(f' <ModName>{frozenid}</ModName>')
        projlines.append(f' <IntFile>$(IntDir){intfile}</IntFile>')
        projlines.append(f' <OutFile>$(PySourcePath){header}</OutFile>')
        projlines.append(' </None>')

        filterlines.append(f' <None Include="..\\{_pyfile}">')
        filterlines.append(' <Filter>Python Files</Filter>')
        filterlines.append(' </None>')

    # Both files use the same markers around their generated block.
    for filename, newlines in [
        (PCBUILD_PROJECT, projlines),
        (PCBUILD_FILTERS, filterlines),
    ]:
        print(f'# Updating {os.path.relpath(filename)}')
        with updating_file_with_tmpfile(filename) as (infile, outfile):
            lines = infile.readlines()
            lines = replace_block(
                lines,
                '<!-- BEGIN frozen modules -->',
                '<!-- END frozen modules -->',
                newlines,
                filename,
            )
            outfile.writelines(lines)
#######################################
# freezing modules
def freeze_module(modname, pyfile=None, destdir=MODULES_DIR):
    """Generate the frozen module .h file for the given module.

    "modname" may use the bracket/pattern forms that resolve_modules()
    supports, in which case every resolved module is frozen.  The .h
    files are written to "destdir".
    """
    for modname, pyfile, ispkg in resolve_modules(modname, pyfile):
        # Use the same naming convention as for the regenerated files.
        frozenfile = resolve_frozen_file(modname, destdir)
        _freeze_module(modname, pyfile, frozenfile)
def _freeze_module(frozenid, pyfile, frozenfile):
    """Run TOOL to freeze "pyfile" into "frozenfile".

    The tool writes to "<frozenfile>.new", which replaces "frozenfile"
    only after the tool succeeded.  If the tool is missing then the
    script exits with an error message; any other failure to run it is
    re-raised.
    """
    tmpfile = frozenfile + '.new'

    argv = [TOOL, frozenid, pyfile, tmpfile]
    print('#', ' '.join(os.path.relpath(a) for a in argv))
    try:
        subprocess.run(argv, check=True)
    except (FileNotFoundError, subprocess.CalledProcessError):
        # A missing executable makes subprocess.run() raise
        # FileNotFoundError (not CalledProcessError), so both must be
        # caught for the hint below to ever be shown.
        if not os.path.exists(TOOL):
            sys.exit(f'ERROR: missing {TOOL}; you need to run "make regen-frozen"')
        raise  # re-raise

    os.replace(tmpfile, frozenfile)
#######################################
# the script
def main():
    """Regenerate the build-related files and then freeze every module."""
    # Expand the raw specs, preserving order.
    specs = list(parse_frozen_specs())
    frozen, frozenids = resolve_frozen_files(specs, MODULES_DIR)

    # Regen build-related files.
    regen_frozen(specs, (frozenids, frozen))
    for regen in (regen_makefile, regen_pcbuild):
        regen(frozenids, frozen)

    # Freeze the target modules.
    for frozenid in frozenids:
        # Each value is a (pyfile, frozenfile) pair.
        _freeze_module(frozenid, *frozen[frozenid])
if __name__ == '__main__':
    # This script does not take any command-line arguments.
    argv = sys.argv[1:]
    if argv:
        sys.exit(f'ERROR: got unexpected args {argv}')
    main()