diff --git a/Include/internal/pycore_import.h b/Include/internal/pycore_import.h index e21ed0a7a06..6439b7369fb 100644 --- a/Include/internal/pycore_import.h +++ b/Include/internal/pycore_import.h @@ -10,6 +10,13 @@ extern PyStatus _PyImport_ReInitLock(void); #endif extern PyObject* _PyImport_BootstrapImp(PyThreadState *tstate); +struct _module_alias { + const char *name; /* ASCII encoded string */ + const char *orig; /* ASCII encoded string */ +}; + +extern const struct _module_alias * _PyImport_FrozenAliases; + #ifdef __cplusplus } #endif diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py index 5807577c74b..49f08814e95 100644 --- a/Lib/importlib/_bootstrap.py +++ b/Lib/importlib/_bootstrap.py @@ -824,16 +824,39 @@ def module_repr(m): "slated for removal in Python 3.12", DeprecationWarning) return ''.format(m.__name__, FrozenImporter._ORIGIN) + @classmethod + def _setup_module(cls, module): + assert not hasattr(module, '__file__'), module.__file__ + ispkg = hasattr(module, '__path__') + assert not ispkg or not module.__path__, module.__path__ + spec = module.__spec__ + assert not ispkg or not spec.submodule_search_locations + + if spec.loader_state is None: + spec.loader_state = type(sys.implementation)( + data=None, + origname=None, + ) + elif not hasattr(spec.loader_state, 'data'): + spec.loader_state.data = None + if not getattr(spec.loader_state, 'origname', None): + origname = vars(module).pop('__origname__', None) + assert origname, 'see PyImport_ImportFrozenModuleObject()' + spec.loader_state.origname = origname + @classmethod def find_spec(cls, fullname, path=None, target=None): info = _call_with_frames_removed(_imp.find_frozen, fullname) if info is None: return None - data, ispkg = info + data, ispkg, origname = info spec = spec_from_loader(fullname, cls, origin=cls._ORIGIN, is_package=ispkg) - spec.loader_state = data + spec.loader_state = type(sys.implementation)( + data=data, + origname=origname, + ) return spec @classmethod @@ -857,7 +880,7 @@ def exec_module(module): spec = module.__spec__ name = spec.name try: - data = spec.loader_state + data = spec.loader_state.data except AttributeError: if not _imp.is_frozen(name): raise ImportError('{!r} is not a frozen module'.format(name), @@ -868,7 +891,7 @@ def exec_module(module): # Note that if this method is called again (e.g. by # importlib.reload()) then _imp.get_frozen_object() will notice # no data was provided and will look it up. - spec.loader_state = None + spec.loader_state.data = None code = _call_with_frames_removed(_imp.get_frozen_object, name, data) exec(code, module.__dict__) @@ -1220,6 +1243,8 @@ def _setup(sys_module, _imp_module): continue spec = _spec_from_module(module, loader) _init_module_attrs(spec, module) + if loader is FrozenImporter: + loader._setup_module(module) # Directly load built-in modules needed during bootstrap. self_module = sys.modules[__name__] diff --git a/Lib/test/test_importlib/frozen/test_finder.py b/Lib/test/test_importlib/frozen/test_finder.py index 0b15aeb598d..cd5586d524c 100644 --- a/Lib/test/test_importlib/frozen/test_finder.py +++ b/Lib/test/test_importlib/frozen/test_finder.py @@ -9,7 +9,15 @@ import unittest import warnings -from test.support import import_helper, REPO_ROOT +from test.support import import_helper, REPO_ROOT, STDLIB_DIR + + +def resolve_stdlib_file(name, ispkg=False): + assert name + if ispkg: + return os.path.join(STDLIB_DIR, *name.split('.'), '__init__.py') + else: + return os.path.join(STDLIB_DIR, *name.split('.')) + '.py' class FindSpecTests(abc.FinderTests): @@ -32,16 +40,30 @@ def check_basic(self, spec, name, ispkg=False): self.assertIsNone(spec.submodule_search_locations) self.assertIsNotNone(spec.loader_state) - def check_data(self, spec): + def check_loader_state(self, spec, origname=None, filename=None): + if not filename: + if not origname: + origname = spec.name + + actual = dict(vars(spec.loader_state)) + + # Check the code object used to import the frozen module. + # We can't compare the marshaled data directly because + # marshal.dumps() would mark "expected" (below) as a ref, + # which slightly changes the output. + # (See https://bugs.python.org/issue34093.) + data = actual.pop('data') with import_helper.frozen_modules(): expected = _imp.get_frozen_object(spec.name) - data = spec.loader_state - # We can't compare the marshaled data directly because - # marshal.dumps() would mark "expected" as a ref, which slightly - # changes the output. (See https://bugs.python.org/issue34093.) code = marshal.loads(data) self.assertEqual(code, expected) + # Check the rest of spec.loader_state. + expected = dict( + origname=origname, + ) + self.assertDictEqual(actual, expected) + def check_search_locations(self, spec): # Frozen packages do not have any path entries. # (See https://bugs.python.org/issue21736.) @@ -58,7 +80,7 @@ def test_module(self): with self.subTest(f'{name} -> {name}'): spec = self.find(name) self.check_basic(spec, name) - self.check_data(spec) + self.check_loader_state(spec) modules = { '__hello_alias__': '__hello__', '_frozen_importlib': 'importlib._bootstrap', @@ -67,26 +89,28 @@ def test_module(self): with self.subTest(f'{name} -> {origname}'): spec = self.find(name) self.check_basic(spec, name) - self.check_data(spec) + self.check_loader_state(spec, origname) modules = [ '__phello__.__init__', '__phello__.ham.__init__', ] for name in modules: - origname = name.rpartition('.')[0] + origname = '<' + name.rpartition('.')[0] + filename = resolve_stdlib_file(name) with self.subTest(f'{name} -> {origname}'): spec = self.find(name) self.check_basic(spec, name) - self.check_data(spec) + self.check_loader_state(spec, origname, filename) modules = { '__hello_only__': ('Tools', 'freeze', 'flag.py'), } for name, path in modules.items(): + origname = None filename = os.path.join(REPO_ROOT, *path) with self.subTest(f'{name} -> {filename}'): spec = self.find(name) self.check_basic(spec, name) - self.check_data(spec) + self.check_loader_state(spec, origname, filename) def test_package(self): packages = [ @@ -94,19 +118,21 @@ def test_package(self): '__phello__.ham', ] for name in packages: + filename = resolve_stdlib_file(name, ispkg=True) with self.subTest(f'{name} -> {name}'): spec = self.find(name) self.check_basic(spec, name, ispkg=True) - self.check_data(spec) + self.check_loader_state(spec, name, filename) self.check_search_locations(spec) packages = { '__phello_alias__': '__hello__', } for name, origname in packages.items(): + filename = resolve_stdlib_file(origname, ispkg=False) with self.subTest(f'{name} -> {origname}'): spec = self.find(name) self.check_basic(spec, name, ispkg=True) - self.check_data(spec) + self.check_loader_state(spec, origname, filename) self.check_search_locations(spec) # These are covered by test_module() and test_package(). diff --git a/Lib/test/test_importlib/frozen/test_loader.py b/Lib/test/test_importlib/frozen/test_loader.py index 992dcef05bc..d6f39fa98a6 100644 --- a/Lib/test/test_importlib/frozen/test_loader.py +++ b/Lib/test/test_importlib/frozen/test_loader.py @@ -32,17 +32,19 @@ def fresh(name, *, oldapi=False): class ExecModuleTests(abc.LoaderTests): - def exec_module(self, name): + def exec_module(self, name, origname=None): with import_helper.frozen_modules(): is_package = self.machinery.FrozenImporter.is_package(name) code = _imp.get_frozen_object(name) - data = marshal.dumps(code) spec = self.machinery.ModuleSpec( name, self.machinery.FrozenImporter, origin='frozen', is_package=is_package, - loader_state=data, + loader_state=types.SimpleNamespace( + data=marshal.dumps(code), + origname=origname or name, + ), ) module = types.ModuleType(name) module.__spec__ = spec @@ -66,7 +68,8 @@ def test_module(self): self.assertEqual(getattr(module, attr), value) self.assertEqual(output, 'Hello world!\n') self.assertTrue(hasattr(module, '__spec__')) - self.assertIsNone(module.__spec__.loader_state) + self.assertIsNone(module.__spec__.loader_state.data) + self.assertEqual(module.__spec__.loader_state.origname, name) def test_package(self): name = '__phello__' @@ -79,7 +82,8 @@ def test_package(self): name=name, attr=attr, given=attr_value, expected=value)) self.assertEqual(output, 'Hello world!\n') - self.assertIsNone(module.__spec__.loader_state) + self.assertIsNone(module.__spec__.loader_state.data) + self.assertEqual(module.__spec__.loader_state.origname, name) def test_lacking_parent(self): name = '__phello__.spam' diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-10-01-09-06-54.bpo-45020.Cj5VQN.rst b/Misc/NEWS.d/next/Core and Builtins/2021-10-01-09-06-54.bpo-45020.Cj5VQN.rst new file mode 100644 index 00000000000..839604357d1 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-10-01-09-06-54.bpo-45020.Cj5VQN.rst @@ -0,0 +1,5 @@ +For frozen stdlib modules, record the original module name as +``module.__spec__.loader_state.origname``. If the value is different than +``module.__spec__.name`` then the module was defined as an alias in +Tools/scripts/freeze_modules.py. If it is ``None`` then the module comes +from a source file outside the stdlib. diff --git a/Programs/_freeze_module.c b/Programs/_freeze_module.c index 4b0633e7e7a..dd90d92e512 100644 --- a/Programs/_freeze_module.c +++ b/Programs/_freeze_module.c @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -24,8 +25,12 @@ static const struct _frozen _PyImport_FrozenModules[] = { {0, 0, 0} /* sentinel */ }; +static const struct _module_alias aliases[] = { + {0, 0} /* sentinel */ +}; const struct _frozen *PyImport_FrozenModules; +const struct _module_alias *_PyImport_FrozenAliases; static const char header[] = "/* Auto-generated by Programs/_freeze_module.c */"; @@ -183,6 +188,7 @@ main(int argc, char *argv[]) const char *name, *inpath, *outpath; PyImport_FrozenModules = _PyImport_FrozenModules; + _PyImport_FrozenAliases = aliases; if (argc != 4) { fprintf(stderr, "need to specify the name, input and output paths\n"); diff --git a/Python/clinic/import.c.h b/Python/clinic/import.c.h index 09738834195..dfb59de3b5c 100644 --- a/Python/clinic/import.c.h +++ b/Python/clinic/import.c.h @@ -178,7 +178,10 @@ PyDoc_STRVAR(_imp_find_frozen__doc__, "The returned info (a 2-tuple):\n" "\n" " * data the raw marshalled bytes\n" -" * is_package whether or not it is a package"); +" * is_package whether or not it is a package\n" +" * origname the originally frozen module\'s name, or None if not\n" +" a stdlib module (this will usually be the same as\n" +" the module\'s current name)"); #define _IMP_FIND_FROZEN_METHODDEF \ {"find_frozen", (PyCFunction)_imp_find_frozen, METH_O, _imp_find_frozen__doc__}, @@ -545,4 +548,4 @@ exit: #ifndef _IMP_EXEC_DYNAMIC_METHODDEF #define _IMP_EXEC_DYNAMIC_METHODDEF #endif /* !defined(_IMP_EXEC_DYNAMIC_METHODDEF) */ -/*[clinic end generated code: output=a31e1c00653359ff input=a9049054013a1b77]*/ +/*[clinic end generated code: output=8c8dd08158f9ac7c input=a9049054013a1b77]*/ diff --git a/Python/frozen.c b/Python/frozen.c index b4f7121fda3..499b3b99570 100644 --- a/Python/frozen.c +++ b/Python/frozen.c @@ -36,6 +36,7 @@ and __phello__.spam. Loading any will print some famous words... */ #include "Python.h" +#include "pycore_import.h" /* Includes for frozen modules: */ #include "frozen_modules/importlib._bootstrap.h" @@ -102,9 +103,24 @@ static const struct _frozen _PyImport_FrozenModules[] = { {"__phello__.spam", _Py_M____phello___spam, (int)sizeof(_Py_M____phello___spam)}, {"__hello_only__", _Py_M__frozen_only, (int)sizeof(_Py_M__frozen_only)}, - {0, 0, 0} /* sentinel */ + {0, 0, 0} /* modules sentinel */ }; +static const struct _module_alias aliases[] = { + {"_frozen_importlib", "importlib._bootstrap"}, + {"_frozen_importlib_external", "importlib._bootstrap_external"}, + {"os.path", "posixpath"}, + {"__hello_alias__", "__hello__"}, + {"__phello_alias__", "__hello__"}, + {"__phello_alias__.spam", "__hello__"}, + {"__phello__.__init__", "<__phello__"}, + {"__phello__.ham.__init__", "<__phello__.ham"}, + {"__hello_only__", NULL}, + {0, 0} /* aliases sentinel */ +}; +const struct _module_alias *_PyImport_FrozenAliases = aliases; + + /* Embedding apps may change this pointer to point to their favorite collection of frozen modules: */ diff --git a/Python/import.c b/Python/import.c index 22cefdf08b4..a6170a39c7f 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1046,6 +1046,29 @@ _imp_create_builtin(PyObject *module, PyObject *spec) } +/* Return true if the name is an alias. In that case, "alias" is set + to the original module name. If it is an alias but the original + module isn't known then "alias" is set to NULL while true is returned. */ +static bool +resolve_module_alias(const char *name, const struct _module_alias *aliases, + const char **alias) +{ + const struct _module_alias *entry; + for (entry = aliases; ; entry++) { + if (entry->name == NULL) { + /* It isn't an alias. */ + return false; + } + if (strcmp(name, entry->name) == 0) { + if (alias != NULL) { + *alias = entry->orig; + } + return true; + } + } +} + + /* Frozen modules */ static bool @@ -1161,16 +1184,15 @@ struct frozen_info { const char *data; Py_ssize_t size; bool is_package; + bool is_alias; + const char *origname; }; static frozen_status find_frozen(PyObject *nameobj, struct frozen_info *info) { if (info != NULL) { - info->nameobj = NULL; - info->data = NULL; - info->size = 0; - info->is_package = false; + memset(info, 0, sizeof(*info)); } if (nameobj == NULL || nameobj == Py_None) { @@ -1205,6 +1227,9 @@ find_frozen(PyObject *nameobj, struct frozen_info *info) info->data = (const char *)p->code; info->size = p->size < 0 ? -(p->size) : p->size; info->is_package = p->size < 0 ? true : false; + info->origname = name; + info->is_alias = resolve_module_alias(name, _PyImport_FrozenAliases, + &info->origname); } if (p->code == NULL) { @@ -1246,7 +1271,8 @@ int PyImport_ImportFrozenModuleObject(PyObject *name) { PyThreadState *tstate = _PyThreadState_GET(); - PyObject *co, *m, *d; + PyObject *co, *m, *d = NULL; + int err; struct frozen_info info; frozen_status status = find_frozen(name, &info); @@ -1267,7 +1293,6 @@ PyImport_ImportFrozenModuleObject(PyObject *name) if (info.is_package) { /* Set __path__ to the empty list */ PyObject *l; - int err; m = import_add_module(tstate, name); if (m == NULL) goto err_return; @@ -1288,15 +1313,33 @@ PyImport_ImportFrozenModuleObject(PyObject *name) goto err_return; } m = exec_code_in_module(tstate, name, d, co); - Py_DECREF(d); if (m == NULL) { goto err_return; } - Py_DECREF(co); Py_DECREF(m); + /* Set __origname__ (consumed in FrozenImporter._setup_module()). */ + PyObject *origname; + if (info.origname) { + origname = PyUnicode_FromString(info.origname); + if (origname == NULL) { + goto err_return; + } + } + else { + Py_INCREF(Py_None); + origname = Py_None; + } + err = PyDict_SetItemString(d, "__origname__", origname); + Py_DECREF(origname); + if (err != 0) { + goto err_return; + } + Py_DECREF(d); + Py_DECREF(co); return 1; err_return: + Py_XDECREF(d); Py_DECREF(co); return -1; } @@ -2014,11 +2057,14 @@ The returned info (a 2-tuple): * data the raw marshalled bytes * is_package whether or not it is a package + * origname the originally frozen module's name, or None if not + a stdlib module (this will usually be the same as + the module's current name) [clinic start generated code]*/ static PyObject * _imp_find_frozen_impl(PyObject *module, PyObject *name) -/*[clinic end generated code: output=3fd17da90d417e4e input=4e52b3ac95f6d7ab]*/ +/*[clinic end generated code: output=3fd17da90d417e4e input=6aa7b9078a89280a]*/ { struct frozen_info info; frozen_status status = find_frozen(name, &info); @@ -2032,12 +2078,25 @@ _imp_find_frozen_impl(PyObject *module, PyObject *name) set_frozen_error(status, name); return NULL; } + PyObject *data = PyBytes_FromStringAndSize(info.data, info.size); if (data == NULL) { return NULL; } - PyObject *result = PyTuple_Pack(2, data, - info.is_package ? Py_True : Py_False); + + PyObject *origname = NULL; + if (info.origname != NULL && info.origname[0] != '\0') { + origname = PyUnicode_FromString(info.origname); + if (origname == NULL) { + Py_DECREF(data); + return NULL; + } + } + + PyObject *result = PyTuple_Pack(3, data, + info.is_package ? Py_True : Py_False, + origname ? origname : Py_None); + Py_XDECREF(origname); Py_DECREF(data); return result; } diff --git a/Tools/scripts/freeze_modules.py b/Tools/scripts/freeze_modules.py index 6091d831a8d..36e284100ed 100644 --- a/Tools/scripts/freeze_modules.py +++ b/Tools/scripts/freeze_modules.py @@ -274,6 +274,15 @@ def symbol(self): name = self.frozenid.replace('.', '_') return '_Py_M__' + name + @property + def ispkg(self): + if not self.pyfile: + return False + elif self.frozenid.endswith('.__init__'): + return False + else: + return os.path.basename(self.pyfile) == '__init__.py' + def resolve_frozen_file(frozenid, destdir=MODULES_DIR): """Return the filename corresponding to the given frozen ID. @@ -305,6 +314,17 @@ def __getattr__(self, name): def modname(self): return self.name + @property + def orig(self): + return self.source.modname + + @property + def isalias(self): + orig = self.source.modname + if not orig: + return True + return self.name != orig + def summarize(self): source = self.source.modname if source: @@ -507,6 +527,7 @@ def regen_frozen(modules): headerlines.append(f'#include "{header}"') deflines = [] + aliaslines = [] indent = ' ' lastsection = None for mod in modules: @@ -528,6 +549,15 @@ def regen_frozen(modules): deflines.append(line1) deflines.append(indent + line2) + if mod.isalias: + if not mod.orig: + entry = '{"%s", NULL},' % (mod.name,) + elif mod.source.ispkg: + entry = '{"%s", "<%s"},' % (mod.name, mod.orig) + else: + entry = '{"%s", "%s"},' % (mod.name, mod.orig) + aliaslines.append(indent + entry) + if not deflines[0]: del deflines[0] for i, line in enumerate(deflines): @@ -549,10 +579,17 @@ def regen_frozen(modules): lines = replace_block( lines, "static const struct _frozen _PyImport_FrozenModules[] =", - "/* sentinel */", + "/* modules sentinel */", deflines, FROZEN_FILE, ) + lines = replace_block( + lines, + "const struct _module_alias aliases[] =", + "/* aliases sentinel */", + aliaslines, + FROZEN_FILE, + ) outfile.writelines(lines)