From 08285d563e64c179a56ab2f952345b3dbcdb54f3 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Tue, 5 Oct 2021 11:26:37 -0600 Subject: [PATCH] bpo-45020: Identify which frozen modules are actually aliases. (gh-28655) In the list of generated frozen modules at the top of Tools/scripts/freeze_modules.py, you will find that some of the modules have a different name than the module (or .py file) that is actually frozen. Let's call each case an "alias". Aliases do not come into play until we get to the (generated) list of modules in Python/frozen.c. (The tool for freezing modules, Programs/_freeze_module, is only concerned with the source file, not the module it will be used for.) Knowledge of which frozen modules are aliases (and the identity of the original module) normally isn't important. However, this information is valuable when we go to set __file__ on frozen stdlib modules. This change updates Tools/scripts/freeze_modules.py to map aliases to the original module name (or None if not a stdlib module) in Python/frozen.c. We also add a helper function in Python/import.c to look up a frozen module's alias and add the result of that function to the frozen info returned from find_frozen(). https://bugs.python.org/issue45020 --- Include/internal/pycore_import.h | 7 ++ Lib/importlib/_bootstrap.py | 33 +++++++- Lib/test/test_importlib/frozen/test_finder.py | 52 +++++++++--- Lib/test/test_importlib/frozen/test_loader.py | 14 ++-- .../2021-10-01-09-06-54.bpo-45020.Cj5VQN.rst | 5 ++ Programs/_freeze_module.c | 6 ++ Python/clinic/import.c.h | 7 +- Python/frozen.c | 18 ++++- Python/import.c | 81 ++++++++++++++++--- Tools/scripts/freeze_modules.py | 39 ++++++++- 10 files changed, 225 insertions(+), 37 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-10-01-09-06-54.bpo-45020.Cj5VQN.rst diff --git a/Include/internal/pycore_import.h b/Include/internal/pycore_import.h index e21ed0a7a06..6439b7369fb 100644 --- a/Include/internal/pycore_import.h +++ b/Include/internal/pycore_import.h @@ -10,6 +10,13 @@ extern PyStatus _PyImport_ReInitLock(void); #endif extern PyObject* _PyImport_BootstrapImp(PyThreadState *tstate); +struct _module_alias { + const char *name; /* ASCII encoded string */ + const char *orig; /* ASCII encoded string */ +}; + +extern const struct _module_alias * _PyImport_FrozenAliases; + #ifdef __cplusplus } #endif diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py index 5807577c74b..49f08814e95 100644 --- a/Lib/importlib/_bootstrap.py +++ b/Lib/importlib/_bootstrap.py @@ -824,16 +824,39 @@ def module_repr(m): "slated for removal in Python 3.12", DeprecationWarning) return ''.format(m.__name__, FrozenImporter._ORIGIN) + @classmethod + def _setup_module(cls, module): + assert not hasattr(module, '__file__'), module.__file__ + ispkg = hasattr(module, '__path__') + assert not ispkg or not module.__path__, module.__path__ + spec = module.__spec__ + assert not ispkg or not spec.submodule_search_locations + + if spec.loader_state is None: + spec.loader_state = type(sys.implementation)( + data=None, + origname=None, + ) + elif not hasattr(spec.loader_state, 'data'): + spec.loader_state.data = None + if not getattr(spec.loader_state, 'origname', None): + origname = vars(module).pop('__origname__', None) + assert origname, 'see PyImport_ImportFrozenModuleObject()' + spec.loader_state.origname = origname + @classmethod def find_spec(cls, fullname, path=None, target=None): info = _call_with_frames_removed(_imp.find_frozen, fullname) if info is None: return None - data, ispkg = info + data, ispkg, origname = info spec = spec_from_loader(fullname, cls, origin=cls._ORIGIN, is_package=ispkg) - spec.loader_state = data + spec.loader_state = type(sys.implementation)( + data=data, + origname=origname, + ) return spec @classmethod @@ -857,7 +880,7 @@ def exec_module(module): spec = module.__spec__ name = spec.name try: - data = spec.loader_state + data = spec.loader_state.data except AttributeError: if not _imp.is_frozen(name): raise ImportError('{!r} is not a frozen module'.format(name), @@ -868,7 +891,7 @@ def exec_module(module): # Note that if this method is called again (e.g. by # importlib.reload()) then _imp.get_frozen_object() will notice # no data was provided and will look it up. - spec.loader_state = None + spec.loader_state.data = None code = _call_with_frames_removed(_imp.get_frozen_object, name, data) exec(code, module.__dict__) @@ -1220,6 +1243,8 @@ def _setup(sys_module, _imp_module): continue spec = _spec_from_module(module, loader) _init_module_attrs(spec, module) + if loader is FrozenImporter: + loader._setup_module(module) # Directly load built-in modules needed during bootstrap. self_module = sys.modules[__name__] diff --git a/Lib/test/test_importlib/frozen/test_finder.py b/Lib/test/test_importlib/frozen/test_finder.py index 0b15aeb598d..cd5586d524c 100644 --- a/Lib/test/test_importlib/frozen/test_finder.py +++ b/Lib/test/test_importlib/frozen/test_finder.py @@ -9,7 +9,15 @@ import unittest import warnings -from test.support import import_helper, REPO_ROOT +from test.support import import_helper, REPO_ROOT, STDLIB_DIR + + +def resolve_stdlib_file(name, ispkg=False): + assert name + if ispkg: + return os.path.join(STDLIB_DIR, *name.split('.'), '__init__.py') + else: + return os.path.join(STDLIB_DIR, *name.split('.')) + '.py' class FindSpecTests(abc.FinderTests): @@ -32,16 +40,30 @@ def check_basic(self, spec, name, ispkg=False): self.assertIsNone(spec.submodule_search_locations) self.assertIsNotNone(spec.loader_state) - def check_data(self, spec): + def check_loader_state(self, spec, origname=None, filename=None): + if not filename: + if not origname: + origname = spec.name + + actual = dict(vars(spec.loader_state)) + + # Check the code object used to import the frozen module. + # We can't compare the marshaled data directly because + # marshal.dumps() would mark "expected" (below) as a ref, + # which slightly changes the output. + # (See https://bugs.python.org/issue34093.) + data = actual.pop('data') with import_helper.frozen_modules(): expected = _imp.get_frozen_object(spec.name) - data = spec.loader_state - # We can't compare the marshaled data directly because - # marshal.dumps() would mark "expected" as a ref, which slightly - # changes the output. (See https://bugs.python.org/issue34093.) code = marshal.loads(data) self.assertEqual(code, expected) + # Check the rest of spec.loader_state. + expected = dict( + origname=origname, + ) + self.assertDictEqual(actual, expected) + def check_search_locations(self, spec): # Frozen packages do not have any path entries. # (See https://bugs.python.org/issue21736.) @@ -58,7 +80,7 @@ def test_module(self): with self.subTest(f'{name} -> {name}'): spec = self.find(name) self.check_basic(spec, name) - self.check_data(spec) + self.check_loader_state(spec) modules = { '__hello_alias__': '__hello__', '_frozen_importlib': 'importlib._bootstrap', @@ -67,26 +89,28 @@ def test_module(self): with self.subTest(f'{name} -> {origname}'): spec = self.find(name) self.check_basic(spec, name) - self.check_data(spec) + self.check_loader_state(spec, origname) modules = [ '__phello__.__init__', '__phello__.ham.__init__', ] for name in modules: - origname = name.rpartition('.')[0] + origname = '<' + name.rpartition('.')[0] + filename = resolve_stdlib_file(name) with self.subTest(f'{name} -> {origname}'): spec = self.find(name) self.check_basic(spec, name) - self.check_data(spec) + self.check_loader_state(spec, origname, filename) modules = { '__hello_only__': ('Tools', 'freeze', 'flag.py'), } for name, path in modules.items(): + origname = None filename = os.path.join(REPO_ROOT, *path) with self.subTest(f'{name} -> {filename}'): spec = self.find(name) self.check_basic(spec, name) - self.check_data(spec) + self.check_loader_state(spec, origname, filename) def test_package(self): packages = [ @@ -94,19 +118,21 @@ def test_package(self): '__phello__.ham', ] for name in packages: + filename = resolve_stdlib_file(name, ispkg=True) with self.subTest(f'{name} -> {name}'): spec = self.find(name) self.check_basic(spec, name, ispkg=True) - self.check_data(spec) + self.check_loader_state(spec, name, filename) self.check_search_locations(spec) packages = { '__phello_alias__': '__hello__', } for name, origname in packages.items(): + filename = resolve_stdlib_file(origname, ispkg=False) with self.subTest(f'{name} -> {origname}'): spec = self.find(name) self.check_basic(spec, name, ispkg=True) - self.check_data(spec) + self.check_loader_state(spec, origname, filename) self.check_search_locations(spec) # These are covered by test_module() and test_package(). diff --git a/Lib/test/test_importlib/frozen/test_loader.py b/Lib/test/test_importlib/frozen/test_loader.py index 992dcef05bc..d6f39fa98a6 100644 --- a/Lib/test/test_importlib/frozen/test_loader.py +++ b/Lib/test/test_importlib/frozen/test_loader.py @@ -32,17 +32,19 @@ def fresh(name, *, oldapi=False): class ExecModuleTests(abc.LoaderTests): - def exec_module(self, name): + def exec_module(self, name, origname=None): with import_helper.frozen_modules(): is_package = self.machinery.FrozenImporter.is_package(name) code = _imp.get_frozen_object(name) - data = marshal.dumps(code) spec = self.machinery.ModuleSpec( name, self.machinery.FrozenImporter, origin='frozen', is_package=is_package, - loader_state=data, + loader_state=types.SimpleNamespace( + data=marshal.dumps(code), + origname=origname or name, + ), ) module = types.ModuleType(name) module.__spec__ = spec @@ -66,7 +68,8 @@ def test_module(self): self.assertEqual(getattr(module, attr), value) self.assertEqual(output, 'Hello world!\n') self.assertTrue(hasattr(module, '__spec__')) - self.assertIsNone(module.__spec__.loader_state) + self.assertIsNone(module.__spec__.loader_state.data) + self.assertEqual(module.__spec__.loader_state.origname, name) def test_package(self): name = '__phello__' @@ -79,7 +82,8 @@ def test_package(self): name=name, attr=attr, given=attr_value, expected=value)) self.assertEqual(output, 'Hello world!\n') - self.assertIsNone(module.__spec__.loader_state) + self.assertIsNone(module.__spec__.loader_state.data) + self.assertEqual(module.__spec__.loader_state.origname, name) def test_lacking_parent(self): name = '__phello__.spam' diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-10-01-09-06-54.bpo-45020.Cj5VQN.rst b/Misc/NEWS.d/next/Core and Builtins/2021-10-01-09-06-54.bpo-45020.Cj5VQN.rst new file mode 100644 index 00000000000..839604357d1 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-10-01-09-06-54.bpo-45020.Cj5VQN.rst @@ -0,0 +1,5 @@ +For frozen stdlib modules, record the original module name as +``module.__spec__.loader_state.origname``. If the value is different than +``module.__spec__.name`` then the module was defined as an alias in +Tools/scripts/freeze_modules.py. If it is ``None`` then the module comes +from a source file outside the stdlib. diff --git a/Programs/_freeze_module.c b/Programs/_freeze_module.c index 4b0633e7e7a..dd90d92e512 100644 --- a/Programs/_freeze_module.c +++ b/Programs/_freeze_module.c @@ -9,6 +9,7 @@ #include #include +#include #include #include @@ -24,8 +25,12 @@ static const struct _frozen _PyImport_FrozenModules[] = { {0, 0, 0} /* sentinel */ }; +static const struct _module_alias aliases[] = { + {0, 0} /* sentinel */ +}; const struct _frozen *PyImport_FrozenModules; +const struct _module_alias *_PyImport_FrozenAliases; static const char header[] = "/* Auto-generated by Programs/_freeze_module.c */"; @@ -183,6 +188,7 @@ main(int argc, char *argv[]) const char *name, *inpath, *outpath; PyImport_FrozenModules = _PyImport_FrozenModules; + _PyImport_FrozenAliases = aliases; if (argc != 4) { fprintf(stderr, "need to specify the name, input and output paths\n"); diff --git a/Python/clinic/import.c.h b/Python/clinic/import.c.h index 09738834195..dfb59de3b5c 100644 --- a/Python/clinic/import.c.h +++ b/Python/clinic/import.c.h @@ -178,7 +178,10 @@ PyDoc_STRVAR(_imp_find_frozen__doc__, "The returned info (a 2-tuple):\n" "\n" " * data the raw marshalled bytes\n" -" * is_package whether or not it is a package"); +" * is_package whether or not it is a package\n" +" * origname the originally frozen module\'s name, or None if not\n" +" a stdlib module (this will usually be the same as\n" +" the module\'s current name)"); #define _IMP_FIND_FROZEN_METHODDEF \ {"find_frozen", (PyCFunction)_imp_find_frozen, METH_O, _imp_find_frozen__doc__}, @@ -545,4 +548,4 @@ exit: #ifndef _IMP_EXEC_DYNAMIC_METHODDEF #define _IMP_EXEC_DYNAMIC_METHODDEF #endif /* !defined(_IMP_EXEC_DYNAMIC_METHODDEF) */ -/*[clinic end generated code: output=a31e1c00653359ff input=a9049054013a1b77]*/ +/*[clinic end generated code: output=8c8dd08158f9ac7c input=a9049054013a1b77]*/ diff --git a/Python/frozen.c b/Python/frozen.c index b4f7121fda3..499b3b99570 100644 --- a/Python/frozen.c +++ b/Python/frozen.c @@ -36,6 +36,7 @@ and __phello__.spam. Loading any will print some famous words... */ #include "Python.h" +#include "pycore_import.h" /* Includes for frozen modules: */ #include "frozen_modules/importlib._bootstrap.h" @@ -102,9 +103,24 @@ static const struct _frozen _PyImport_FrozenModules[] = { {"__phello__.spam", _Py_M____phello___spam, (int)sizeof(_Py_M____phello___spam)}, {"__hello_only__", _Py_M__frozen_only, (int)sizeof(_Py_M__frozen_only)}, - {0, 0, 0} /* sentinel */ + {0, 0, 0} /* modules sentinel */ }; +static const struct _module_alias aliases[] = { + {"_frozen_importlib", "importlib._bootstrap"}, + {"_frozen_importlib_external", "importlib._bootstrap_external"}, + {"os.path", "posixpath"}, + {"__hello_alias__", "__hello__"}, + {"__phello_alias__", "__hello__"}, + {"__phello_alias__.spam", "__hello__"}, + {"__phello__.__init__", "<__phello__"}, + {"__phello__.ham.__init__", "<__phello__.ham"}, + {"__hello_only__", NULL}, + {0, 0} /* aliases sentinel */ +}; +const struct _module_alias *_PyImport_FrozenAliases = aliases; + + /* Embedding apps may change this pointer to point to their favorite collection of frozen modules: */ diff --git a/Python/import.c b/Python/import.c index 22cefdf08b4..a6170a39c7f 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1046,6 +1046,29 @@ _imp_create_builtin(PyObject *module, PyObject *spec) } +/* Return true if the name is an alias. In that case, "alias" is set + to the original module name. If it is an alias but the original + module isn't known then "alias" is set to NULL while true is returned. */ +static bool +resolve_module_alias(const char *name, const struct _module_alias *aliases, + const char **alias) +{ + const struct _module_alias *entry; + for (entry = aliases; ; entry++) { + if (entry->name == NULL) { + /* It isn't an alias. */ + return false; + } + if (strcmp(name, entry->name) == 0) { + if (alias != NULL) { + *alias = entry->orig; + } + return true; + } + } +} + + /* Frozen modules */ static bool @@ -1161,16 +1184,15 @@ struct frozen_info { const char *data; Py_ssize_t size; bool is_package; + bool is_alias; + const char *origname; }; static frozen_status find_frozen(PyObject *nameobj, struct frozen_info *info) { if (info != NULL) { - info->nameobj = NULL; - info->data = NULL; - info->size = 0; - info->is_package = false; + memset(info, 0, sizeof(*info)); } if (nameobj == NULL || nameobj == Py_None) { @@ -1205,6 +1227,9 @@ find_frozen(PyObject *nameobj, struct frozen_info *info) info->data = (const char *)p->code; info->size = p->size < 0 ? -(p->size) : p->size; info->is_package = p->size < 0 ? true : false; + info->origname = name; + info->is_alias = resolve_module_alias(name, _PyImport_FrozenAliases, + &info->origname); } if (p->code == NULL) { @@ -1246,7 +1271,8 @@ int PyImport_ImportFrozenModuleObject(PyObject *name) { PyThreadState *tstate = _PyThreadState_GET(); - PyObject *co, *m, *d; + PyObject *co, *m, *d = NULL; + int err; struct frozen_info info; frozen_status status = find_frozen(name, &info); @@ -1267,7 +1293,6 @@ PyImport_ImportFrozenModuleObject(PyObject *name) if (info.is_package) { /* Set __path__ to the empty list */ PyObject *l; - int err; m = import_add_module(tstate, name); if (m == NULL) goto err_return; @@ -1288,15 +1313,33 @@ PyImport_ImportFrozenModuleObject(PyObject *name) goto err_return; } m = exec_code_in_module(tstate, name, d, co); - Py_DECREF(d); if (m == NULL) { goto err_return; } - Py_DECREF(co); Py_DECREF(m); + /* Set __origname__ (consumed in FrozenImporter._setup_module()). */ + PyObject *origname; + if (info.origname) { + origname = PyUnicode_FromString(info.origname); + if (origname == NULL) { + goto err_return; + } + } + else { + Py_INCREF(Py_None); + origname = Py_None; + } + err = PyDict_SetItemString(d, "__origname__", origname); + Py_DECREF(origname); + if (err != 0) { + goto err_return; + } + Py_DECREF(d); + Py_DECREF(co); return 1; err_return: + Py_XDECREF(d); Py_DECREF(co); return -1; } @@ -2014,11 +2057,14 @@ The returned info (a 2-tuple): * data the raw marshalled bytes * is_package whether or not it is a package + * origname the originally frozen module's name, or None if not + a stdlib module (this will usually be the same as + the module's current name) [clinic start generated code]*/ static PyObject * _imp_find_frozen_impl(PyObject *module, PyObject *name) -/*[clinic end generated code: output=3fd17da90d417e4e input=4e52b3ac95f6d7ab]*/ +/*[clinic end generated code: output=3fd17da90d417e4e input=6aa7b9078a89280a]*/ { struct frozen_info info; frozen_status status = find_frozen(name, &info); @@ -2032,12 +2078,25 @@ _imp_find_frozen_impl(PyObject *module, PyObject *name) set_frozen_error(status, name); return NULL; } + PyObject *data = PyBytes_FromStringAndSize(info.data, info.size); if (data == NULL) { return NULL; } - PyObject *result = PyTuple_Pack(2, data, - info.is_package ? Py_True : Py_False); + + PyObject *origname = NULL; + if (info.origname != NULL && info.origname[0] != '\0') { + origname = PyUnicode_FromString(info.origname); + if (origname == NULL) { + Py_DECREF(data); + return NULL; + } + } + + PyObject *result = PyTuple_Pack(3, data, + info.is_package ? Py_True : Py_False, + origname ? origname : Py_None); + Py_XDECREF(origname); Py_DECREF(data); return result; } diff --git a/Tools/scripts/freeze_modules.py b/Tools/scripts/freeze_modules.py index 6091d831a8d..36e284100ed 100644 --- a/Tools/scripts/freeze_modules.py +++ b/Tools/scripts/freeze_modules.py @@ -274,6 +274,15 @@ def symbol(self): name = self.frozenid.replace('.', '_') return '_Py_M__' + name + @property + def ispkg(self): + if not self.pyfile: + return False + elif self.frozenid.endswith('.__init__'): + return False + else: + return os.path.basename(self.pyfile) == '__init__.py' + def resolve_frozen_file(frozenid, destdir=MODULES_DIR): """Return the filename corresponding to the given frozen ID. @@ -305,6 +314,17 @@ def __getattr__(self, name): def modname(self): return self.name + @property + def orig(self): + return self.source.modname + + @property + def isalias(self): + orig = self.source.modname + if not orig: + return True + return self.name != orig + def summarize(self): source = self.source.modname if source: @@ -507,6 +527,7 @@ def regen_frozen(modules): headerlines.append(f'#include "{header}"') deflines = [] + aliaslines = [] indent = ' ' lastsection = None for mod in modules: @@ -528,6 +549,15 @@ def regen_frozen(modules): deflines.append(line1) deflines.append(indent + line2) + if mod.isalias: + if not mod.orig: + entry = '{"%s", NULL},' % (mod.name,) + elif mod.source.ispkg: + entry = '{"%s", "<%s"},' % (mod.name, mod.orig) + else: + entry = '{"%s", "%s"},' % (mod.name, mod.orig) + aliaslines.append(indent + entry) + if not deflines[0]: del deflines[0] for i, line in enumerate(deflines): @@ -549,10 +579,17 @@ def regen_frozen(modules): lines = replace_block( lines, "static const struct _frozen _PyImport_FrozenModules[] =", - "/* sentinel */", + "/* modules sentinel */", deflines, FROZEN_FILE, ) + lines = replace_block( + lines, + "const struct _module_alias aliases[] =", + "/* aliases sentinel */", + aliaslines, + FROZEN_FILE, + ) outfile.writelines(lines)