From e9927e1820caea01e576141d9a623ea394d43dad Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Sun, 14 Apr 2019 10:09:09 +0200 Subject: [PATCH] bpo-30485: support a default prefix mapping in ElementPath by passing None as prefix (#1823) --- Doc/library/xml.etree.elementtree.rst | 9 +++-- Lib/test/test_xml_etree.py | 6 ++++ Lib/xml/etree/ElementPath.py | 33 ++++++++++++++----- .../2019-04-13-23-42-33.bpo-30485.JHhjJS.rst | 3 ++ 4 files changed, 39 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-04-13-23-42-33.bpo-30485.JHhjJS.rst diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 9bee0eadc28..c83e719e959 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -764,7 +764,8 @@ Element Objects Finds the first subelement matching *match*. *match* may be a tag name or a :ref:`path <elementtree-xpath>`. Returns an element instance or ``None``. *namespaces* is an optional mapping from namespace prefix - to full name. + to full name. Pass ``None`` as prefix to move all unprefixed tag names + in the expression into the given namespace. .. method:: findall(match, namespaces=None) @@ -772,7 +773,8 @@ Element Objects Finds all matching subelements, by tag name or :ref:`path <elementtree-xpath>`. Returns a list containing all matching elements in document order. *namespaces* is an optional mapping from - namespace prefix to full name. + namespace prefix to full name. Pass ``None`` as prefix to move all + unprefixed tag names in the expression into the given namespace. .. method:: findtext(match, default=None, namespaces=None) @@ -782,7 +784,8 @@ Element Objects of the first matching element, or *default* if no element was found. Note that if the matching element has no text content an empty string is returned. *namespaces* is an optional mapping from namespace prefix - to full name. + to full name. 
Pass ``None`` as prefix to move all unprefixed tag names + in the expression into the given namespace. .. method:: getchildren() diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index bdcd4e0d19a..2f7a3b60b22 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -2463,6 +2463,12 @@ def test_findall_different_nsmaps(self): nsmap = {'xx': 'Y'} self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 1) self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 2) + nsmap = {'xx': 'X', None: 'Y'} + self.assertEqual(len(root.findall(".//xx:b", namespaces=nsmap)), 2) + self.assertEqual(len(root.findall(".//b", namespaces=nsmap)), 1) + nsmap = {'xx': 'X', '': 'Y'} + with self.assertRaisesRegex(ValueError, 'namespace prefix'): + root.findall(".//xx:b", namespaces=nsmap) def test_bad_find(self): e = ET.XML(SAMPLE_XML) diff --git a/Lib/xml/etree/ElementPath.py b/Lib/xml/etree/ElementPath.py index ef32917b14d..0e3854f9db2 100644 --- a/Lib/xml/etree/ElementPath.py +++ b/Lib/xml/etree/ElementPath.py @@ -71,16 +71,22 @@ ) def xpath_tokenizer(pattern, namespaces=None): + default_namespace = namespaces.get(None) if namespaces else None for token in xpath_tokenizer_re.findall(pattern): tag = token[1] - if tag and tag[0] != "{" and ":" in tag: - try: + if tag and tag[0] != "{": + if ":" in tag: prefix, uri = tag.split(":", 1) - if not namespaces: - raise KeyError - yield token[0], "{%s}%s" % (namespaces[prefix], uri) - except KeyError: - raise SyntaxError("prefix %r not found in prefix map" % prefix) from None + try: + if not namespaces: + raise KeyError + yield token[0], "{%s}%s" % (namespaces[prefix], uri) + except KeyError: + raise SyntaxError("prefix %r not found in prefix map" % prefix) from None + elif default_namespace: + yield token[0], "{%s}%s" % (default_namespace, tag) + else: + yield token else: yield token @@ -264,10 +270,19 @@ def __init__(self, root): def iterfind(elem, path, namespaces=None): # compile selector 
pattern - cache_key = (path, None if namespaces is None - else tuple(sorted(namespaces.items()))) if path[-1:] == "/": path = path + "*" # implicit all (FIXME: keep this?) + + cache_key = (path,) + if namespaces: + if '' in namespaces: + raise ValueError("empty namespace prefix must be passed as None, not the empty string") + if None in namespaces: + cache_key += (namespaces[None],) + tuple(sorted( + item for item in namespaces.items() if item[0] is not None)) + else: + cache_key += tuple(sorted(namespaces.items())) + try: selector = _cache[cache_key] except KeyError: diff --git a/Misc/NEWS.d/next/Library/2019-04-13-23-42-33.bpo-30485.JHhjJS.rst b/Misc/NEWS.d/next/Library/2019-04-13-23-42-33.bpo-30485.JHhjJS.rst new file mode 100644 index 00000000000..6c82efd3e00 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-04-13-23-42-33.bpo-30485.JHhjJS.rst @@ -0,0 +1,3 @@ +Path expressions in xml.etree.ElementTree can now avoid explicit namespace +prefixes for tags (or the "{namespace}tag" notation) by passing a default +namespace with a 'None' prefix.