bpo-14465: Add an indent() function to xml.etree.ElementTree to pretty-print XML trees (GH-15200)

This commit is contained in:
Stefan Behnel 2019-08-23 16:44:25 +02:00 committed by GitHub
parent 81446fd0d4
commit b5d3ceea48
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 183 additions and 1 deletions

View File

@ -572,6 +572,18 @@ Functions
.. versionadded:: 3.2
.. function:: indent(tree, space="  ", level=0)
Appends whitespace to the subtree to indent the tree visually.
This can be used to generate pretty-printed XML output.
*tree* can be an Element or ElementTree. *space* is the whitespace
string that will be inserted for each indentation level, two space
characters by default. For indenting partial subtrees inside of an
already indented tree, pass the initial indentation level as *level*.
.. versionadded:: 3.9
.. function:: iselement(element)
Checks if an object appears to be a valid element object. *element* is an

View File

@ -788,6 +788,123 @@ def test_writestring(self):
elem = ET.fromstring("<html><body>text</body></html>")
self.assertEqual(ET.tostring(elem), b'<html><body>text</body></html>')
def test_indent(self):
    # Exercises ET.indent() with its documented default of two spaces
    # per nesting level.

    # An element without children is left untouched.
    elem = ET.XML("<root></root>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<root />')

    # A single child is placed on its own line, one level deep.
    elem = ET.XML("<html><body>text</body></html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')

    # Pre-existing whitespace-only text/tail is replaced, not extended.
    elem = ET.XML("<html> <body>text</body> </html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>\n</html>')

    # A tail with real content must be preserved verbatim.
    elem = ET.XML("<html><body>text</body>tail</html>")
    ET.indent(elem)
    self.assertEqual(ET.tostring(elem), b'<html>\n  <body>text</body>tail</html>')

    # Mixed whitespace between siblings is normalised, and every nesting
    # level adds one more indentation unit.
    elem = ET.XML("<html><body><p>par</p>\n<p>text</p>\t<p><br/></p></body></html>")
    ET.indent(elem)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'    <p>par</p>\n'
        b'    <p>text</p>\n'
        b'    <p>\n'
        b'      <br />\n'
        b'    </p>\n'
        b'  </body>\n'
        b'</html>'
    )

    # Mixed content ("pre<br/>post") is kept inline because both the
    # text and the tail around <br/> are non-whitespace.
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    ET.indent(elem)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'    <p>pre<br />post</p>\n'
        b'    <p>text</p>\n'
        b'  </body>\n'
        b'</html>'
    )
def test_indent_space(self):
    # A caller-supplied *space* string is used as the indentation unit
    # in place of the default.
    markup = "<html><body><p>pre<br/>post</p><p>text</p></body></html>"

    # Tab indentation: one tab per nesting level.
    tabbed = ET.XML(markup)
    ET.indent(tabbed, space='\t')
    expected_tabbed = (
        b'<html>\n'
        b'\t<body>\n'
        b'\t\t<p>pre<br />post</p>\n'
        b'\t\t<p>text</p>\n'
        b'\t</body>\n'
        b'</html>'
    )
    self.assertEqual(ET.tostring(tabbed), expected_tabbed)

    # Empty indentation unit: only the newlines are inserted.
    flat = ET.XML(markup)
    ET.indent(flat, space='')
    expected_flat = (
        b'<html>\n'
        b'<body>\n'
        b'<p>pre<br />post</p>\n'
        b'<p>text</p>\n'
        b'</body>\n'
        b'</html>'
    )
    self.assertEqual(ET.tostring(flat), expected_flat)
def test_indent_space_caching(self):
    # Checks that ET.indent() reuses one string object per indentation
    # level instead of building a new string for every element.
    elem = ET.XML("<html><body><p>par</p><p>text</p><p><br/></p><p /></body></html>")
    ET.indent(elem)

    # Tails: None for the root, then one distinct string per level
    # (level 0, 1 and 2 with the default two-space unit).
    self.assertEqual(
        {el.tail for el in elem.iter()},
        {None, "\n", "\n  ", "\n    "}
    )
    # Texts: indentation for levels 1-3, the untouched real text, and
    # None for the childless, text-less elements.
    self.assertEqual(
        {el.text for el in elem.iter()},
        {None, "\n  ", "\n    ", "\n      ", "par", "text"}
    )
    # As many distinct tail objects as distinct tail values means equal
    # strings are shared rather than duplicated.
    self.assertEqual(
        len({el.tail for el in elem.iter()}),
        len({id(el.tail) for el in elem.iter()}),
    )
def test_indent_level(self):
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")

    # A negative initial level is rejected ...
    with self.assertRaises(ValueError):
        ET.indent(elem, level=-1)
    # ... and the tree must not have been modified by the failed call.
    self.assertEqual(
        ET.tostring(elem),
        b"<html><body><p>pre<br />post</p><p>text</p></body></html>"
    )

    # level=2 with the default two-space unit: the closing root tag sits
    # at 2 * 2 = 4 spaces, children at 6, grandchildren at 8.
    ET.indent(elem, level=2)
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'      <body>\n'
        b'        <p>pre<br />post</p>\n'
        b'        <p>text</p>\n'
        b'      </body>\n'
        b'    </html>'
    )

    # level=1 with a one-space unit: closing root tag at 1 space,
    # children at 2, grandchildren at 3.
    elem = ET.XML("<html><body><p>pre<br/>post</p><p>text</p></body></html>")
    ET.indent(elem, level=1, space=' ')
    self.assertEqual(
        ET.tostring(elem),
        b'<html>\n'
        b'  <body>\n'
        b'   <p>pre<br />post</p>\n'
        b'   <p>text</p>\n'
        b'  </body>\n'
        b' </html>'
    )
def test_tostring_default_namespace(self):
elem = ET.XML('<body xmlns="http://effbot.org/ns"><tag/></body>')
self.assertEqual(

View File

@ -76,7 +76,7 @@
"dump",
"Element", "ElementTree",
"fromstring", "fromstringlist",
"iselement", "iterparse",
"indent", "iselement", "iterparse",
"parse", "ParseError",
"PI", "ProcessingInstruction",
"QName",
@ -1185,6 +1185,57 @@ def dump(elem):
if not tail or tail[-1] != "\n":
sys.stdout.write("\n")
def indent(tree, space="  ", level=0):
    """Indent an XML document by inserting newlines and indentation space
    after elements.

    *tree* is the ElementTree or Element to modify.  The tail of the (root)
    element itself is not changed; its text, and the text and tail of the
    elements in its subtree, are replaced whenever they are empty or
    contain only whitespace.  Text and tails with other content are left
    untouched.  An element without children is not modified at all.

    *space* is the whitespace to insert for each indentation level, two
    space characters by default.

    *level* is the initial indentation level.  Setting this to a higher
    value than 0 can be used for indenting subtrees that are more deeply
    nested inside of a document.

    Raises ValueError if *level* is negative.  The tree is modified in
    place and nothing is returned.
    """
    if isinstance(tree, ElementTree):
        tree = tree.getroot()
    if level < 0:
        raise ValueError(f"Initial indentation level must be >= 0, got {level}")
    if not len(tree):
        return

    # Reduce the memory consumption by reusing indentation strings.
    # indentations[i] is the newline-plus-whitespace string for relative
    # depth i below the root; entries are appended lazily as deeper
    # levels are reached.
    indentations = ["\n" + level * space]

    def _indent_children(elem, level):
        # Only ever called for elements that have at least one child.
        # Start a new indentation level for the first child.
        child_level = level + 1
        try:
            child_indentation = indentations[child_level]
        except IndexError:
            child_indentation = indentations[level] + space
            indentations.append(child_indentation)

        # Whitespace-only (or missing) leading text becomes the indentation
        # in front of the first child.
        if not elem.text or not elem.text.strip():
            elem.text = child_indentation

        for child in elem:
            if len(child):
                _indent_children(child, child_level)
            if not child.tail or not child.tail.strip():
                child.tail = child_indentation

        # Dedent after the last child by overwriting the previous indentation.
        # "child" is the last element seen by the loop above; its tail is a
        # string at this point unless it carried real content.
        if not child.tail.strip():
            child.tail = indentations[level]

    _indent_children(tree, 0)
# --------------------------------------------------------------------
# parsing

View File

@ -0,0 +1,2 @@
Add an xml.etree.ElementTree.indent() function for pretty-printing XML trees.
Contributed by Stefan Behnel.