From 5fd8123dfdf6df0a9c29363c8327ccfa0c1d41ac Mon Sep 17 00:00:00 2001 From: mefistotelis Date: Sun, 12 Apr 2020 14:51:58 +0200 Subject: [PATCH] bpo-39011: Preserve line endings within ElementTree attributes (GH-18468) * bpo-39011: Preserve line endings within attributes Line endings within attributes were previously normalized to "\n" in Py3.7/3.8. This patch removes that normalization, as line endings which were replaced by entity numbers should be preserved in their original form. --- Doc/whatsnew/3.9.rst | 9 +++++++++ Lib/test/test_xml_etree.py | 5 +++-- Lib/xml/etree/ElementTree.py | 14 +++++++------- .../2020-02-12-01-48-51.bpo-39011.hGve_t.rst | 3 +++ 4 files changed, 22 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-02-12-01-48-51.bpo-39011.hGve_t.rst diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index 3beb721ed31..6cd80ce8e4f 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -412,6 +412,15 @@ customization consistently by always using the value specified by case), and one used ``__VENV_NAME__`` instead. (Contributed by Brett Cannon in :issue:`37663`.) +xml +--- + +White space characters within attributes are now preserved when serializing +:mod:`xml.etree.ElementTree` to an XML file. Line endings (EOLNs) are no longer normalized +to "\n". This is the result of a discussion about how to interpret +section 2.11 of the XML specification. +(Contributed by Mefistotelis in :issue:`39011`.) 
+ Optimizations ============= diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 785edb73702..d01649d1c31 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -430,13 +430,14 @@ def test_attrib(self): self.assertEqual(ET.tostring(elem), b'<test>aa</test>') + # Test preserving white space chars in attributes elem = ET.Element('test') elem.set('a', '\r') elem.set('b', '\r\n') elem.set('c', '\t\n\r ') - elem.set('d', '\n\n') + elem.set('d', '\n\n\r\r\t\t ') self.assertEqual(ET.tostring(elem), - b'<test a="&#10;" b="&#10;" c="&#09;&#10;&#10; " d="&#10;&#10;" />') + b'<test a="&#13;" b="&#13;&#10;" c="&#09;&#10;&#13; " d="&#10;&#10;&#13;&#13;&#09;&#09; " />') def test_makeelement(self): # Test makeelement handling. diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index c8d898f3281..da2bcad0b4d 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -1057,15 +1057,15 @@ def _escape_attrib(text): text = text.replace(">", "&gt;") if "\"" in text: text = text.replace("\"", "&quot;") - # The following business with carriage returns is to satisfy - # Section 2.11 of the XML specification, stating that - # CR or CR LN should be replaced with just LN + # Although section 2.11 of the XML specification states that CR or + # CR LF should be replaced with just LF, it applies only to EOLNs + # which take part in organizing the file into lines. Within attributes, + # we are replacing these with entity numbers, so they do not count. # http://www.w3.org/TR/REC-xml/#sec-line-ends - if "\r\n" in text: - text = text.replace("\r\n", "\n") + # The current solution, contained in the following six lines, was + # discussed in issues 17582 and 39011. 
if "\r" in text: - text = text.replace("\r", "\n") - #The following four lines are issue 17582 + text = text.replace("\r", "&#13;") if "\n" in text: text = text.replace("\n", "&#10;") if "\t" in text: diff --git a/Misc/NEWS.d/next/Library/2020-02-12-01-48-51.bpo-39011.hGve_t.rst b/Misc/NEWS.d/next/Library/2020-02-12-01-48-51.bpo-39011.hGve_t.rst new file mode 100644 index 00000000000..43962f0bf17 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-02-12-01-48-51.bpo-39011.hGve_t.rst @@ -0,0 +1,3 @@ +Normalization of line endings in ElementTree attributes was removed, as line +endings which were replaced by entity numbers should be preserved in +their original form.