diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index c73e4bed1c6..f83db7f6f69 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -12,6 +12,7 @@ import sys import types import unittest +import warnings import weakref from itertools import product @@ -2237,6 +2238,20 @@ class MyParser(ET.XMLParser): parser.feed(self.sample1) self._check_sample_element(parser.close()) + def test_doctype_warning(self): + parser = ET.XMLParser() + with self.assertWarns(DeprecationWarning): + parser.doctype('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', + 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd') + parser.feed('<html/>') + parser.close() + + with warnings.catch_warnings(): + warnings.simplefilter('error', DeprecationWarning) + parser = ET.XMLParser() + parser.feed(self.sample2) + parser.close() + def test_subclass_doctype(self): _doctype = None class MyParserWithDoctype(ET.XMLParser): @@ -2252,6 +2267,32 @@ def doctype(self, name, pubid, system): ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) + _doctype = _doctype2 = None + with warnings.catch_warnings(): + warnings.simplefilter('error', DeprecationWarning) + class DoctypeParser: + def doctype(self, name, pubid, system): + nonlocal _doctype2 + _doctype2 = (name, pubid, system) + + parser = MyParserWithDoctype(target=DoctypeParser()) + parser.feed(self.sample2) + parser.close() + self.assertIsNone(_doctype) + self.assertEqual(_doctype2, + ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN', + 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd')) + + def test_inherited_doctype(self): + '''Ensure that ordinary usage is not deprecated (Issue 19176)''' + with warnings.catch_warnings(): + warnings.simplefilter('error', DeprecationWarning) + class MyParserWithoutDoctype(ET.XMLParser): + pass + parser = MyParserWithoutDoctype() + parser.feed(self.sample2) + parser.close() + def test_parse_string(self): parser = 
ET.XMLParser(target=ET.TreeBuilder()) parser.feed(self.sample3) diff --git a/Misc/NEWS b/Misc/NEWS index 12633e22f0b..60b5e86ef74 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -29,6 +29,12 @@ Core and Builtins Library ------- +- Issue #19176: Fixed doctype()-related bugs in the C implementation of + ElementTree. A deprecation warning is no longer issued by an XMLParser + subclass with the default doctype() method. A direct call of doctype() now + issues a warning. The parser's doctype() is no longer called if the target's + doctype() is called. Based on a patch by Martin Panter. + - Issue #20387: Restore semantic round-trip correctness in tokenize/untokenize for tab-indented blocks. diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index ea770517c14..911b5ac5a9a 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -2782,7 +2782,11 @@ typedef struct { } XMLParserObject; -#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type) +static PyObject* +_elementtree_XMLParser_doctype(XMLParserObject* self, PyObject* args); +static PyObject * +_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name, + PyObject *pubid, PyObject *system); /* helpers */ @@ -3182,20 +3186,22 @@ expat_start_doctype_handler(XMLParserObject *self, doctype_name_obj, pubid_obj, sysid_obj); Py_CLEAR(res); } - - /* Now see if the parser itself has a doctype method. If yes and it's - * a subclass, call it but warn about deprecation. If it's not a subclass - * (i.e. vanilla XMLParser), do nothing. - */ - parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype"); - if (parser_doctype) { - if (!XMLParser_CheckExact(self_pyobj)) { - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "This method of XMLParser is deprecated. Define" - " doctype() method on the TreeBuilder target.", - 1) < 0) { + else { + /* Now see if the parser itself has a doctype method. If yes and it's + * a custom method, call it but warn about deprecation. If it's only + * the vanilla XMLParser method, do nothing. 
+ */ + parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype"); + if (parser_doctype && + !(PyCFunction_Check(parser_doctype) && + PyCFunction_GET_SELF(parser_doctype) == self_pyobj && + PyCFunction_GET_FUNCTION(parser_doctype) == + (PyCFunction) _elementtree_XMLParser_doctype)) { + res = _elementtree_XMLParser_doctype_impl(self, doctype_name_obj, + pubid_obj, sysid_obj); + if (!res) goto clear; - } + Py_DECREF(res); res = PyObject_CallFunction(parser_doctype, "OOO", doctype_name_obj, pubid_obj, sysid_obj); Py_CLEAR(res); @@ -3572,12 +3578,24 @@ _elementtree_XMLParser__parse_whole(XMLParserObject *self, PyObject *file) /*[clinic input] _elementtree.XMLParser.doctype + name: object + pubid: object + system: object + / + [clinic start generated code]*/ static PyObject * -_elementtree_XMLParser_doctype_impl(XMLParserObject *self) -/*[clinic end generated code: output=d09fdb9c45f3a602 input=20d5e0febf902a2f]*/ +_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name, + PyObject *pubid, PyObject *system) +/*[clinic end generated code: output=10fb50c2afded88d input=84050276cca045e1]*/ { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "This method of XMLParser is deprecated. 
Define" + " doctype() method on the TreeBuilder target.", + 1) < 0) { + return NULL; + } Py_RETURN_NONE; } diff --git a/Modules/clinic/_elementtree.c.h b/Modules/clinic/_elementtree.c.h index a4c3f91790c..86b4c4cec75 100644 --- a/Modules/clinic/_elementtree.c.h +++ b/Modules/clinic/_elementtree.c.h @@ -619,20 +619,33 @@ PyDoc_STRVAR(_elementtree_XMLParser__parse_whole__doc__, {"_parse_whole", (PyCFunction)_elementtree_XMLParser__parse_whole, METH_O, _elementtree_XMLParser__parse_whole__doc__}, PyDoc_STRVAR(_elementtree_XMLParser_doctype__doc__, -"doctype($self, /)\n" +"doctype($self, name, pubid, system, /)\n" "--\n" "\n"); #define _ELEMENTTREE_XMLPARSER_DOCTYPE_METHODDEF \ - {"doctype", (PyCFunction)_elementtree_XMLParser_doctype, METH_NOARGS, _elementtree_XMLParser_doctype__doc__}, + {"doctype", (PyCFunction)_elementtree_XMLParser_doctype, METH_VARARGS, _elementtree_XMLParser_doctype__doc__}, static PyObject * -_elementtree_XMLParser_doctype_impl(XMLParserObject *self); +_elementtree_XMLParser_doctype_impl(XMLParserObject *self, PyObject *name, + PyObject *pubid, PyObject *system); static PyObject * -_elementtree_XMLParser_doctype(XMLParserObject *self, PyObject *Py_UNUSED(ignored)) +_elementtree_XMLParser_doctype(XMLParserObject *self, PyObject *args) { - return _elementtree_XMLParser_doctype_impl(self); + PyObject *return_value = NULL; + PyObject *name; + PyObject *pubid; + PyObject *system; + + if (!PyArg_UnpackTuple(args, "doctype", + 3, 3, + &name, &pubid, &system)) + goto exit; + return_value = _elementtree_XMLParser_doctype_impl(self, name, pubid, system); + +exit: + return return_value; } PyDoc_STRVAR(_elementtree_XMLParser__setevents__doc__, @@ -663,4 +676,4 @@ _elementtree_XMLParser__setevents(XMLParserObject *self, PyObject *args) exit: return return_value; } -/*[clinic end generated code: output=119aed84c1545187 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=25b8bf7e7f2151ca input=a9049054013a1b77]*/