granicus.if.org Git - python/commitdiff
Issue #14007: implement doctype() method calling in XMLParser of _elementtree.
author Eli Bendersky <eliben@gmail.com>
Fri, 1 Jun 2012 08:32:34 +0000 (11:32 +0300)
committer Eli Bendersky <eliben@gmail.com>
Fri, 1 Jun 2012 08:32:34 +0000 (11:32 +0300)
Includes exposing a doctype handler from expat through pyexpat.

Include/pyexpat.h
Lib/test/test_xml_etree.py
Modules/_elementtree.c
Modules/pyexpat.c

index 5340ef5fa386744e727a009abe84c1ee94abafda..168b5b2ae1dd433ebc0268073a1bb9e8741b2134 100644 (file)
@@ -43,6 +43,8 @@ struct PyExpat_CAPI
         XML_Parser parser, XML_UnknownEncodingHandler handler,
         void *encodingHandlerData);
     void (*SetUserData)(XML_Parser parser, void *userData);
+    void (*SetStartDoctypeDeclHandler)(XML_Parser parser,
+                                       XML_StartDoctypeDeclHandler start);
     /* always add new stuff to the end! */
 };
 
index 31e005b0079d10af20cfde1e16f987c85f8d27ea..49a5633a830512d7de5f2b8d491ced1181761aad 100644 (file)
@@ -2009,7 +2009,6 @@ class TreeBuilderTest(unittest.TestCase):
 
         self.assertEqual(lst, ['toplevel'])
 
-    @unittest.expectedFailure   # XXX issue 14007 with C ElementTree
     def test_doctype(self):
         class DoctypeParser:
             _doctype = None
@@ -2030,6 +2029,10 @@ class TreeBuilderTest(unittest.TestCase):
 
 class XMLParserTest(unittest.TestCase):
     sample1 = '<file><line>22</line></file>'
+    sample2 = ('<!DOCTYPE html PUBLIC'
+        ' "-//W3C//DTD XHTML 1.0 Transitional//EN"'
+        ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">'
+        '<html>text</html>')
 
     def _check_sample_element(self, e):
         self.assertEqual(e.tag, 'file')
@@ -2055,6 +2058,20 @@ class XMLParserTest(unittest.TestCase):
         parser.feed(self.sample1)
         self._check_sample_element(parser.close())
 
+    def test_subclass_doctype(self):
+        _doctype = None
+        class MyParserWithDoctype(ET.XMLParser):
+            def doctype(self, name, pubid, system):
+                nonlocal _doctype
+                _doctype = (name, pubid, system)
+
+        parser = MyParserWithDoctype()
+        parser.feed(self.sample2)
+        parser.close()
+        self.assertEqual(_doctype,
+            ('html', '-//W3C//DTD XHTML 1.0 Transitional//EN',
+             'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'))
+
 
 class NoAcceleratorTest(unittest.TestCase):
     # Test that the C accelerator was not imported for pyET
index 7bc18808faa55eb99f8b7f09cc1b235cf8da287b..d984b51bed15aa966dacf75daab7d37f97226b85 100644 (file)
@@ -2257,24 +2257,27 @@ typedef struct {
 
     XML_Parser parser;
 
-    PyObject* target;
-    PyObject* entity;
+    PyObject *target;
+    PyObject *entity;
 
-    PyObject* names;
+    PyObject *names;
 
-    PyObject* handle_start;
-    PyObject* handle_data;
-    PyObject* handle_end;
+    PyObject *handle_start;
+    PyObject *handle_data;
+    PyObject *handle_end;
 
-    PyObject* handle_comment;
-    PyObject* handle_pi;
+    PyObject *handle_comment;
+    PyObject *handle_pi;
+    PyObject *handle_doctype;
 
-    PyObject* handle_close;
+    PyObject *handle_close;
 
 } XMLParserObject;
 
 static PyTypeObject XMLParser_Type;
 
+#define XMLParser_CheckExact(op) (Py_TYPE(op) == &XMLParser_Type)
+
 /* helpers */
 
 LOCAL(PyObject*)
@@ -2601,6 +2604,78 @@ expat_comment_handler(XMLParserObject* self, const XML_Char* comment_in)
     }
 }
 
+/* Expat StartDoctypeDecl callback.  Passes (name, pubid, system) to the
+ * target's doctype handler if one is set, then to the parser's own doctype()
+ * method when the parser is an XMLParser subclass instance (deprecated: warns
+ * first).  NULL ids become None; failures leave the Python exception set. */
+static void
+expat_start_doctype_handler(XMLParserObject *self,
+                            const XML_Char *doctype_name,
+                            const XML_Char *sysid,
+                            const XML_Char *pubid,
+                            int has_internal_subset)
+{
+    PyObject *self_pyobj = (PyObject *)self;
+    PyObject *doctype_name_obj = NULL, *sysid_obj = NULL, *pubid_obj = NULL;
+    PyObject *parser_doctype = NULL;
+    PyObject *res;
+
+    doctype_name_obj = makeuniversal(self, doctype_name);
+    if (!doctype_name_obj)
+        goto clear;
+
+    if (sysid) {
+        sysid_obj = makeuniversal(self, sysid);
+        if (!sysid_obj)
+            goto clear;
+    } else {
+        Py_INCREF(Py_None);
+        sysid_obj = Py_None;
+    }
+
+    if (pubid) {
+        pubid_obj = makeuniversal(self, pubid);
+        if (!pubid_obj)
+            goto clear;
+    } else {
+        Py_INCREF(Py_None);
+        pubid_obj = Py_None;
+    }
+
+    /* If the target has a handler for doctype, call it. */
+    if (self->handle_doctype) {
+        res = PyObject_CallFunction(self->handle_doctype, "OOO",
+                                    doctype_name_obj, pubid_obj, sysid_obj);
+        /* Do not call back into Python with an exception still pending. */
+        if (!res)
+            goto clear;
+        Py_DECREF(res);
+    }
+
+    /* Now see if the parser itself has a doctype method. If yes and it's
+     * a subclass, call it but warn about deprecation. If it's not a subclass
+     * (i.e. vanilla XMLParser), do nothing.
+     */
+    parser_doctype = PyObject_GetAttrString(self_pyobj, "doctype");
+    if (parser_doctype && !XMLParser_CheckExact(self_pyobj)) {
+        if (PyErr_WarnEx(PyExc_DeprecationWarning,
+                         "This method of XMLParser is deprecated.  Define"
+                         " doctype() method on the TreeBuilder target.",
+                         1) < 0) {
+            goto clear;
+        }
+        res = PyObject_CallFunction(parser_doctype, "OOO",
+                                    doctype_name_obj, pubid_obj, sysid_obj);
+        Py_XDECREF(res);
+    }
+
+clear:
+    Py_XDECREF(parser_doctype);
+    Py_XDECREF(doctype_name_obj);
+    Py_XDECREF(pubid_obj);
+    Py_XDECREF(sysid_obj);
+}
+
 static void
 expat_pi_handler(XMLParserObject* self, const XML_Char* target_in,
                  const XML_Char* data_in)
@@ -2676,6 +2751,7 @@ xmlparser_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
         self->target = self->entity = self->names = NULL;
         self->handle_start = self->handle_data = self->handle_end = NULL;
         self->handle_comment = self->handle_pi = self->handle_close = NULL;
+        self->handle_doctype = NULL;
     }
     return (PyObject *)self;
 }
@@ -2730,6 +2806,7 @@ xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
     self_xp->handle_comment = PyObject_GetAttrString(target, "comment");
     self_xp->handle_pi = PyObject_GetAttrString(target, "pi");
     self_xp->handle_close = PyObject_GetAttrString(target, "close");
+    self_xp->handle_doctype = PyObject_GetAttrString(target, "doctype");
 
     PyErr_Clear();
     
@@ -2758,6 +2835,10 @@ xmlparser_init(PyObject *self, PyObject *args, PyObject *kwds)
             self_xp->parser,
             (XML_ProcessingInstructionHandler) expat_pi_handler
             );
+    EXPAT(SetStartDoctypeDeclHandler)(
+        self_xp->parser,
+        (XML_StartDoctypeDeclHandler) expat_start_doctype_handler
+        );
     EXPAT(SetUnknownEncodingHandler)(
         self_xp->parser,
         (XML_UnknownEncodingHandler) expat_unknown_encoding_handler, NULL
@@ -2794,6 +2875,7 @@ xmlparser_gc_clear(XMLParserObject *self)
     Py_XDECREF(self->handle_end);
     Py_XDECREF(self->handle_data);
     Py_XDECREF(self->handle_start);
+    Py_XDECREF(self->handle_doctype);
 
     Py_XDECREF(self->target);
     Py_XDECREF(self->entity);
@@ -2950,7 +3032,13 @@ xmlparser_parse(XMLParserObject* self, PyObject* args)
 }
 
 static PyObject*
-xmlparser_setevents(XMLParserObject* self, PyObject* args)
+xmlparser_doctype(XMLParserObject *self, PyObject *args)
+{
+    Py_RETURN_NONE;
+}
+
+static PyObject*
+xmlparser_setevents(XMLParserObject *self, PyObject* args)
 {
     /* activate element event reporting */
 
@@ -3054,6 +3142,7 @@ static PyMethodDef xmlparser_methods[] = {
     {"close", (PyCFunction) xmlparser_close, METH_VARARGS},
     {"_parse", (PyCFunction) xmlparser_parse, METH_VARARGS},
     {"_setevents", (PyCFunction) xmlparser_setevents, METH_VARARGS},
+    {"doctype", (PyCFunction) xmlparser_doctype, METH_VARARGS},
     {NULL, NULL}
 };
 
index fb023298f540faa85dac256237011d4896db8aa4..a500a1e25d32e7fcc04152290429b16cbb696a36 100644 (file)
@@ -1904,6 +1904,7 @@ MODULE_INITFUNC(void)
     capi.SetProcessingInstructionHandler = XML_SetProcessingInstructionHandler;
     capi.SetUnknownEncodingHandler = XML_SetUnknownEncodingHandler;
     capi.SetUserData = XML_SetUserData;
+    capi.SetStartDoctypeDeclHandler = XML_SetStartDoctypeDeclHandler;
 
     /* export using capsule */
     capi_object = PyCapsule_New(&capi, PyExpat_CAPSULE_NAME, NULL);