]> granicus.if.org Git - postgresql/commitdiff
Rewrite libxml error handling to be more robust.
authorTom Lane <tgl@sss.pgh.pa.us>
Wed, 20 Jul 2011 17:03:12 +0000 (13:03 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Wed, 20 Jul 2011 17:03:49 +0000 (13:03 -0400)
libxml reports some errors (like invalid xmlns attributes) via the error
handler hook, but still returns a success indicator to the library caller.
This causes us to miss some errors that are important to report.  Since the
"generic" error handler hook doesn't know whether the message it's getting
is for an error, warning, or notice, stop using that and instead start
using the "structured" error handler hook, which gets enough information
to be useful.

While at it, arrange to save and restore the error handler hook setting in
each libxml-using function, rather than assuming we can set and forget the
hook.  This should improve the odds of working nicely with third-party
libraries that also use libxml.

In passing, volatile-ize some local variables that get modified within
PG_TRY blocks.  I noticed this while testing with an older gcc version
than I'd previously tried to compile xml.c with.

Florian Pflug and Tom Lane, with extensive review/testing by Noah Misch

configure
configure.in
contrib/xml2/xpath.c
contrib/xml2/xslt_proc.c
src/backend/utils/adt/xml.c
src/include/pg_config.h.in
src/include/utils/xml.h
src/test/regress/expected/xml.out
src/test/regress/expected/xml_1.out
src/test/regress/sql/xml.sql

index 9b8cb48931da9d7f0ab42625c9eb6806b0718c52..ba01548186a7bf4e97d0b7cf976441e94feb1646 100755 (executable)
--- a/configure
+++ b/configure
 
 
 
+# Older versions of libxml2 lack the xmlStructuredErrorContext variable
+# (which could be a macro referring to a function, if threading is enabled)
+if test "$with_libxml" = yes ; then
+  { $as_echo "$as_me:$LINENO: checking for xmlStructuredErrorContext" >&5
+$as_echo_n "checking for xmlStructuredErrorContext... " >&6; }
+if test "${pgac_cv_libxml_structerrctx+set}" = set; then
+  $as_echo_n "(cached) " >&6
+else
+  cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+#include <libxml/globals.h>
+                void *globptr;
+int
+main ()
+{
+globptr = xmlStructuredErrorContext
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\""
+$as_echo "$ac_try_echo") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } && {
+        test -z "$ac_c_werror_flag" ||
+        test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+        test "$cross_compiling" = yes ||
+        $as_test_x conftest$ac_exeext
+       }; then
+  pgac_cv_libxml_structerrctx=yes
+else
+  $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+       pgac_cv_libxml_structerrctx=no
+fi
+
+rm -rf conftest.dSYM
+rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \
+      conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:$LINENO: result: $pgac_cv_libxml_structerrctx" >&5
+$as_echo "$pgac_cv_libxml_structerrctx" >&6; }
+  if test x"$pgac_cv_libxml_structerrctx" = x"yes"; then
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
+_ACEOF
+
+  fi
+fi
+
+
 # This test makes sure that run tests work at all.  Sometimes a shared
 # library is found by the linker, but the runtime linker can't find it.
 # This check should come after all modifications of compiler or linker
index e873c7b157237d1161d6a24a8e5c4b320949238c..b1f15f131f4a54fe365c9a81bdc839b57bad74c7 100644 (file)
@@ -1519,6 +1519,23 @@ AC_SUBST(LDAP_LIBS_FE)
 AC_SUBST(LDAP_LIBS_BE)
 
 
+# Older versions of libxml2 lack the xmlStructuredErrorContext variable
+# (which could be a macro referring to a function, if threading is enabled)
+if test "$with_libxml" = yes ; then
+  AC_CACHE_CHECK([for xmlStructuredErrorContext], pgac_cv_libxml_structerrctx,
+  [AC_TRY_LINK([#include <libxml/globals.h>
+                void *globptr;],
+               [globptr = xmlStructuredErrorContext],
+               [pgac_cv_libxml_structerrctx=yes],
+               [pgac_cv_libxml_structerrctx=no])])
+  if test x"$pgac_cv_libxml_structerrctx" = x"yes"; then
+    AC_DEFINE(HAVE_XMLSTRUCTUREDERRORCONTEXT,
+             1,
+             [Define to 1 if your libxml has xmlStructuredErrorContext.])
+  fi
+fi
+
+
 # This test makes sure that run tests work at all.  Sometimes a shared
 # library is found by the linker, but the runtime linker can't find it.
 # This check should come after all modifications of compiler or linker
index 44c600e1348d1227ae50ba8e6ed0a11dd47a8330..2ddee59fcb779ba3677affdbe16eb8e7fc3ff204 100644 (file)
@@ -38,7 +38,7 @@ Datum         xpath_table(PG_FUNCTION_ARGS);
 
 /* exported for use by xslt_proc.c */
 
-void           pgxml_parser_init(void);
+PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness);
 
 /* workspace for pgxml_xpath() */
 
@@ -68,18 +68,27 @@ static void cleanup_workspace(xpath_workspace *workspace);
 
 /*
  * Initialize for xml parsing.
+ *
+ * As with the underlying pg_xml_init function, calls to this MUST be followed
+ * by a PG_TRY block that guarantees that pg_xml_done is called.
  */
-void
-pgxml_parser_init(void)
+PgXmlErrorContext *
+pgxml_parser_init(PgXmlStrictness strictness)
 {
+       PgXmlErrorContext *xmlerrcxt;
+
        /* Set up error handling (we share the core's error handler) */
-       pg_xml_init();
+       xmlerrcxt = pg_xml_init(strictness);
+
+       /* Note: we're assuming an elog cannot be thrown by the following calls */
 
        /* Initialize libxml */
        xmlInitParser();
 
        xmlSubstituteEntitiesDefault(1);
        xmlLoadExtDtdDefaultValue = 1;
+
+       return xmlerrcxt;
 }
 
 
@@ -98,16 +107,33 @@ Datum
 xml_is_well_formed(PG_FUNCTION_ARGS)
 {
        text       *t = PG_GETARG_TEXT_P(0);            /* document buffer */
+       bool            result = false;
        int32           docsize = VARSIZE(t) - VARHDRSZ;
        xmlDocPtr       doctree;
+       PgXmlErrorContext *xmlerrcxt;
+
+       xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
+
+       PG_TRY();
+       {
+               doctree = xmlParseMemory((char *) VARDATA(t), docsize);
+
+               result = (doctree != NULL);
+
+               if (doctree != NULL)
+                       xmlFreeDoc(doctree);
+       }
+       PG_CATCH();
+       {
+               pg_xml_done(xmlerrcxt, true);
 
-       pgxml_parser_init();
+               PG_RE_THROW();
+       }
+       PG_END_TRY();
 
-       doctree = xmlParseMemory((char *) VARDATA(t), docsize);
-       if (doctree == NULL)
-               PG_RETURN_BOOL(false);  /* i.e. not well-formed */
-       xmlFreeDoc(doctree);
-       PG_RETURN_BOOL(true);
+       pg_xml_done(xmlerrcxt, false);
+
+       PG_RETURN_BOOL(result);
 }
 
 
@@ -399,41 +425,52 @@ static xmlXPathObjectPtr
 pgxml_xpath(text *document, xmlChar *xpath, xpath_workspace *workspace)
 {
        int32           docsize = VARSIZE(document) - VARHDRSZ;
-       xmlXPathObjectPtr res;
+       PgXmlErrorContext *xmlerrcxt;
        xmlXPathCompExprPtr comppath;
 
        workspace->doctree = NULL;
        workspace->ctxt = NULL;
        workspace->res = NULL;
 
-       pgxml_parser_init();
+       xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
 
-       workspace->doctree = xmlParseMemory((char *) VARDATA(document), docsize);
-       if (workspace->doctree == NULL)
-               return NULL;                    /* not well-formed */
+       PG_TRY();
+       {
+               workspace->doctree = xmlParseMemory((char *) VARDATA(document),
+                                                                                       docsize);
+               if (workspace->doctree != NULL)
+               {
+                       workspace->ctxt = xmlXPathNewContext(workspace->doctree);
+                       workspace->ctxt->node = xmlDocGetRootElement(workspace->doctree);
 
-       workspace->ctxt = xmlXPathNewContext(workspace->doctree);
-       workspace->ctxt->node = xmlDocGetRootElement(workspace->doctree);
+                       /* compile the path */
+                       comppath = xmlXPathCompile(xpath);
+                       if (comppath == NULL)
+                               xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
+                                                       "XPath Syntax Error");
 
-       /* compile the path */
-       comppath = xmlXPathCompile(xpath);
-       if (comppath == NULL)
+                       /* Now evaluate the path expression. */
+                       workspace->res = xmlXPathCompiledEval(comppath, workspace->ctxt);
+
+                       xmlXPathFreeCompExpr(comppath);
+               }
+       }
+       PG_CATCH();
        {
                cleanup_workspace(workspace);
-               xml_ereport(ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
-                                       "XPath Syntax Error");
-       }
 
-       /* Now evaluate the path expression. */
-       res = xmlXPathCompiledEval(comppath, workspace->ctxt);
-       workspace->res = res;
+               pg_xml_done(xmlerrcxt, true);
 
-       xmlXPathFreeCompExpr(comppath);
+               PG_RE_THROW();
+       }
+       PG_END_TRY();
 
-       if (res == NULL)
+       if (workspace->res == NULL)
                cleanup_workspace(workspace);
 
-       return res;
+       pg_xml_done(xmlerrcxt, false);
+
+       return workspace->res;
 }
 
 /* Clean up after processing the result of pgxml_xpath() */
@@ -534,6 +571,8 @@ xpath_table(PG_FUNCTION_ARGS)
                                                                 * document */
        bool            had_values;             /* To determine end of nodeset results */
        StringInfoData query_buf;
+       PgXmlErrorContext *xmlerrcxt;
+       volatile xmlDocPtr doctree = NULL;
 
        /* We only have a valid tuple description in table function mode */
        if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
@@ -659,14 +698,15 @@ xpath_table(PG_FUNCTION_ARGS)
         * Setup the parser.  This should happen after we are done evaluating the
         * query, in case it calls functions that set up libxml differently.
         */
-       pgxml_parser_init();
+       xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
 
+       PG_TRY();
+       {
        /* For each row i.e. document returned from SPI */
        for (i = 0; i < proc; i++)
        {
                char       *pkey;
                char       *xmldoc;
-               xmlDocPtr       doctree;
                xmlXPathContextPtr ctxt;
                xmlXPathObjectPtr res;
                xmlChar    *resstr;
@@ -718,11 +758,9 @@ xpath_table(PG_FUNCTION_ARGS)
                                        /* compile the path */
                                        comppath = xmlXPathCompile(xpaths[j]);
                                        if (comppath == NULL)
-                                       {
-                                               xmlFreeDoc(doctree);
-                                               xml_ereport(ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
+                                               xml_ereport(xmlerrcxt, ERROR,
+                                                                       ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
                                                                        "XPath Syntax Error");
-                                       }
 
                                        /* Now evaluate the path expression. */
                                        res = xmlXPathCompiledEval(comppath, ctxt);
@@ -737,8 +775,7 @@ xpath_table(PG_FUNCTION_ARGS)
                                                                if (res->nodesetval != NULL &&
                                                                        rownr < res->nodesetval->nodeNr)
                                                                {
-                                                                       resstr =
-                                                                               xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
+                                                                       resstr = xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
                                                                        had_values = true;
                                                                }
                                                                else
@@ -776,13 +813,31 @@ xpath_table(PG_FUNCTION_ARGS)
                        } while (had_values);
                }
 
-               xmlFreeDoc(doctree);
+               if (doctree != NULL)
+                       xmlFreeDoc(doctree);
+               doctree = NULL;
 
                if (pkey)
                        pfree(pkey);
                if (xmldoc)
                        pfree(xmldoc);
        }
+       }
+       PG_CATCH();
+       {
+               if (doctree != NULL)
+                       xmlFreeDoc(doctree);
+
+               pg_xml_done(xmlerrcxt, true);
+
+               PG_RE_THROW();
+       }
+       PG_END_TRY();
+
+       if (doctree != NULL)
+               xmlFreeDoc(doctree);
+
+       pg_xml_done(xmlerrcxt, false);
 
        tuplestore_donestoring(tupstore);
 
index f8f7d7263f989d0348735b66d5594cc3506f8103..a8e481a3ce85315f349697d479bf59fdea3d6ef6 100644 (file)
@@ -38,7 +38,7 @@ Datum         xslt_process(PG_FUNCTION_ARGS);
 #ifdef USE_LIBXSLT
 
 /* declarations to come from xpath.c */
-extern void pgxml_parser_init(void);
+extern PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness);
 
 /* local defs */
 static const char **parse_params(text *paramstr);
@@ -56,13 +56,14 @@ xslt_process(PG_FUNCTION_ARGS)
        text       *ssheet = PG_GETARG_TEXT_P(1);
        text       *paramstr;
        const char **params;
-       xsltStylesheetPtr stylesheet = NULL;
-       xmlDocPtr       doctree;
-       xmlDocPtr       restree;
-       xmlDocPtr       ssdoc = NULL;
-       xmlChar    *resstr;
-       int                     resstat;
-       int                     reslen;
+       PgXmlErrorContext *xmlerrcxt;
+       volatile xsltStylesheetPtr stylesheet = NULL;
+       volatile xmlDocPtr doctree = NULL;
+       volatile xmlDocPtr restree = NULL;
+       volatile xmlDocPtr ssdoc = NULL;
+       volatile int resstat = -1;
+       xmlChar    *resstr = NULL;
+       int                     reslen = 0;
 
        if (fcinfo->nargs == 3)
        {
@@ -77,9 +78,11 @@ xslt_process(PG_FUNCTION_ARGS)
        }
 
        /* Setup parser */
-       pgxml_parser_init();
+       xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
 
-       /* Check to see if document is a file or a literal */
+       PG_TRY();
+       {
+               /* Check to see if document is a file or a literal */
 
        if (VARDATA(doct)[0] == '<')
                doctree = xmlParseMemory((char *) VARDATA(doct), VARSIZE(doct) - VARHDRSZ);
@@ -87,7 +90,7 @@ xslt_process(PG_FUNCTION_ARGS)
                doctree = xmlParseFile(text_to_cstring(doct));
 
        if (doctree == NULL)
-               xml_ereport(ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
+               xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
                                        "error parsing XML document");
 
        /* Same for stylesheet */
@@ -96,35 +99,44 @@ xslt_process(PG_FUNCTION_ARGS)
                ssdoc = xmlParseMemory((char *) VARDATA(ssheet),
                                                           VARSIZE(ssheet) - VARHDRSZ);
                if (ssdoc == NULL)
-               {
-                       xmlFreeDoc(doctree);
-                       xml_ereport(ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
+                       xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
                                                "error parsing stylesheet as XML document");
-               }
 
                stylesheet = xsltParseStylesheetDoc(ssdoc);
        }
        else
                stylesheet = xsltParseStylesheetFile((xmlChar *) text_to_cstring(ssheet));
 
-
        if (stylesheet == NULL)
-       {
-               xmlFreeDoc(doctree);
-               xsltCleanupGlobals();
-               xml_ereport(ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
+               xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
                                        "failed to parse stylesheet");
-       }
 
        restree = xsltApplyStylesheet(stylesheet, doctree, params);
        resstat = xsltSaveResultToString(&resstr, &reslen, restree, stylesheet);
+       }
+       PG_CATCH();
+       {
+               if (stylesheet != NULL)
+                       xsltFreeStylesheet(stylesheet);
+               if (restree != NULL)
+                       xmlFreeDoc(restree);
+               if (doctree != NULL)
+                       xmlFreeDoc(doctree);
+               xsltCleanupGlobals();
+
+               pg_xml_done(xmlerrcxt, true);
+
+               PG_RE_THROW();
+       }
+       PG_END_TRY();
 
        xsltFreeStylesheet(stylesheet);
        xmlFreeDoc(restree);
        xmlFreeDoc(doctree);
-
        xsltCleanupGlobals();
 
+       pg_xml_done(xmlerrcxt, false);
+
        if (resstat < 0)
                PG_RETURN_NULL();
 
index 945bc2901faf6969421ed1e01692180e988f4c9e..6786cd91bb553aa88fb5aedfa0ed19ba78cb999e 100644 (file)
@@ -85,11 +85,27 @@ int                 xmloption;
 
 #ifdef USE_LIBXML
 
-static StringInfo xml_err_buf = NULL;
-
-static void xml_errorHandler(void *ctxt, const char *msg,...);
+/* random number to identify PgXmlErrorContext */
+#define ERRCXT_MAGIC   68275028
+
+struct PgXmlErrorContext
+{
+       int                     magic;
+       /* strictness argument passed to pg_xml_init */
+       PgXmlStrictness strictness;
+       /* current error status and accumulated message, if any */
+       bool            err_occurred;
+       StringInfoData err_buf;
+       /* previous libxml error handling state (saved by pg_xml_init) */
+       xmlStructuredErrorFunc saved_errfunc;
+       void       *saved_errcxt;
+};
+
+static void xml_errorHandler(void *data, xmlErrorPtr error);
 static void xml_ereport_by_code(int level, int sqlcode,
                                        const char *msg, int errcode);
+static void chopStringInfoNewlines(StringInfo str);
+static void appendStringInfoLineSeparator(StringInfo str);
 
 #ifdef USE_LIBXMLCONTEXT
 
@@ -552,8 +568,9 @@ xmlelement(XmlExprState *xmlExpr, ExprContext *econtext)
        int                     i;
        ListCell   *arg;
        ListCell   *narg;
-       xmlBufferPtr buf = NULL;
-       xmlTextWriterPtr writer = NULL;
+       PgXmlErrorContext *xmlerrcxt;
+       volatile xmlBufferPtr buf = NULL;
+       volatile xmlTextWriterPtr writer = NULL;
 
        /*
         * We first evaluate all the arguments, then start up libxml and create
@@ -598,17 +615,17 @@ xmlelement(XmlExprState *xmlExpr, ExprContext *econtext)
        }
 
        /* now safe to run libxml */
-       pg_xml_init();
+       xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
 
        PG_TRY();
        {
                buf = xmlBufferCreate();
-               if (!buf)
-                       xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
+               if (buf == NULL || xmlerrcxt->err_occurred)
+                       xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
                                                "could not allocate xmlBuffer");
                writer = xmlNewTextWriterMemory(buf, 0);
-               if (!writer)
-                       xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
+               if (writer == NULL || xmlerrcxt->err_occurred)
+                       xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
                                                "could not allocate xmlTextWriter");
 
                xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
@@ -645,12 +662,17 @@ xmlelement(XmlExprState *xmlExpr, ExprContext *econtext)
                        xmlFreeTextWriter(writer);
                if (buf)
                        xmlBufferFree(buf);
+
+               pg_xml_done(xmlerrcxt, true);
+
                PG_RE_THROW();
        }
        PG_END_TRY();
 
        xmlBufferFree(buf);
 
+       pg_xml_done(xmlerrcxt, false);
+
        return result;
 #else
        NO_XML_SUPPORT();
@@ -800,7 +822,7 @@ xml_is_document(xmltype *arg)
 {
 #ifdef USE_LIBXML
        bool            result;
-       xmlDocPtr       doc = NULL;
+       volatile xmlDocPtr doc = NULL;
        MemoryContext ccxt = CurrentMemoryContext;
 
        /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
@@ -844,30 +866,24 @@ xml_is_document(xmltype *arg)
 #ifdef USE_LIBXML
 
 /*
- * pg_xml_init --- set up for use of libxml
+ * pg_xml_init_library --- set up for use of libxml
  *
  * This should be called by each function that is about to use libxml
- * facilities. It has two responsibilities: verify compatibility with the
- * loaded libxml version (done on first call in a session) and establish
- * or re-establish our libxml error handler.  The latter needs to be done
- * anytime we might have passed control to add-on modules (eg libperl) which
- * might have set their own error handler for libxml.
- *
- * This is exported for use by contrib/xml2, as well as other code that might
- * wish to share use of this module's libxml error handler.
+ * facilities but doesn't require error handling.  It initializes libxml
+ * and verifies compatibility with the loaded libxml version.  These are
+ * once-per-session activities.
  *
  * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
  * check)
  */
 void
-pg_xml_init(void)
+pg_xml_init_library(void)
 {
        static bool first_time = true;
 
        if (first_time)
        {
                /* Stuff we need do only once per session */
-               MemoryContext oldcontext;
 
                /*
                 * Currently, we have no pure UTF-8 support for internals -- check if
@@ -879,16 +895,8 @@ pg_xml_init(void)
                                         errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
                                                           (int) sizeof(char), (int) sizeof(xmlChar))));
 
-               /* create error buffer in permanent context */
-               oldcontext = MemoryContextSwitchTo(TopMemoryContext);
-               xml_err_buf = makeStringInfo();
-               MemoryContextSwitchTo(oldcontext);
-
-               /* Now that xml_err_buf exists, safe to call xml_errorHandler */
-               xmlSetGenericErrorFunc(NULL, xml_errorHandler);
-
 #ifdef USE_LIBXMLCONTEXT
-               /* Set up memory allocation our way, too */
+               /* Set up libxml's memory allocation our way */
                xml_memory_init();
 #endif
 
@@ -897,21 +905,119 @@ pg_xml_init(void)
 
                first_time = false;
        }
-       else
-       {
-               /* Reset pre-existing buffer to empty */
-               Assert(xml_err_buf != NULL);
-               resetStringInfo(xml_err_buf);
+}
 
-               /*
-                * We re-establish the error callback function every time.      This makes
-                * it safe for other subsystems (PL/Perl, say) to also use libxml with
-                * their own callbacks ... so long as they likewise set up the
-                * callbacks on every use. It's cheap enough to not be worth worrying
-                * about, anyway.
-                */
-               xmlSetGenericErrorFunc(NULL, xml_errorHandler);
-       }
+/*
+ * pg_xml_init --- set up for use of libxml and register an error handler
+ *
+ * This should be called by each function that is about to use libxml
+ * facilities and requires error handling.  It initializes libxml with
+ * pg_xml_init_library() and establishes our libxml error handler.
+ *
+ * strictness determines which errors are reported and which are ignored.
+ *
+ * Calls to this function MUST be followed by a PG_TRY block that guarantees
+ * that pg_xml_done() is called during either normal or error exit.
+ *
+ * This is exported for use by contrib/xml2, as well as other code that might
+ * wish to share use of this module's libxml error handler.
+ */
+PgXmlErrorContext *
+pg_xml_init(PgXmlStrictness strictness)
+{
+       PgXmlErrorContext *errcxt;
+
+       /* Do one-time setup if needed */
+       pg_xml_init_library();
+
+       /* Create error handling context structure */
+       errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
+       errcxt->magic = ERRCXT_MAGIC;
+       errcxt->strictness = strictness;
+       errcxt->err_occurred = false;
+       initStringInfo(&errcxt->err_buf);
+
+       /*
+        * Save original error handler and install ours. libxml originally didn't
+        * distinguish between the contexts for generic and for structured error
+        * handlers.  If we're using an old libxml version, we must thus save
+        * the generic error context, even though we're using a structured
+        * error handler.
+        */
+       errcxt->saved_errfunc = xmlStructuredError;
+
+#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
+       errcxt->saved_errcxt = xmlStructuredErrorContext;
+#else
+       errcxt->saved_errcxt = xmlGenericErrorContext;
+#endif
+
+       xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler);
+
+       return errcxt;
+}
+
+
+/*
+ * pg_xml_done --- restore previous libxml error handling
+ *
+ * Resets libxml's global error-handling state to what it was before
+ * pg_xml_init() was called.
+ *
+ * This routine verifies that all pending errors have been dealt with
+ * (in assert-enabled builds, anyway).
+ */
+void
+pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
+{
+       void       *cur_errcxt;
+
+       /* An assert seems like enough protection here */
+       Assert(errcxt->magic == ERRCXT_MAGIC);
+
+       /*
+        * In a normal exit, there should be no un-handled libxml errors.  But we
+        * shouldn't try to enforce this during error recovery, since the longjmp
+        * could have been thrown before xml_ereport had a chance to run.
+        */
+       Assert(!errcxt->err_occurred || isError);
+
+       /*
+        * Check that libxml's global state is correct, warn if not.  This is
+        * a real test and not an Assert because it has a higher probability
+        * of happening.
+        */
+#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
+       cur_errcxt = xmlStructuredErrorContext;
+#else
+       cur_errcxt = xmlGenericErrorContext;
+#endif
+
+       if (cur_errcxt != (void *) errcxt)
+               elog(WARNING, "libxml error handling state is out of sync with xml.c");
+
+       /* Restore the saved handler */
+       xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
+
+       /*
+        * Mark the struct as invalid, just in case somebody somehow manages to
+        * call xml_errorHandler or xml_ereport with it.
+        */
+       errcxt->magic = 0;
+
+       /* Release memory */
+       pfree(errcxt->err_buf.data);
+       pfree(errcxt);
+}
+
+
+/*
+ * pg_xml_error_occurred() --- test the error flag
+ */
+bool
+pg_xml_error_occurred(PgXmlErrorContext *errcxt)
+{
+       return errcxt->err_occurred;
 }
 
 
@@ -970,7 +1076,13 @@ parse_xml_decl(const xmlChar *str, size_t *lenp,
        int                     utf8char;
        int                     utf8len;
 
-       pg_xml_init();
+       /*
+        * Only initialize libxml.  We don't need error handling here, but we do
+        * need to make sure libxml is initialized before calling any of its
+        * functions.  Note that this is safe (and a no-op) if caller has already
+        * done pg_xml_init().
+        */
+       pg_xml_init_library();
 
        /* Initialize output arguments to "not present" */
        if (version)
@@ -1124,8 +1236,6 @@ static bool
 print_xml_decl(StringInfo buf, const xmlChar *version,
                           pg_enc encoding, int standalone)
 {
-       pg_xml_init();                          /* why is this here? */
-
        if ((version && strcmp((char *) version, PG_XML_DEFAULT_VERSION) != 0)
                || (encoding && encoding != PG_UTF8)
                || standalone != -1)
@@ -1176,8 +1286,9 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
        int32           len;
        xmlChar    *string;
        xmlChar    *utf8string;
-       xmlParserCtxtPtr ctxt;
-       xmlDocPtr       doc;
+       PgXmlErrorContext *xmlerrcxt;
+       volatile xmlParserCtxtPtr ctxt = NULL;
+       volatile xmlDocPtr doc = NULL;
 
        len = VARSIZE(data) - VARHDRSZ;         /* will be useful later */
        string = xml_text2xmlChar(data);
@@ -1187,18 +1298,19 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
                                                                                   encoding,
                                                                                   PG_UTF8);
 
-       /* Start up libxml and its parser (no-ops if already done) */
-       pg_xml_init();
-       xmlInitParser();
+       /* Start up libxml and its parser */
+       xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
 
-       ctxt = xmlNewParserCtxt();
-       if (ctxt == NULL)
-               xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
-                                       "could not allocate parser context");
-
-       /* Use a TRY block to ensure the ctxt is released */
+       /* Use a TRY block to ensure we clean up correctly */
        PG_TRY();
        {
+               xmlInitParser();
+
+               ctxt = xmlNewParserCtxt();
+               if (ctxt == NULL || xmlerrcxt->err_occurred)
+                       xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+                                               "could not allocate parser context");
+
                if (xmloption_arg == XMLOPTION_DOCUMENT)
                {
                        /*
@@ -1213,8 +1325,8 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
                                                                 "UTF-8",
                                                                 XML_PARSE_NOENT | XML_PARSE_DTDATTR
                                                   | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
-                       if (doc == NULL)
-                               xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+                       if (doc == NULL || xmlerrcxt->err_occurred)
+                               xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
                                                        "invalid XML document");
                }
                else
@@ -1238,23 +1350,28 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
 
                        res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
                                                                                                   utf8string + count, NULL);
-                       if (res_code != 0)
-                       {
-                               xmlFreeDoc(doc);
-                               xml_ereport(ERROR, ERRCODE_INVALID_XML_CONTENT,
+                       if (res_code != 0 || xmlerrcxt->err_occurred)
+                               xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
                                                        "invalid XML content");
-                       }
                }
        }
        PG_CATCH();
        {
-               xmlFreeParserCtxt(ctxt);
+               if (doc != NULL)
+                       xmlFreeDoc(doc);
+               if (ctxt != NULL)
+                       xmlFreeParserCtxt(ctxt);
+
+               pg_xml_done(xmlerrcxt, true);
+
                PG_RE_THROW();
        }
        PG_END_TRY();
 
        xmlFreeParserCtxt(ctxt);
 
+       pg_xml_done(xmlerrcxt, false);
+
        return doc;
 }
 
@@ -1335,70 +1452,180 @@ xml_pstrdup(const char *string)
  * handler.  Note that pg_xml_init() *must* have been called previously.
  */
 void
-xml_ereport(int level, int sqlcode, const char *msg)
+xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
 {
        char       *detail;
 
-       /*
-        * It might seem that we should just pass xml_err_buf->data directly to
-        * errdetail.  However, we want to clean out xml_err_buf before throwing
-        * error, in case there is another function using libxml further down the
-        * call stack.
-        */
-       if (xml_err_buf->len > 0)
-       {
-               detail = pstrdup(xml_err_buf->data);
-               resetStringInfo(xml_err_buf);
-       }
+       /* Defend against someone passing us a bogus context struct */
+       if (errcxt->magic != ERRCXT_MAGIC)
+               elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
+
+       /* Flag that the current libxml error has been reported */
+       errcxt->err_occurred = false;
+
+       /* Include detail only if we have some text from libxml */
+       if (errcxt->err_buf.len > 0)
+               detail = errcxt->err_buf.data;
        else
                detail = NULL;
 
-       if (detail)
+       ereport(level,
+                       (errcode(sqlcode),
+                        errmsg_internal("%s", msg),
+                        detail ? errdetail_internal("%s", detail) : 0));
+}
+
+
+/*
+ * Error handler for libxml errors and warnings
+ */
+static void
+xml_errorHandler(void *data, xmlErrorPtr error)
+{
+       PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
+       xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
+       xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
+       xmlNodePtr node = error->node;
+       const xmlChar *name = (node != NULL &&
+                                                  node->type == XML_ELEMENT_NODE) ? node->name : NULL;
+       int                     domain = error->domain;
+       int                     level = error->level;
+       StringInfo      errorBuf;
+
+       /* Defend against someone passing us a bogus context struct */
+       if (xmlerrcxt->magic != ERRCXT_MAGIC)
+               elog(ERROR, "xml_errorHandler called with invalid PgXmlErrorContext");
+
+       /*----------
+        * Older libxml versions report some errors differently.
+        * First, some errors were previously reported as coming from the parser
+        * domain but are now reported as coming from the namespace domain.
+        * Second, some warnings were upgraded to errors.
+        * We attempt to compensate for that here.
+        *----------
+        */
+       switch (error->code)
        {
-               size_t          len;
+               case XML_WAR_NS_URI:
+                       level = XML_ERR_ERROR;
+                       domain = XML_FROM_NAMESPACE;
+                       break;
+
+               case XML_ERR_NS_DECL_ERROR:
+               case XML_WAR_NS_URI_RELATIVE:
+               case XML_WAR_NS_COLUMN:
+               case XML_NS_ERR_XML_NAMESPACE:
+               case XML_NS_ERR_UNDEFINED_NAMESPACE:
+               case XML_NS_ERR_QNAME:
+               case XML_NS_ERR_ATTRIBUTE_REDEFINED:
+               case XML_NS_ERR_EMPTY:
+                       domain = XML_FROM_NAMESPACE;
+                       break;
+       }
 
-               /* libxml error messages end in '\n'; get rid of it */
-               len = strlen(detail);
-               if (len > 0 && detail[len - 1] == '\n')
-                       detail[len - 1] = '\0';
+       /* Decide whether to act on the error or not */
+       switch (domain)
+       {
+               case XML_FROM_PARSER:
+               case XML_FROM_NONE:
+               case XML_FROM_MEMORY:
+               case XML_FROM_IO:
+                       /* Accept error regardless of the parsing purpose */
+                       break;
 
-               ereport(level,
-                               (errcode(sqlcode),
-                                errmsg_internal("%s", msg),
-                                errdetail_internal("%s", detail)));
+               default:
+                       /* Ignore error if only doing well-formedness check */
+                       if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
+                               return;
+                       break;
        }
-       else
+
+       /* Prepare error message in errorBuf */
+       errorBuf = makeStringInfo();
+
+       if (error->line > 0)
+               appendStringInfo(errorBuf, "line %d: ", error->line);
+       if (name != NULL)
+               appendStringInfo(errorBuf, "element %s: ", name);
+       appendStringInfoString(errorBuf, error->message);
+
+       /*
+        * Append context information to errorBuf.
+        *
+        * xmlParserPrintFileContext() uses libxml's "generic" error handler to
+        * write the context.  Since we don't want to duplicate libxml
+        * functionality here, we set up a generic error handler temporarily.
+        *
+        * We use appendStringInfo() directly as libxml's generic error handler.
+        * This should work because it has essentially the same signature as
+        * libxml expects, namely (void *ptr, const char *msg, ...).
+        */
+       if (input != NULL)
        {
-               ereport(level,
-                               (errcode(sqlcode),
-                                errmsg_internal("%s", msg)));
+               xmlGenericErrorFunc errFuncSaved = xmlGenericError;
+               void   *errCtxSaved = xmlGenericErrorContext;
+
+               xmlSetGenericErrorFunc((void *) errorBuf,
+                                                          (xmlGenericErrorFunc) appendStringInfo);
+
+               /* Add context information to errorBuf */
+               appendStringInfoLineSeparator(errorBuf);
+
+               xmlParserPrintFileContext(input);
+
+               /* Restore generic error func */
+               xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
        }
-}
 
+       /* Get rid of any trailing newlines in errorBuf */
+       chopStringInfoNewlines(errorBuf);
 
-/*
- * Error handler for libxml error messages
- */
-static void
-xml_errorHandler(void *ctxt, const char *msg,...)
-{
-       /* Append the formatted text to xml_err_buf */
-       for (;;)
+       /*
+        * Legacy error handling mode.  err_occurred is never set, we just add the
+        * message to err_buf.  This mode exists because the xml2 contrib module
+        * uses our error-handling infrastructure, but we don't want to change its
+        * behaviour since it's deprecated anyway.  This is also why we don't
+        * distinguish between notices, warnings and errors here --- the old-style
+        * generic error handler wouldn't have done that either.
+        */
+       if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
        {
-               va_list         args;
-               bool            success;
+               appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
+               appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
 
-               /* Try to format the data. */
-               va_start(args, msg);
-               success = appendStringInfoVA(xml_err_buf, msg, args);
-               va_end(args);
+               pfree(errorBuf->data);
+               pfree(errorBuf);
+               return;
+       }
 
-               if (success)
-                       break;
+       /*
+        * We don't want to ereport() here because that'd probably leave libxml in
+        * an inconsistent state.  Instead, we remember the error and ereport()
+        * from xml_ereport().
+        *
+        * Warnings and notices can be reported immediately since they won't cause
+        * a longjmp() out of libxml.
+        */
+       if (level >= XML_ERR_ERROR)
+       {
+               appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
+               appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
 
-               /* Double the buffer size and try again. */
-               enlargeStringInfo(xml_err_buf, xml_err_buf->maxlen);
+               xmlerrcxt->err_occurred = true;
+       }
+       else if (level >= XML_ERR_WARNING)
+       {
+               ereport(WARNING,
+                               (errmsg_internal("%s", errorBuf->data)));
+       }
+       else
+       {
+               ereport(NOTICE,
+                               (errmsg_internal("%s", errorBuf->data)));
        }
+
+       pfree(errorBuf->data);
+       pfree(errorBuf);
 }
 
 
@@ -1447,6 +1674,29 @@ xml_ereport_by_code(int level, int sqlcode,
 }
 
 
+/*
+ * Remove all trailing newlines from a StringInfo string
+ */
+static void
+chopStringInfoNewlines(StringInfo str)
+{
+       while (str->len > 0 && str->data[str->len - 1] == '\n')
+               str->data[--str->len] = '\0';
+}
+
+
+/*
+ * Append a newline after removing any existing trailing newlines
+ */
+static void
+appendStringInfoLineSeparator(StringInfo str)
+{
+       chopStringInfoNewlines(str);
+       if (str->len > 0)
+               appendStringInfoChar(str, '\n');
+}
+
+
 /*
  * Convert one char in the current server encoding to a Unicode codepoint.
  */
@@ -1753,21 +2003,22 @@ map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
                        case BYTEAOID:
                                {
                                        bytea      *bstr = DatumGetByteaPP(value);
-                                       xmlBufferPtr buf = NULL;
-                                       xmlTextWriterPtr writer = NULL;
+                                       PgXmlErrorContext *xmlerrcxt;
+                                       volatile xmlBufferPtr buf = NULL;
+                                       volatile xmlTextWriterPtr writer = NULL;
                                        char       *result;
 
-                                       pg_xml_init();
+                                       xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
 
                                        PG_TRY();
                                        {
                                                buf = xmlBufferCreate();
-                                               if (!buf)
-                                                       xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
+                                               if (buf == NULL || xmlerrcxt->err_occurred)
+                                                       xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
                                                                                "could not allocate xmlBuffer");
                                                writer = xmlNewTextWriterMemory(buf, 0);
-                                               if (!writer)
-                                                       xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
+                                               if (writer == NULL || xmlerrcxt->err_occurred)
+                                                       xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
                                                                                "could not allocate xmlTextWriter");
 
                                                if (xmlbinary == XMLBINARY_BASE64)
@@ -1789,12 +2040,17 @@ map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
                                                        xmlFreeTextWriter(writer);
                                                if (buf)
                                                        xmlBufferFree(buf);
+
+                                               pg_xml_done(xmlerrcxt, true);
+
                                                PG_RE_THROW();
                                        }
                                        PG_END_TRY();
 
                                        xmlBufferFree(buf);
 
+                                       pg_xml_done(xmlerrcxt, false);
+
                                        return result;
                                }
 #endif   /* USE_LIBXML */
@@ -3312,11 +3568,12 @@ static void
 xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
                           int *res_nitems, ArrayBuildState **astate)
 {
-       xmlParserCtxtPtr ctxt = NULL;
-       xmlDocPtr       doc = NULL;
-       xmlXPathContextPtr xpathctx = NULL;
-       xmlXPathCompExprPtr xpathcomp = NULL;
-       xmlXPathObjectPtr xpathobj = NULL;
+       PgXmlErrorContext *xmlerrcxt;
+       volatile xmlParserCtxtPtr ctxt = NULL;
+       volatile xmlDocPtr doc = NULL;
+       volatile xmlXPathContextPtr xpathctx = NULL;
+       volatile xmlXPathCompExprPtr xpathcomp = NULL;
+       volatile xmlXPathObjectPtr xpathobj = NULL;
        char       *datastr;
        int32           len;
        int32           xpath_len;
@@ -3382,30 +3639,31 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
        memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
        xpath_expr[xpath_len] = '\0';
 
-       pg_xml_init();
-       xmlInitParser();
+       xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
 
        PG_TRY();
        {
+               xmlInitParser();
+
                /*
                 * redundant XML parsing (two parsings for the same value during one
                 * command execution are possible)
                 */
                ctxt = xmlNewParserCtxt();
-               if (ctxt == NULL)
-                       xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
+               if (ctxt == NULL || xmlerrcxt->err_occurred)
+                       xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
                                                "could not allocate parser context");
                doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
-               if (doc == NULL)
-                       xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+               if (doc == NULL || xmlerrcxt->err_occurred)
+                       xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
                                                "could not parse XML document");
                xpathctx = xmlXPathNewContext(doc);
-               if (xpathctx == NULL)
-                       xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
+               if (xpathctx == NULL || xmlerrcxt->err_occurred)
+                       xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
                                                "could not allocate XPath context");
                xpathctx->node = xmlDocGetRootElement(doc);
-               if (xpathctx->node == NULL)
-                       xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+               if (xpathctx->node == NULL || xmlerrcxt->err_occurred)
+                       xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
                                                "could not find root XML element");
 
                /* register namespaces, if any */
@@ -3433,8 +3691,8 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
                }
 
                xpathcomp = xmlXPathCompile(xpath_expr);
-               if (xpathcomp == NULL)  /* TODO: show proper XPath error details */
-                       xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+               if (xpathcomp == NULL || xmlerrcxt->err_occurred)
+                       xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
                                                "invalid XPath expression");
 
                /*
@@ -3445,8 +3703,8 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
                 * the same.
                 */
                xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
-               if (xpathobj == NULL)   /* TODO: reason? */
-                       xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+               if (xpathobj == NULL || xmlerrcxt->err_occurred)
+                       xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
                                                "could not create XPath object");
 
                /* return empty array in cases when nothing is found */
@@ -3482,6 +3740,9 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
                        xmlFreeDoc(doc);
                if (ctxt)
                        xmlFreeParserCtxt(ctxt);
+
+               pg_xml_done(xmlerrcxt, true);
+
                PG_RE_THROW();
        }
        PG_END_TRY();
@@ -3491,6 +3752,8 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
        xmlXPathFreeContext(xpathctx);
        xmlFreeDoc(doc);
        xmlFreeParserCtxt(ctxt);
+
+       pg_xml_done(xmlerrcxt, false);
 }
 #endif   /* USE_LIBXML */
 
@@ -3579,7 +3842,7 @@ static bool
 wellformed_xml(text *data, XmlOptionType xmloption_arg)
 {
        bool            result;
-       xmlDocPtr       doc = NULL;
+       volatile xmlDocPtr doc = NULL;
 
        /* We want to catch any exceptions and return false */
        PG_TRY();
index 19f38cc0fd0839fe8b41f80b4fb30b7a332ecfde..24b951e7bd866cf895ea6589065e0d4e394d5ece 100644 (file)
 /* Define to 1 if you have the <winldap.h> header file. */
 #undef HAVE_WINLDAP_H
 
+/* Define to 1 if your libxml has xmlStructuredErrorContext. */
+#undef HAVE_XMLSTRUCTUREDERRORCONTEXT
+
 /* Define to the appropriate snprintf format for 64-bit ints. */
 #undef INT64_FORMAT
 
index 6359cd6043b0ef880a75b5fdca53a2a498febc9a..f9f6f568592ab31b1500accd2a541c6559f04c15 100644 (file)
 
 typedef struct varlena xmltype;
 
+typedef enum
+{
+       XML_STANDALONE_YES,
+       XML_STANDALONE_NO,
+       XML_STANDALONE_NO_VALUE,
+       XML_STANDALONE_OMITTED
+}      XmlStandaloneType;
+
+typedef enum
+{
+       XMLBINARY_BASE64,
+       XMLBINARY_HEX
+}      XmlBinaryType;
+
+typedef enum
+{
+       PG_XML_STRICTNESS_LEGACY,               /* ignore errors unless function result
+                                                                        * indicates error condition */
+       PG_XML_STRICTNESS_WELLFORMED,   /* ignore non-parser messages */
+       PG_XML_STRICTNESS_ALL                   /* report all notices/warnings/errors */
+}      PgXmlStrictness;
+
+/* struct PgXmlErrorContext is private to xml.c */
+typedef struct PgXmlErrorContext PgXmlErrorContext;
+
 #define DatumGetXmlP(X)                ((xmltype *) PG_DETOAST_DATUM(X))
 #define XmlPGetDatum(X)                PointerGetDatum(X)
 
@@ -60,16 +85,13 @@ extern Datum database_to_xml(PG_FUNCTION_ARGS);
 extern Datum database_to_xmlschema(PG_FUNCTION_ARGS);
 extern Datum database_to_xml_and_xmlschema(PG_FUNCTION_ARGS);
 
-typedef enum
-{
-       XML_STANDALONE_YES,
-       XML_STANDALONE_NO,
-       XML_STANDALONE_NO_VALUE,
-       XML_STANDALONE_OMITTED
-}      XmlStandaloneType;
+extern void pg_xml_init_library(void);
+extern PgXmlErrorContext *pg_xml_init(PgXmlStrictness strictness);
+extern void pg_xml_done(PgXmlErrorContext *errcxt, bool isError);
+extern bool pg_xml_error_occurred(PgXmlErrorContext *errcxt);
+extern void xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode,
+                                               const char *msg);
 
-extern void pg_xml_init(void);
-extern void xml_ereport(int level, int sqlcode, const char *msg);
 extern xmltype *xmlconcat(List *args);
 extern xmltype *xmlelement(XmlExprState *xmlExpr, ExprContext *econtext);
 extern xmltype *xmlparse(text *data, XmlOptionType xmloption, bool preserve_whitespace);
@@ -83,12 +105,6 @@ extern char *map_sql_identifier_to_xml_name(char *ident, bool fully_escaped, boo
 extern char *map_xml_name_to_sql_identifier(char *name);
 extern char *map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings);
 
-typedef enum
-{
-       XMLBINARY_BASE64,
-       XMLBINARY_HEX
-}      XmlBinaryType;
-
 extern int     xmlbinary;                      /* XmlBinaryType, but int for guc enum */
 
 extern int     xmloption;                      /* XmlOptionType, but int for guc enum */
index eaa5a74ef07be3af472d31af820d6ed90ba92b7f..379777aced811a3eb0b9d6a455795da4a04ed13a 100644 (file)
@@ -8,7 +8,7 @@ INSERT INTO xmltest VALUES (3, '<wrong');
 ERROR:  invalid XML content
 LINE 1: INSERT INTO xmltest VALUES (3, '<wrong');
                                        ^
-DETAIL:  Entity: line 1: parser error : Couldn't find end of Start Tag wrong line 1
+DETAIL:  line 1: Couldn't find end of Start Tag wrong line 1
 <wrong
       ^
 SELECT * FROM xmltest;
@@ -62,7 +62,7 @@ SELECT xmlconcat('bad', '<syntax');
 ERROR:  invalid XML content
 LINE 1: SELECT xmlconcat('bad', '<syntax');
                                 ^
-DETAIL:  Entity: line 1: parser error : Couldn't find end of Start Tag syntax line 1
+DETAIL:  line 1: Couldn't find end of Start Tag syntax line 1
 <syntax
        ^
 SELECT xmlconcat('<foo/>', NULL, '<?xml version="1.1" standalone="no"?><bar/>');
@@ -206,9 +206,54 @@ SELECT xmlparse(content '<abc>x</abc>');
  <abc>x</abc>
 (1 row)
 
+SELECT xmlparse(content '<invalidentity>&</invalidentity>');
+ERROR:  invalid XML content
+DETAIL:  line 1: xmlParseEntityRef: no name
+<invalidentity>&</invalidentity>
+                ^
+line 1: chunk is not well balanced
+<invalidentity>&</invalidentity>
+                                ^
+SELECT xmlparse(content '<undefinedentity>&idontexist;</undefinedentity>');
+ERROR:  invalid XML content
+DETAIL:  line 1: Entity 'idontexist' not defined
+<undefinedentity>&idontexist;</undefinedentity>
+                             ^
+line 1: chunk is not well balanced
+<undefinedentity>&idontexist;</undefinedentity>
+                                               ^
+SELECT xmlparse(content '<invalidns xmlns=''&lt;''/>');
+         xmlparse          
+---------------------------
+ <invalidns xmlns='&lt;'/>
+(1 row)
+
+SELECT xmlparse(content '<relativens xmlns=''relative''/>');
+            xmlparse            
+--------------------------------
+ <relativens xmlns='relative'/>
+(1 row)
+
+SELECT xmlparse(content '<twoerrors>&idontexist;</unbalanced>');
+ERROR:  invalid XML content
+DETAIL:  line 1: Entity 'idontexist' not defined
+<twoerrors>&idontexist;</unbalanced>
+                       ^
+line 1: Opening and ending tag mismatch: twoerrors line 1 and unbalanced
+<twoerrors>&idontexist;</unbalanced>
+                                    ^
+line 1: chunk is not well balanced
+<twoerrors>&idontexist;</unbalanced>
+                                    ^
+SELECT xmlparse(content '<nosuchprefix:tag/>');
+      xmlparse       
+---------------------
+ <nosuchprefix:tag/>
+(1 row)
+
 SELECT xmlparse(document 'abc');
 ERROR:  invalid XML document
-DETAIL:  Entity: line 1: parser error : Start tag expected, '<' not found
+DETAIL:  line 1: Start tag expected, '<' not found
 abc
 ^
 SELECT xmlparse(document '<abc>x</abc>');
@@ -217,6 +262,48 @@ SELECT xmlparse(document '<abc>x</abc>');
  <abc>x</abc>
 (1 row)
 
+SELECT xmlparse(document '<invalidentity>&</abc>');
+ERROR:  invalid XML document
+DETAIL:  line 1: xmlParseEntityRef: no name
+<invalidentity>&</abc>
+                ^
+line 1: Opening and ending tag mismatch: invalidentity line 1 and abc
+<invalidentity>&</abc>
+                      ^
+SELECT xmlparse(document '<undefinedentity>&idontexist;</abc>');
+ERROR:  invalid XML document
+DETAIL:  line 1: Entity 'idontexist' not defined
+<undefinedentity>&idontexist;</abc>
+                             ^
+line 1: Opening and ending tag mismatch: undefinedentity line 1 and abc
+<undefinedentity>&idontexist;</abc>
+                                   ^
+SELECT xmlparse(document '<invalidns xmlns=''&lt;''/>');
+         xmlparse          
+---------------------------
+ <invalidns xmlns='&lt;'/>
+(1 row)
+
+SELECT xmlparse(document '<relativens xmlns=''relative''/>');
+            xmlparse            
+--------------------------------
+ <relativens xmlns='relative'/>
+(1 row)
+
+SELECT xmlparse(document '<twoerrors>&idontexist;</unbalanced>');
+ERROR:  invalid XML document
+DETAIL:  line 1: Entity 'idontexist' not defined
+<twoerrors>&idontexist;</unbalanced>
+                       ^
+line 1: Opening and ending tag mismatch: twoerrors line 1 and unbalanced
+<twoerrors>&idontexist;</unbalanced>
+                                    ^
+SELECT xmlparse(document '<nosuchprefix:tag/>');
+      xmlparse       
+---------------------
+ <nosuchprefix:tag/>
+(1 row)
+
 SELECT xmlpi(name foo);
   xmlpi  
 ---------
@@ -379,7 +466,7 @@ SELECT '<>' IS NOT DOCUMENT;
 ERROR:  invalid XML content
 LINE 1: SELECT '<>' IS NOT DOCUMENT;
                ^
-DETAIL:  Entity: line 1: parser error : StartTag: invalid element name
+DETAIL:  line 1: StartTag: invalid element name
 <>
  ^
 SELECT xmlagg(data) FROM xmltest;
@@ -425,7 +512,7 @@ EXECUTE foo ('bad');
 ERROR:  invalid XML document
 LINE 1: EXECUTE foo ('bad');
                      ^
-DETAIL:  Entity: line 1: parser error : Start tag expected, '<' not found
+DETAIL:  line 1: Start tag expected, '<' not found
 bad
 ^
 SET XML OPTION CONTENT;
@@ -679,6 +766,36 @@ SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</p
  t
 (1 row)
 
+SELECT xml_is_well_formed('<invalidentity>&</abc>');
+ xml_is_well_formed 
+--------------------
+ f
+(1 row)
+
+SELECT xml_is_well_formed('<undefinedentity>&idontexist;</abc>');
+ xml_is_well_formed 
+--------------------
+ f
+(1 row)
+
+SELECT xml_is_well_formed('<invalidns xmlns=''&lt;''/>');
+ xml_is_well_formed 
+--------------------
+ t
+(1 row)
+
+SELECT xml_is_well_formed('<relativens xmlns=''relative''/>');
+ xml_is_well_formed 
+--------------------
+ t
+(1 row)
+
+SELECT xml_is_well_formed('<twoerrors>&idontexist;</unbalanced>');
+ xml_is_well_formed 
+--------------------
+ f
+(1 row)
+
 SET xmloption TO CONTENT;
 SELECT xml_is_well_formed('abc');
  xml_is_well_formed 
@@ -686,3 +803,36 @@ SELECT xml_is_well_formed('abc');
  t
 (1 row)
 
+-- Since xpath() deals with namespaces, it's a bit stricter about
+-- what's well-formed and what's not. If we don't obey these rules
+-- (i.e. ignore namespace-related errors from libxml), xpath()
+-- fails in subtle ways. The following would for example produce
+-- the xml value
+--   <invalidns xmlns='<'/>
+-- which is invalid because '<' may not appear un-escaped in
+-- attribute values.
+-- Since different libxml versions emit slightly different
+-- error messages, we suppress the DETAIL in this test.
+\set VERBOSITY terse
+SELECT xpath('/*', '<invalidns xmlns=''&lt;''/>');
+ERROR:  could not parse XML document
+\set VERBOSITY default
+-- Again, the XML isn't well-formed for namespace purposes
+SELECT xpath('/*', '<nosuchprefix:tag/>');
+ERROR:  could not parse XML document
+DETAIL:  line 1: Namespace prefix nosuchprefix on tag is not defined
+<nosuchprefix:tag/>
+                 ^
+CONTEXT:  SQL function "xpath" statement 1
+-- XPath deprecates relative namespaces, but they're not supposed to
+-- throw an error, only a warning.
+SELECT xpath('/*', '<relativens xmlns=''relative''/>');
+WARNING:  line 1: xmlns: URI relative is not absolute
+<relativens xmlns='relative'/>
+                            ^
+CONTEXT:  SQL function "xpath" statement 1
+                xpath                 
+--------------------------------------
+ {"<relativens xmlns=\"relative\"/>"}
+(1 row)
+
index 711b4358a25f413c032721cb58919dbc2617fd7d..1f17bffc0b2088f2afbb1f9793ea1247a79cbedd 100644 (file)
@@ -172,6 +172,30 @@ SELECT xmlparse(content '<abc>x</abc>');
 ERROR:  unsupported XML feature
 DETAIL:  This functionality requires the server to be built with libxml support.
 HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xmlparse(content '<invalidentity>&</invalidentity>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xmlparse(content '<undefinedentity>&idontexist;</undefinedentity>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xmlparse(content '<invalidns xmlns=''&lt;''/>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xmlparse(content '<relativens xmlns=''relative''/>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xmlparse(content '<twoerrors>&idontexist;</unbalanced>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xmlparse(content '<nosuchprefix:tag/>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
 SELECT xmlparse(document 'abc');
 ERROR:  unsupported XML feature
 DETAIL:  This functionality requires the server to be built with libxml support.
@@ -180,6 +204,30 @@ SELECT xmlparse(document '<abc>x</abc>');
 ERROR:  unsupported XML feature
 DETAIL:  This functionality requires the server to be built with libxml support.
 HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xmlparse(document '<invalidentity>&</abc>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xmlparse(document '<undefinedentity>&idontexist;</abc>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xmlparse(document '<invalidns xmlns=''&lt;''/>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xmlparse(document '<relativens xmlns=''relative''/>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xmlparse(document '<twoerrors>&idontexist;</unbalanced>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xmlparse(document '<nosuchprefix:tag/>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
 SELECT xmlpi(name foo);
 ERROR:  unsupported XML feature
 DETAIL:  This functionality requires the server to be built with libxml support.
@@ -627,8 +675,57 @@ SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</p
 ERROR:  unsupported XML feature
 DETAIL:  This functionality requires the server to be built with libxml support.
 HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml_is_well_formed('<invalidentity>&</abc>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml_is_well_formed('<undefinedentity>&idontexist;</abc>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml_is_well_formed('<invalidns xmlns=''&lt;''/>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml_is_well_formed('<relativens xmlns=''relative''/>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml_is_well_formed('<twoerrors>&idontexist;</unbalanced>');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
 SET xmloption TO CONTENT;
 SELECT xml_is_well_formed('abc');
 ERROR:  unsupported XML feature
 DETAIL:  This functionality requires the server to be built with libxml support.
 HINT:  You need to rebuild PostgreSQL using --with-libxml.
+-- Since xpath() deals with namespaces, it's a bit stricter about
+-- what's well-formed and what's not. If we don't obey these rules
+-- (i.e. ignore namespace-related errors from libxml), xpath()
+-- fails in subtle ways. The following would for example produce
+-- the xml value
+--   <invalidns xmlns='<'/>
+-- which is invalid because '<' may not appear un-escaped in
+-- attribute values.
+-- Since different libxml versions emit slightly different
+-- error messages, we suppress the DETAIL in this test.
+\set VERBOSITY terse
+SELECT xpath('/*', '<invalidns xmlns=''&lt;''/>');
+ERROR:  unsupported XML feature at character 20
+\set VERBOSITY default
+-- Again, the XML isn't well-formed for namespace purposes
+SELECT xpath('/*', '<nosuchprefix:tag/>');
+ERROR:  unsupported XML feature
+LINE 1: SELECT xpath('/*', '<nosuchprefix:tag/>');
+                           ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+-- XPath deprecates relative namespaces, but they're not supposed to
+-- throw an error, only a warning.
+SELECT xpath('/*', '<relativens xmlns=''relative''/>');
+ERROR:  unsupported XML feature
+LINE 1: SELECT xpath('/*', '<relativens xmlns=''relative''/>');
+                           ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
index 717a1e7170e0fc97548a952d90cc395da9a04ed3..f4e423618ecdae34f24a38177ddb337f7ee17183 100644 (file)
@@ -62,9 +62,21 @@ SELECT xmlelement(name foo, xmlattributes('<>&"''' as funny, xml 'b<a/>r' as fun
 
 SELECT xmlparse(content 'abc');
 SELECT xmlparse(content '<abc>x</abc>');
+SELECT xmlparse(content '<invalidentity>&</invalidentity>');
+SELECT xmlparse(content '<undefinedentity>&idontexist;</undefinedentity>');
+SELECT xmlparse(content '<invalidns xmlns=''&lt;''/>');
+SELECT xmlparse(content '<relativens xmlns=''relative''/>');
+SELECT xmlparse(content '<twoerrors>&idontexist;</unbalanced>');
+SELECT xmlparse(content '<nosuchprefix:tag/>');
 
 SELECT xmlparse(document 'abc');
 SELECT xmlparse(document '<abc>x</abc>');
+SELECT xmlparse(document '<invalidentity>&</abc>');
+SELECT xmlparse(document '<undefinedentity>&idontexist;</abc>');
+SELECT xmlparse(document '<invalidns xmlns=''&lt;''/>');
+SELECT xmlparse(document '<relativens xmlns=''relative''/>');
+SELECT xmlparse(document '<twoerrors>&idontexist;</unbalanced>');
+SELECT xmlparse(document '<nosuchprefix:tag/>');
 
 
 SELECT xmlpi(name foo);
@@ -208,6 +220,32 @@ SELECT xml_is_well_formed('<foo><bar>baz</foo>');
 SELECT xml_is_well_formed('<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
 SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</my:foo>');
 SELECT xml_is_well_formed('<pg:foo xmlns:pg="http://postgresql.org/stuff">bar</pg:foo>');
+SELECT xml_is_well_formed('<invalidentity>&</abc>');
+SELECT xml_is_well_formed('<undefinedentity>&idontexist;</abc>');
+SELECT xml_is_well_formed('<invalidns xmlns=''&lt;''/>');
+SELECT xml_is_well_formed('<relativens xmlns=''relative''/>');
+SELECT xml_is_well_formed('<twoerrors>&idontexist;</unbalanced>');
 
 SET xmloption TO CONTENT;
 SELECT xml_is_well_formed('abc');
+
+-- Since xpath() deals with namespaces, it's a bit stricter about
+-- what's well-formed and what's not. If we don't obey these rules
+-- (i.e. ignore namespace-related errors from libxml), xpath()
+-- fails in subtle ways. The following would for example produce
+-- the xml value
+--   <invalidns xmlns='<'/>
+-- which is invalid because '<' may not appear un-escaped in
+-- attribute values.
+-- Since different libxml versions emit slightly different
+-- error messages, we suppress the DETAIL in this test.
+\set VERBOSITY terse
+SELECT xpath('/*', '<invalidns xmlns=''&lt;''/>');
+\set VERBOSITY default
+
+-- Again, the XML isn't well-formed for namespace purposes
+SELECT xpath('/*', '<nosuchprefix:tag/>');
+
+-- XPath deprecates relative namespaces, but they're not supposed to
+-- throw an error, only a warning.
+SELECT xpath('/*', '<relativens xmlns=''relative''/>');