From cacd42d62cb2ddf32135b151f627780a5509780f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 20 Jul 2011 13:03:12 -0400 Subject: [PATCH] Rewrite libxml error handling to be more robust. libxml reports some errors (like invalid xmlns attributes) via the error handler hook, but still returns a success indicator to the library caller. This causes us to miss some errors that are important to report. Since the "generic" error handler hook doesn't know whether the message it's getting is for an error, warning, or notice, stop using that and instead start using the "structured" error handler hook, which gets enough information to be useful. While at it, arrange to save and restore the error handler hook setting in each libxml-using function, rather than assuming we can set and forget the hook. This should improve the odds of working nicely with third-party libraries that also use libxml. In passing, volatile-ize some local variables that get modified within PG_TRY blocks. I noticed this while testing with an older gcc version than I'd previously tried to compile xml.c with. Florian Pflug and Tom Lane, with extensive review/testing by Noah Misch --- configure | 69 ++++ configure.in | 17 + contrib/xml2/xpath.c | 131 +++++-- contrib/xml2/xslt_proc.c | 56 +-- src/backend/utils/adt/xml.c | 541 +++++++++++++++++++++------- src/include/pg_config.h.in | 3 + src/include/utils/xml.h | 46 ++- src/test/regress/expected/xml.out | 160 +++++++- src/test/regress/expected/xml_1.out | 97 +++++ src/test/regress/sql/xml.sql | 38 ++ 10 files changed, 939 insertions(+), 219 deletions(-) diff --git a/configure b/configure index 9b8cb48931..ba01548186 100755 --- a/configure +++ b/configure @@ -23655,6 +23655,75 @@ fi +# Older versions of libxml2 lack the xmlStructuredErrorContext variable +# (which could be a macro referring to a function, if threading is enabled) +if test "$with_libxml" = yes ; then + { $as_echo "$as_me:$LINENO: checking for xmlStructuredErrorContext" >&5 +$as_echo_n "checking for xmlStructuredErrorContext... " >&6; } +if test "${pgac_cv_libxml_structerrctx+set}" = set; then + $as_echo_n "(cached) " >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include + void *globptr; +int +main () +{ +globptr = xmlStructuredErrorContext + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext conftest$ac_exeext +if { (ac_try="$ac_link" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval ac_try_echo="\"\$as_me:$LINENO: $ac_try_echo\"" +$as_echo "$ac_try_echo") >&5 + (eval "$ac_link") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + $as_echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && { + test -z "$ac_c_werror_flag" || + test ! -s conftest.err + } && test -s conftest$ac_exeext && { + test "$cross_compiling" = yes || + $as_test_x conftest$ac_exeext + }; then + pgac_cv_libxml_structerrctx=yes +else + $as_echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + pgac_cv_libxml_structerrctx=no +fi + +rm -rf conftest.dSYM +rm -f core conftest.err conftest.$ac_objext conftest_ipa8_conftest.oo \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:$LINENO: result: $pgac_cv_libxml_structerrctx" >&5 +$as_echo "$pgac_cv_libxml_structerrctx" >&6; } + if test x"$pgac_cv_libxml_structerrctx" = x"yes"; then + +cat >>confdefs.h <<\_ACEOF +#define HAVE_XMLSTRUCTUREDERRORCONTEXT 1 +_ACEOF + + fi +fi + + # This test makes sure that run tests work at all. Sometimes a shared # library is found by the linker, but the runtime linker can't find it. # This check should come after all modifications of compiler or linker diff --git a/configure.in b/configure.in index e873c7b157..b1f15f131f 100644 --- a/configure.in +++ b/configure.in @@ -1519,6 +1519,23 @@ AC_SUBST(LDAP_LIBS_FE) AC_SUBST(LDAP_LIBS_BE) +# Older versions of libxml2 lack the xmlStructuredErrorContext variable +# (which could be a macro referring to a function, if threading is enabled) +if test "$with_libxml" = yes ; then + AC_CACHE_CHECK([for xmlStructuredErrorContext], pgac_cv_libxml_structerrctx, + [AC_TRY_LINK([#include + void *globptr;], + [globptr = xmlStructuredErrorContext], + [pgac_cv_libxml_structerrctx=yes], + [pgac_cv_libxml_structerrctx=no])]) + if test x"$pgac_cv_libxml_structerrctx" = x"yes"; then + AC_DEFINE(HAVE_XMLSTRUCTUREDERRORCONTEXT, + 1, + [Define to 1 if your libxml has xmlStructuredErrorContext.]) + fi +fi + + # This test makes sure that run tests work at all. Sometimes a shared # library is found by the linker, but the runtime linker can't find it. # This check should come after all modifications of compiler or linker diff --git a/contrib/xml2/xpath.c b/contrib/xml2/xpath.c index 44c600e134..2ddee59fcb 100644 --- a/contrib/xml2/xpath.c +++ b/contrib/xml2/xpath.c @@ -38,7 +38,7 @@ Datum xpath_table(PG_FUNCTION_ARGS); /* exported for use by xslt_proc.c */ -void pgxml_parser_init(void); +PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness); /* workspace for pgxml_xpath() */ @@ -68,18 +68,27 @@ static void cleanup_workspace(xpath_workspace *workspace); /* * Initialize for xml parsing. + * + * As with the underlying pg_xml_init function, calls to this MUST be followed + * by a PG_TRY block that guarantees that pg_xml_done is called. */ -void -pgxml_parser_init(void) +PgXmlErrorContext * +pgxml_parser_init(PgXmlStrictness strictness) { + PgXmlErrorContext *xmlerrcxt; + /* Set up error handling (we share the core's error handler) */ - pg_xml_init(); + xmlerrcxt = pg_xml_init(strictness); + + /* Note: we're assuming an elog cannot be thrown by the following calls */ /* Initialize libxml */ xmlInitParser(); xmlSubstituteEntitiesDefault(1); xmlLoadExtDtdDefaultValue = 1; + + return xmlerrcxt; } @@ -98,16 +107,33 @@ Datum xml_is_well_formed(PG_FUNCTION_ARGS) { text *t = PG_GETARG_TEXT_P(0); /* document buffer */ + bool result = false; int32 docsize = VARSIZE(t) - VARHDRSZ; xmlDocPtr doctree; + PgXmlErrorContext *xmlerrcxt; + + xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY); + + PG_TRY(); + { + doctree = xmlParseMemory((char *) VARDATA(t), docsize); + + result = (doctree != NULL); + + if (doctree != NULL) + xmlFreeDoc(doctree); + } + PG_CATCH(); + { + pg_xml_done(xmlerrcxt, true); - pgxml_parser_init(); + PG_RE_THROW(); + } + PG_END_TRY(); - doctree = xmlParseMemory((char *) VARDATA(t), docsize); - if (doctree == NULL) - PG_RETURN_BOOL(false); /* i.e. not well-formed */ - xmlFreeDoc(doctree); - PG_RETURN_BOOL(true); + pg_xml_done(xmlerrcxt, false); + + PG_RETURN_BOOL(result); } @@ -399,41 +425,52 @@ static xmlXPathObjectPtr pgxml_xpath(text *document, xmlChar *xpath, xpath_workspace *workspace) { int32 docsize = VARSIZE(document) - VARHDRSZ; - xmlXPathObjectPtr res; + PgXmlErrorContext *xmlerrcxt; xmlXPathCompExprPtr comppath; workspace->doctree = NULL; workspace->ctxt = NULL; workspace->res = NULL; - pgxml_parser_init(); + xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY); - workspace->doctree = xmlParseMemory((char *) VARDATA(document), docsize); - if (workspace->doctree == NULL) - return NULL; /* not well-formed */ + PG_TRY(); + { + workspace->doctree = xmlParseMemory((char *) VARDATA(document), + docsize); + if (workspace->doctree != NULL) + { + workspace->ctxt = xmlXPathNewContext(workspace->doctree); + workspace->ctxt->node = xmlDocGetRootElement(workspace->doctree); - workspace->ctxt = xmlXPathNewContext(workspace->doctree); - workspace->ctxt->node = xmlDocGetRootElement(workspace->doctree); + /* compile the path */ + comppath = xmlXPathCompile(xpath); + if (comppath == NULL) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, + "XPath Syntax Error"); - /* compile the path */ - comppath = xmlXPathCompile(xpath); - if (comppath == NULL) + /* Now evaluate the path expression. */ + workspace->res = xmlXPathCompiledEval(comppath, workspace->ctxt); + + xmlXPathFreeCompExpr(comppath); + } + } + PG_CATCH(); { cleanup_workspace(workspace); - xml_ereport(ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, - "XPath Syntax Error"); - } - /* Now evaluate the path expression. */ - res = xmlXPathCompiledEval(comppath, workspace->ctxt); - workspace->res = res; + pg_xml_done(xmlerrcxt, true); - xmlXPathFreeCompExpr(comppath); + PG_RE_THROW(); + } + PG_END_TRY(); - if (res == NULL) + if (workspace->res == NULL) cleanup_workspace(workspace); - return res; + pg_xml_done(xmlerrcxt, false); + + return workspace->res; } /* Clean up after processing the result of pgxml_xpath() */ @@ -534,6 +571,8 @@ xpath_table(PG_FUNCTION_ARGS) * document */ bool had_values; /* To determine end of nodeset results */ StringInfoData query_buf; + PgXmlErrorContext *xmlerrcxt; + volatile xmlDocPtr doctree = NULL; /* We only have a valid tuple description in table function mode */ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) @@ -659,14 +698,15 @@ xpath_table(PG_FUNCTION_ARGS) * Setup the parser. This should happen after we are done evaluating the * query, in case it calls functions that set up libxml differently. */ - pgxml_parser_init(); + xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY); + PG_TRY(); + { /* For each row i.e. document returned from SPI */ for (i = 0; i < proc; i++) { char *pkey; char *xmldoc; - xmlDocPtr doctree; xmlXPathContextPtr ctxt; xmlXPathObjectPtr res; xmlChar *resstr; @@ -718,11 +758,9 @@ xpath_table(PG_FUNCTION_ARGS) /* compile the path */ comppath = xmlXPathCompile(xpaths[j]); if (comppath == NULL) - { - xmlFreeDoc(doctree); - xml_ereport(ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, + xml_ereport(xmlerrcxt, ERROR, + ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, "XPath Syntax Error"); - } /* Now evaluate the path expression. */ res = xmlXPathCompiledEval(comppath, ctxt); @@ -737,8 +775,7 @@ xpath_table(PG_FUNCTION_ARGS) if (res->nodesetval != NULL && rownr < res->nodesetval->nodeNr) { - resstr = - xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]); + resstr = xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]); had_values = true; } else @@ -776,13 +813,31 @@ xpath_table(PG_FUNCTION_ARGS) } while (had_values); } - xmlFreeDoc(doctree); + if (doctree != NULL) + xmlFreeDoc(doctree); + doctree = NULL; if (pkey) pfree(pkey); if (xmldoc) pfree(xmldoc); } + } + PG_CATCH(); + { + if (doctree != NULL) + xmlFreeDoc(doctree); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); + + if (doctree != NULL) + xmlFreeDoc(doctree); + + pg_xml_done(xmlerrcxt, false); tuplestore_donestoring(tupstore); diff --git a/contrib/xml2/xslt_proc.c b/contrib/xml2/xslt_proc.c index f8f7d7263f..a8e481a3ce 100644 --- a/contrib/xml2/xslt_proc.c +++ b/contrib/xml2/xslt_proc.c @@ -38,7 +38,7 @@ Datum xslt_process(PG_FUNCTION_ARGS); #ifdef USE_LIBXSLT /* declarations to come from xpath.c */ -extern void pgxml_parser_init(void); +extern PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness); /* local defs */ static const char **parse_params(text *paramstr); @@ -56,13 +56,14 @@ xslt_process(PG_FUNCTION_ARGS) text *ssheet = PG_GETARG_TEXT_P(1); text *paramstr; const char **params; - xsltStylesheetPtr stylesheet = NULL; - xmlDocPtr doctree; - xmlDocPtr restree; - xmlDocPtr ssdoc = NULL; - xmlChar *resstr; - int resstat; - int reslen; + PgXmlErrorContext *xmlerrcxt; + volatile xsltStylesheetPtr stylesheet = NULL; + volatile xmlDocPtr doctree = NULL; + volatile xmlDocPtr restree = NULL; + volatile xmlDocPtr ssdoc = NULL; + volatile int resstat = -1; + xmlChar *resstr = NULL; + int reslen = 0; if (fcinfo->nargs == 3) { @@ -77,9 +78,11 @@ xslt_process(PG_FUNCTION_ARGS) } /* Setup parser */ - pgxml_parser_init(); + xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY); - /* Check to see if document is a file or a literal */ + PG_TRY(); + { + /* Check to see if document is a file or a literal */ if (VARDATA(doct)[0] == '<') doctree = xmlParseMemory((char *) VARDATA(doct), VARSIZE(doct) - VARHDRSZ); @@ -87,7 +90,7 @@ xslt_process(PG_FUNCTION_ARGS) doctree = xmlParseFile(text_to_cstring(doct)); if (doctree == NULL) - xml_ereport(ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, + xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, "error parsing XML document"); /* Same for stylesheet */ @@ -96,35 +99,44 @@ xslt_process(PG_FUNCTION_ARGS) ssdoc = xmlParseMemory((char *) VARDATA(ssheet), VARSIZE(ssheet) - VARHDRSZ); if (ssdoc == NULL) - { - xmlFreeDoc(doctree); - xml_ereport(ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, + xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, "error parsing stylesheet as XML document"); - } stylesheet = xsltParseStylesheetDoc(ssdoc); } else stylesheet = xsltParseStylesheetFile((xmlChar *) text_to_cstring(ssheet)); - if (stylesheet == NULL) - { - xmlFreeDoc(doctree); - xsltCleanupGlobals(); - xml_ereport(ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, + xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION, "failed to parse stylesheet"); - } restree = xsltApplyStylesheet(stylesheet, doctree, params); resstat = xsltSaveResultToString(&resstr, &reslen, restree, stylesheet); + } + PG_CATCH(); + { + if (stylesheet != NULL) + xsltFreeStylesheet(stylesheet); + if (restree != NULL) + xmlFreeDoc(restree); + if (doctree != NULL) + xmlFreeDoc(doctree); + xsltCleanupGlobals(); + + pg_xml_done(xmlerrcxt, true); + + PG_RE_THROW(); + } + PG_END_TRY(); xsltFreeStylesheet(stylesheet); xmlFreeDoc(restree); xmlFreeDoc(doctree); - xsltCleanupGlobals(); + pg_xml_done(xmlerrcxt, false); + if (resstat < 0) PG_RETURN_NULL(); diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 945bc2901f..6786cd91bb 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -85,11 +85,27 @@ int xmloption; #ifdef USE_LIBXML -static StringInfo xml_err_buf = NULL; - -static void xml_errorHandler(void *ctxt, const char *msg,...); +/* random number to identify PgXmlErrorContext */ +#define ERRCXT_MAGIC 68275028 + +struct PgXmlErrorContext +{ + int magic; + /* strictness argument passed to pg_xml_init */ + PgXmlStrictness strictness; + /* current error status and accumulated message, if any */ + bool err_occurred; + StringInfoData err_buf; + /* previous libxml error handling state (saved by pg_xml_init) */ + xmlStructuredErrorFunc saved_errfunc; + void *saved_errcxt; +}; + +static void xml_errorHandler(void *data, xmlErrorPtr error); static void xml_ereport_by_code(int level, int sqlcode, const char *msg, int errcode); +static void chopStringInfoNewlines(StringInfo str); +static void appendStringInfoLineSeparator(StringInfo str); #ifdef USE_LIBXMLCONTEXT @@ -552,8 +568,9 @@ xmlelement(XmlExprState *xmlExpr, ExprContext *econtext) int i; ListCell *arg; ListCell *narg; - xmlBufferPtr buf = NULL; - xmlTextWriterPtr writer = NULL; + PgXmlErrorContext *xmlerrcxt; + volatile xmlBufferPtr buf = NULL; + volatile xmlTextWriterPtr writer = NULL; /* * We first evaluate all the arguments, then start up libxml and create @@ -598,17 +615,17 @@ xmlelement(XmlExprState *xmlExpr, ExprContext *econtext) } /* now safe to run libxml */ - pg_xml_init(); + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); PG_TRY(); { buf = xmlBufferCreate(); - if (!buf) - xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, + if (buf == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate xmlBuffer"); writer = xmlNewTextWriterMemory(buf, 0); - if (!writer) - xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, + if (writer == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate xmlTextWriter"); xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name); @@ -645,12 +662,17 @@ xmlelement(XmlExprState *xmlExpr, ExprContext *econtext) xmlFreeTextWriter(writer); if (buf) xmlBufferFree(buf); + + pg_xml_done(xmlerrcxt, true); + PG_RE_THROW(); } PG_END_TRY(); xmlBufferFree(buf); + pg_xml_done(xmlerrcxt, false); + return result; #else NO_XML_SUPPORT(); @@ -800,7 +822,7 @@ xml_is_document(xmltype *arg) { #ifdef USE_LIBXML bool result; - xmlDocPtr doc = NULL; + volatile xmlDocPtr doc = NULL; MemoryContext ccxt = CurrentMemoryContext; /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */ @@ -844,30 +866,24 @@ xml_is_document(xmltype *arg) #ifdef USE_LIBXML /* - * pg_xml_init --- set up for use of libxml + * pg_xml_init_library --- set up for use of libxml * * This should be called by each function that is about to use libxml - * facilities. It has two responsibilities: verify compatibility with the - * loaded libxml version (done on first call in a session) and establish - * or re-establish our libxml error handler. The latter needs to be done - * anytime we might have passed control to add-on modules (eg libperl) which - * might have set their own error handler for libxml. - * - * This is exported for use by contrib/xml2, as well as other code that might - * wish to share use of this module's libxml error handler. + * facilities but doesn't require error handling. It initializes libxml + * and verifies compatibility with the loaded libxml version. These are + * once-per-session activities. * * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and * check) */ void -pg_xml_init(void) +pg_xml_init_library(void) { static bool first_time = true; if (first_time) { /* Stuff we need do only once per session */ - MemoryContext oldcontext; /* * Currently, we have no pure UTF-8 support for internals -- check if @@ -879,16 +895,8 @@ pg_xml_init(void) errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.", (int) sizeof(char), (int) sizeof(xmlChar)))); - /* create error buffer in permanent context */ - oldcontext = MemoryContextSwitchTo(TopMemoryContext); - xml_err_buf = makeStringInfo(); - MemoryContextSwitchTo(oldcontext); - - /* Now that xml_err_buf exists, safe to call xml_errorHandler */ - xmlSetGenericErrorFunc(NULL, xml_errorHandler); - #ifdef USE_LIBXMLCONTEXT - /* Set up memory allocation our way, too */ + /* Set up libxml's memory allocation our way */ xml_memory_init(); #endif @@ -897,21 +905,119 @@ pg_xml_init(void) first_time = false; } - else - { - /* Reset pre-existing buffer to empty */ - Assert(xml_err_buf != NULL); - resetStringInfo(xml_err_buf); +} - /* - * We re-establish the error callback function every time. This makes - * it safe for other subsystems (PL/Perl, say) to also use libxml with - * their own callbacks ... so long as they likewise set up the - * callbacks on every use. It's cheap enough to not be worth worrying - * about, anyway. - */ - xmlSetGenericErrorFunc(NULL, xml_errorHandler); - } +/* + * pg_xml_init --- set up for use of libxml and register an error handler + * + * This should be called by each function that is about to use libxml + * facilities and requires error handling. It initializes libxml with + * pg_xml_init_library() and establishes our libxml error handler. + * + * strictness determines which errors are reported and which are ignored. + * + * Calls to this function MUST be followed by a PG_TRY block that guarantees + * that pg_xml_done() is called during either normal or error exit. + * + * This is exported for use by contrib/xml2, as well as other code that might + * wish to share use of this module's libxml error handler. + */ +PgXmlErrorContext * +pg_xml_init(PgXmlStrictness strictness) +{ + PgXmlErrorContext *errcxt; + + /* Do one-time setup if needed */ + pg_xml_init_library(); + + /* Create error handling context structure */ + errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext)); + errcxt->magic = ERRCXT_MAGIC; + errcxt->strictness = strictness; + errcxt->err_occurred = false; + initStringInfo(&errcxt->err_buf); + + /* + * Save original error handler and install ours. libxml originally didn't + * distinguish between the contexts for generic and for structured error + * handlers. If we're using an old libxml version, we must thus save + * the generic error context, even though we're using a structured + * error handler. + */ + errcxt->saved_errfunc = xmlStructuredError; + +#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT + errcxt->saved_errcxt = xmlStructuredErrorContext; +#else + errcxt->saved_errcxt = xmlGenericErrorContext; +#endif + + xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler); + + return errcxt; +} + + +/* + * pg_xml_done --- restore previous libxml error handling + * + * Resets libxml's global error-handling state to what it was before + * pg_xml_init() was called. + * + * This routine verifies that all pending errors have been dealt with + * (in assert-enabled builds, anyway). + */ +void +pg_xml_done(PgXmlErrorContext *errcxt, bool isError) +{ + void *cur_errcxt; + + /* An assert seems like enough protection here */ + Assert(errcxt->magic == ERRCXT_MAGIC); + + /* + * In a normal exit, there should be no un-handled libxml errors. But we + * shouldn't try to enforce this during error recovery, since the longjmp + * could have been thrown before xml_ereport had a chance to run. + */ + Assert(!errcxt->err_occurred || isError); + + /* + * Check that libxml's global state is correct, warn if not. This is + * a real test and not an Assert because it has a higher probability + * of happening. + */ +#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT + cur_errcxt = xmlStructuredErrorContext; +#else + cur_errcxt = xmlGenericErrorContext; +#endif + + if (cur_errcxt != (void *) errcxt) + elog(WARNING, "libxml error handling state is out of sync with xml.c"); + + /* Restore the saved handler */ + xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc); + + /* + * Mark the struct as invalid, just in case somebody somehow manages to + * call xml_errorHandler or xml_ereport with it. + */ + errcxt->magic = 0; + + /* Release memory */ + pfree(errcxt->err_buf.data); + pfree(errcxt); +} + + +/* + * pg_xml_error_occurred() --- test the error flag + */ +bool +pg_xml_error_occurred(PgXmlErrorContext *errcxt) +{ + return errcxt->err_occurred; } @@ -970,7 +1076,13 @@ parse_xml_decl(const xmlChar *str, size_t *lenp, int utf8char; int utf8len; - pg_xml_init(); + /* + * Only initialize libxml. We don't need error handling here, but we do + * need to make sure libxml is initialized before calling any of its + * functions. Note that this is safe (and a no-op) if caller has already + * done pg_xml_init(). + */ + pg_xml_init_library(); /* Initialize output arguments to "not present" */ if (version) @@ -1124,8 +1236,6 @@ static bool print_xml_decl(StringInfo buf, const xmlChar *version, pg_enc encoding, int standalone) { - pg_xml_init(); /* why is this here? */ - if ((version && strcmp((char *) version, PG_XML_DEFAULT_VERSION) != 0) || (encoding && encoding != PG_UTF8) || standalone != -1) @@ -1176,8 +1286,9 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, int32 len; xmlChar *string; xmlChar *utf8string; - xmlParserCtxtPtr ctxt; - xmlDocPtr doc; + PgXmlErrorContext *xmlerrcxt; + volatile xmlParserCtxtPtr ctxt = NULL; + volatile xmlDocPtr doc = NULL; len = VARSIZE(data) - VARHDRSZ; /* will be useful later */ string = xml_text2xmlChar(data); @@ -1187,18 +1298,19 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, encoding, PG_UTF8); - /* Start up libxml and its parser (no-ops if already done) */ - pg_xml_init(); - xmlInitParser(); + /* Start up libxml and its parser */ + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED); - ctxt = xmlNewParserCtxt(); - if (ctxt == NULL) - xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, - "could not allocate parser context"); - - /* Use a TRY block to ensure the ctxt is released */ + /* Use a TRY block to ensure we clean up correctly */ PG_TRY(); { + xmlInitParser(); + + ctxt = xmlNewParserCtxt(); + if (ctxt == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, + "could not allocate parser context"); + if (xmloption_arg == XMLOPTION_DOCUMENT) { /* @@ -1213,8 +1325,8 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, "UTF-8", XML_PARSE_NOENT | XML_PARSE_DTDATTR | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS)); - if (doc == NULL) - xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT, + if (doc == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, "invalid XML document"); } else @@ -1238,23 +1350,28 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, utf8string + count, NULL); - if (res_code != 0) - { - xmlFreeDoc(doc); - xml_ereport(ERROR, ERRCODE_INVALID_XML_CONTENT, + if (res_code != 0 || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT, "invalid XML content"); - } } } PG_CATCH(); { - xmlFreeParserCtxt(ctxt); + if (doc != NULL) + xmlFreeDoc(doc); + if (ctxt != NULL) + xmlFreeParserCtxt(ctxt); + + pg_xml_done(xmlerrcxt, true); + PG_RE_THROW(); } PG_END_TRY(); xmlFreeParserCtxt(ctxt); + pg_xml_done(xmlerrcxt, false); + return doc; } @@ -1335,70 +1452,180 @@ xml_pstrdup(const char *string) * handler. Note that pg_xml_init() *must* have been called previously. */ void -xml_ereport(int level, int sqlcode, const char *msg) +xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg) { char *detail; - /* - * It might seem that we should just pass xml_err_buf->data directly to - * errdetail. However, we want to clean out xml_err_buf before throwing - * error, in case there is another function using libxml further down the - * call stack. - */ - if (xml_err_buf->len > 0) - { - detail = pstrdup(xml_err_buf->data); - resetStringInfo(xml_err_buf); - } + /* Defend against someone passing us a bogus context struct */ + if (errcxt->magic != ERRCXT_MAGIC) + elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext"); + + /* Flag that the current libxml error has been reported */ + errcxt->err_occurred = false; + + /* Include detail only if we have some text from libxml */ + if (errcxt->err_buf.len > 0) + detail = errcxt->err_buf.data; else detail = NULL; - if (detail) + ereport(level, + (errcode(sqlcode), + errmsg_internal("%s", msg), + detail ? errdetail_internal("%s", detail) : 0)); +} + + +/* + * Error handler for libxml errors and warnings + */ +static void +xml_errorHandler(void *data, xmlErrorPtr error) +{ + PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data; + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt; + xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL; + xmlNodePtr node = error->node; + const xmlChar *name = (node != NULL && + node->type == XML_ELEMENT_NODE) ? node->name : NULL; + int domain = error->domain; + int level = error->level; + StringInfo errorBuf; + + /* Defend against someone passing us a bogus context struct */ + if (xmlerrcxt->magic != ERRCXT_MAGIC) + elog(ERROR, "xml_errorHandler called with invalid PgXmlErrorContext"); + + /*---------- + * Older libxml versions report some errors differently. + * First, some errors were previously reported as coming from the parser + * domain but are now reported as coming from the namespace domain. + * Second, some warnings were upgraded to errors. + * We attempt to compensate for that here. + *---------- + */ + switch (error->code) { - size_t len; + case XML_WAR_NS_URI: + level = XML_ERR_ERROR; + domain = XML_FROM_NAMESPACE; + break; + + case XML_ERR_NS_DECL_ERROR: + case XML_WAR_NS_URI_RELATIVE: + case XML_WAR_NS_COLUMN: + case XML_NS_ERR_XML_NAMESPACE: + case XML_NS_ERR_UNDEFINED_NAMESPACE: + case XML_NS_ERR_QNAME: + case XML_NS_ERR_ATTRIBUTE_REDEFINED: + case XML_NS_ERR_EMPTY: + domain = XML_FROM_NAMESPACE; + break; + } - /* libxml error messages end in '\n'; get rid of it */ - len = strlen(detail); - if (len > 0 && detail[len - 1] == '\n') - detail[len - 1] = '\0'; + /* Decide whether to act on the error or not */ + switch (domain) + { + case XML_FROM_PARSER: + case XML_FROM_NONE: + case XML_FROM_MEMORY: + case XML_FROM_IO: + /* Accept error regardless of the parsing purpose */ + break; - ereport(level, - (errcode(sqlcode), - errmsg_internal("%s", msg), - errdetail_internal("%s", detail))); + default: + /* Ignore error if only doing well-formedness check */ + if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED) + return; + break; } - else + + /* Prepare error message in errorBuf */ + errorBuf = makeStringInfo(); + + if (error->line > 0) + appendStringInfo(errorBuf, "line %d: ", error->line); + if (name != NULL) + appendStringInfo(errorBuf, "element %s: ", name); + appendStringInfoString(errorBuf, error->message); + + /* + * Append context information to errorBuf. + * + * xmlParserPrintFileContext() uses libxml's "generic" error handler to + * write the context. Since we don't want to duplicate libxml + * functionality here, we set up a generic error handler temporarily. + * + * We use appendStringInfo() directly as libxml's generic error handler. + * This should work because it has essentially the same signature as + * libxml expects, namely (void *ptr, const char *msg, ...). + */ + if (input != NULL) { - ereport(level, - (errcode(sqlcode), - errmsg_internal("%s", msg))); + xmlGenericErrorFunc errFuncSaved = xmlGenericError; + void *errCtxSaved = xmlGenericErrorContext; + + xmlSetGenericErrorFunc((void *) errorBuf, + (xmlGenericErrorFunc) appendStringInfo); + + /* Add context information to errorBuf */ + appendStringInfoLineSeparator(errorBuf); + + xmlParserPrintFileContext(input); + + /* Restore generic error func */ + xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved); } -} + /* Get rid of any trailing newlines in errorBuf */ + chopStringInfoNewlines(errorBuf); -/* - * Error handler for libxml error messages - */ -static void -xml_errorHandler(void *ctxt, const char *msg,...) -{ - /* Append the formatted text to xml_err_buf */ - for (;;) + /* + * Legacy error handling mode. err_occurred is never set, we just add the + * message to err_buf. This mode exists because the xml2 contrib module + * uses our error-handling infrastructure, but we don't want to change its + * behaviour since it's deprecated anyway. This is also why we don't + * distinguish between notices, warnings and errors here --- the old-style + * generic error handler wouldn't have done that either. + */ + if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY) { - va_list args; - bool success; + appendStringInfoLineSeparator(&xmlerrcxt->err_buf); + appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data); - /* Try to format the data. */ - va_start(args, msg); - success = appendStringInfoVA(xml_err_buf, msg, args); - va_end(args); + pfree(errorBuf->data); + pfree(errorBuf); + return; + } - if (success) - break; + /* + * We don't want to ereport() here because that'd probably leave libxml in + * an inconsistent state. Instead, we remember the error and ereport() + * from xml_ereport(). + * + * Warnings and notices can be reported immediately since they won't cause + * a longjmp() out of libxml. + */ + if (level >= XML_ERR_ERROR) + { + appendStringInfoLineSeparator(&xmlerrcxt->err_buf); + appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data); - /* Double the buffer size and try again. */ - enlargeStringInfo(xml_err_buf, xml_err_buf->maxlen); + xmlerrcxt->err_occurred = true; + } + else if (level >= XML_ERR_WARNING) + { + ereport(WARNING, + (errmsg_internal("%s", errorBuf->data))); + } + else + { + ereport(NOTICE, + (errmsg_internal("%s", errorBuf->data))); } + + pfree(errorBuf->data); + pfree(errorBuf); } @@ -1447,6 +1674,29 @@ xml_ereport_by_code(int level, int sqlcode, } +/* + * Remove all trailing newlines from a StringInfo string + */ +static void +chopStringInfoNewlines(StringInfo str) +{ + while (str->len > 0 && str->data[str->len - 1] == '\n') + str->data[--str->len] = '\0'; +} + + +/* + * Append a newline after removing any existing trailing newlines + */ +static void +appendStringInfoLineSeparator(StringInfo str) +{ + chopStringInfoNewlines(str); + if (str->len > 0) + appendStringInfoChar(str, '\n'); +} + + /* * Convert one char in the current server encoding to a Unicode codepoint. */ @@ -1753,21 +2003,22 @@ map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings) case BYTEAOID: { bytea *bstr = DatumGetByteaPP(value); - xmlBufferPtr buf = NULL; - xmlTextWriterPtr writer = NULL; + PgXmlErrorContext *xmlerrcxt; + volatile xmlBufferPtr buf = NULL; + volatile xmlTextWriterPtr writer = NULL; char *result; - pg_xml_init(); + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); PG_TRY(); { buf = xmlBufferCreate(); - if (!buf) - xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, + if (buf == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate xmlBuffer"); writer = xmlNewTextWriterMemory(buf, 0); - if (!writer) - xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, + if (writer == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate xmlTextWriter"); if (xmlbinary == XMLBINARY_BASE64) @@ -1789,12 +2040,17 @@ map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings) xmlFreeTextWriter(writer); if (buf) xmlBufferFree(buf); + + pg_xml_done(xmlerrcxt, true); + PG_RE_THROW(); } PG_END_TRY(); xmlBufferFree(buf); + pg_xml_done(xmlerrcxt, false); + return result; } #endif /* USE_LIBXML */ @@ -3312,11 +3568,12 @@ static void xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, int *res_nitems, ArrayBuildState **astate) { - xmlParserCtxtPtr ctxt = NULL; - xmlDocPtr doc = NULL; - xmlXPathContextPtr xpathctx = NULL; - xmlXPathCompExprPtr xpathcomp = NULL; - xmlXPathObjectPtr xpathobj = NULL; + PgXmlErrorContext *xmlerrcxt; + volatile xmlParserCtxtPtr ctxt = NULL; + volatile xmlDocPtr doc = NULL; + volatile xmlXPathContextPtr xpathctx = NULL; + volatile xmlXPathCompExprPtr xpathcomp = NULL; + volatile xmlXPathObjectPtr xpathobj = NULL; char *datastr; int32 len; int32 xpath_len; @@ -3382,30 +3639,31 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len); xpath_expr[xpath_len] = '\0'; - pg_xml_init(); - xmlInitParser(); + xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL); PG_TRY(); { + xmlInitParser(); + /* * redundant XML parsing (two parsings for the same value during one * command execution are possible) */ ctxt = xmlNewParserCtxt(); - if (ctxt == NULL) - xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, + if (ctxt == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate parser context"); doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0); - if (doc == NULL) - xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT, + if (doc == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT, "could not parse XML document"); xpathctx = xmlXPathNewContext(doc); - if (xpathctx == NULL) - xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY, + if (xpathctx == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY, "could not allocate XPath context"); xpathctx->node = xmlDocGetRootElement(doc); - if (xpathctx->node == NULL) - xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR, + if (xpathctx->node == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, "could not find root XML element"); /* register namespaces, if any */ @@ -3433,8 +3691,8 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, } xpathcomp = xmlXPathCompile(xpath_expr); - if (xpathcomp == NULL) /* TODO: show proper XPath error details */ - xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR, + if (xpathcomp == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, "invalid XPath expression"); /* @@ -3445,8 +3703,8 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, * the same. */ xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx); - if (xpathobj == NULL) /* TODO: reason? */ - xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR, + if (xpathobj == NULL || xmlerrcxt->err_occurred) + xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR, "could not create XPath object"); /* return empty array in cases when nothing is found */ @@ -3482,6 +3740,9 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, xmlFreeDoc(doc); if (ctxt) xmlFreeParserCtxt(ctxt); + + pg_xml_done(xmlerrcxt, true); + PG_RE_THROW(); } PG_END_TRY(); @@ -3491,6 +3752,8 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces, xmlXPathFreeContext(xpathctx); xmlFreeDoc(doc); xmlFreeParserCtxt(ctxt); + + pg_xml_done(xmlerrcxt, false); } #endif /* USE_LIBXML */ @@ -3579,7 +3842,7 @@ static bool wellformed_xml(text *data, XmlOptionType xmloption_arg) { bool result; - xmlDocPtr doc = NULL; + volatile xmlDocPtr doc = NULL; /* We want to catch any exceptions and return false */ PG_TRY(); diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 19f38cc0fd..24b951e7bd 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -671,6 +671,9 @@ /* Define to 1 if you have the header file. */ #undef HAVE_WINLDAP_H +/* Define to 1 if your libxml has xmlStructuredErrorContext. */ +#undef HAVE_XMLSTRUCTUREDERRORCONTEXT + /* Define to the appropriate snprintf format for 64-bit ints. */ #undef INT64_FORMAT diff --git a/src/include/utils/xml.h b/src/include/utils/xml.h index 6359cd6043..f9f6f56859 100644 --- a/src/include/utils/xml.h +++ b/src/include/utils/xml.h @@ -21,6 +21,31 @@ typedef struct varlena xmltype; +typedef enum +{ + XML_STANDALONE_YES, + XML_STANDALONE_NO, + XML_STANDALONE_NO_VALUE, + XML_STANDALONE_OMITTED +} XmlStandaloneType; + +typedef enum +{ + XMLBINARY_BASE64, + XMLBINARY_HEX +} XmlBinaryType; + +typedef enum +{ + PG_XML_STRICTNESS_LEGACY, /* ignore errors unless function result + * indicates error condition */ + PG_XML_STRICTNESS_WELLFORMED, /* ignore non-parser messages */ + PG_XML_STRICTNESS_ALL /* report all notices/warnings/errors */ +} PgXmlStrictness; + +/* struct PgXmlErrorContext is private to xml.c */ +typedef struct PgXmlErrorContext PgXmlErrorContext; + #define DatumGetXmlP(X) ((xmltype *) PG_DETOAST_DATUM(X)) #define XmlPGetDatum(X) PointerGetDatum(X) @@ -60,16 +85,13 @@ extern Datum database_to_xml(PG_FUNCTION_ARGS); extern Datum database_to_xmlschema(PG_FUNCTION_ARGS); extern Datum database_to_xml_and_xmlschema(PG_FUNCTION_ARGS); -typedef enum -{ - XML_STANDALONE_YES, - XML_STANDALONE_NO, - XML_STANDALONE_NO_VALUE, - XML_STANDALONE_OMITTED -} XmlStandaloneType; +extern void pg_xml_init_library(void); +extern PgXmlErrorContext *pg_xml_init(PgXmlStrictness strictness); +extern void pg_xml_done(PgXmlErrorContext *errcxt, bool isError); +extern bool pg_xml_error_occurred(PgXmlErrorContext *errcxt); +extern void xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, + const char *msg); -extern void pg_xml_init(void); -extern void xml_ereport(int level, int sqlcode, const char *msg); extern xmltype *xmlconcat(List *args); extern xmltype *xmlelement(XmlExprState *xmlExpr, ExprContext *econtext); extern xmltype *xmlparse(text *data, XmlOptionType xmloption, bool preserve_whitespace); @@ -83,12 +105,6 @@ extern char *map_sql_identifier_to_xml_name(char *ident, bool fully_escaped, boo extern char *map_xml_name_to_sql_identifier(char *name); extern char *map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings); -typedef enum -{ - XMLBINARY_BASE64, - XMLBINARY_HEX -} XmlBinaryType; - extern int xmlbinary; /* XmlBinaryType, but int for guc enum */ extern int xmloption; /* XmlOptionType, but int for guc enum */ diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out index eaa5a74ef0..379777aced 100644 --- a/src/test/regress/expected/xml.out +++ b/src/test/regress/expected/xml.out @@ -8,7 +8,7 @@ INSERT INTO xmltest VALUES (3, '', NULL, ''); @@ -206,9 +206,54 @@ SELECT xmlparse(content 'x'); x (1 row) +SELECT xmlparse(content '&'); +ERROR: invalid XML content +DETAIL: line 1: xmlParseEntityRef: no name +& + ^ +line 1: chunk is not well balanced +& + ^ +SELECT xmlparse(content '&idontexist;'); +ERROR: invalid XML content +DETAIL: line 1: Entity 'idontexist' not defined +&idontexist; + ^ +line 1: chunk is not well balanced +&idontexist; + ^ +SELECT xmlparse(content ''); + xmlparse +--------------------------- + +(1 row) + +SELECT xmlparse(content ''); + xmlparse +-------------------------------- + +(1 row) + +SELECT xmlparse(content '&idontexist;'); +ERROR: invalid XML content +DETAIL: line 1: Entity 'idontexist' not defined +&idontexist; + ^ +line 1: Opening and ending tag mismatch: twoerrors line 1 and unbalanced +&idontexist; + ^ +line 1: chunk is not well balanced +&idontexist; + ^ +SELECT xmlparse(content ''); + xmlparse +--------------------- + +(1 row) + SELECT xmlparse(document 'abc'); ERROR: invalid XML document -DETAIL: Entity: line 1: parser error : Start tag expected, '<' not found +DETAIL: line 1: Start tag expected, '<' not found abc ^ SELECT xmlparse(document 'x'); @@ -217,6 +262,48 @@ SELECT xmlparse(document 'x'); x (1 row) +SELECT xmlparse(document '&'); +ERROR: invalid XML document +DETAIL: line 1: xmlParseEntityRef: no name +& + ^ +line 1: Opening and ending tag mismatch: invalidentity line 1 and abc +& + ^ +SELECT xmlparse(document '&idontexist;'); +ERROR: invalid XML document +DETAIL: line 1: Entity 'idontexist' not defined +&idontexist; + ^ +line 1: Opening and ending tag mismatch: undefinedentity line 1 and abc +&idontexist; + ^ +SELECT xmlparse(document ''); + xmlparse +--------------------------- + +(1 row) + +SELECT xmlparse(document ''); + xmlparse +-------------------------------- + +(1 row) + +SELECT xmlparse(document '&idontexist;'); +ERROR: invalid XML document +DETAIL: line 1: Entity 'idontexist' not defined +&idontexist; + ^ +line 1: Opening and ending tag mismatch: twoerrors line 1 and unbalanced +&idontexist; + ^ +SELECT xmlparse(document ''); + xmlparse +--------------------- + +(1 row) + SELECT xmlpi(name foo); xmlpi --------- @@ -379,7 +466,7 @@ SELECT '<>' IS NOT DOCUMENT; ERROR: invalid XML content LINE 1: SELECT '<>' IS NOT DOCUMENT; ^ -DETAIL: Entity: line 1: parser error : StartTag: invalid element name +DETAIL: line 1: StartTag: invalid element name <> ^ SELECT xmlagg(data) FROM xmltest; @@ -425,7 +512,7 @@ EXECUTE foo ('bad'); ERROR: invalid XML document LINE 1: EXECUTE foo ('bad'); ^ -DETAIL: Entity: line 1: parser error : Start tag expected, '<' not found +DETAIL: line 1: Start tag expected, '<' not found bad ^ SET XML OPTION CONTENT; @@ -679,6 +766,36 @@ SELECT xml_is_well_formed('bar

&'); + xml_is_well_formed +-------------------- + f +(1 row) + +SELECT xml_is_well_formed('&idontexist;'); + xml_is_well_formed +-------------------- + f +(1 row) + +SELECT xml_is_well_formed(''); + xml_is_well_formed +-------------------- + t +(1 row) + +SELECT xml_is_well_formed(''); + xml_is_well_formed +-------------------- + t +(1 row) + +SELECT xml_is_well_formed('&idontexist;'); + xml_is_well_formed +-------------------- + f +(1 row) + SET xmloption TO CONTENT; SELECT xml_is_well_formed('abc'); xml_is_well_formed @@ -686,3 +803,36 @@ SELECT xml_is_well_formed('abc'); t (1 row) +-- Since xpath() deals with namespaces, it's a bit stricter about +-- what's well-formed and what's not. If we don't obey these rules +-- (i.e. ignore namespace-related errors from libxml), xpath() +-- fails in subtle ways. The following would for example produce +-- the xml value +-- +-- which is invalid because '<' may not appear un-escaped in +-- attribute values. +-- Since different libxml versions emit slightly different +-- error messages, we suppress the DETAIL in this test. +\set VERBOSITY terse +SELECT xpath('/*', ''); +ERROR: could not parse XML document +\set VERBOSITY default +-- Again, the XML isn't well-formed for namespace purposes +SELECT xpath('/*', ''); +ERROR: could not parse XML document +DETAIL: line 1: Namespace prefix nosuchprefix on tag is not defined + + ^ +CONTEXT: SQL function "xpath" statement 1 +-- XPath deprecates relative namespaces, but they're not supposed to +-- throw an error, only a warning. +SELECT xpath('/*', ''); +WARNING: line 1: xmlns: URI relative is not absolute + + ^ +CONTEXT: SQL function "xpath" statement 1 + xpath +-------------------------------------- + {""} +(1 row) + diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out index 711b4358a2..1f17bffc0b 100644 --- a/src/test/regress/expected/xml_1.out +++ b/src/test/regress/expected/xml_1.out @@ -172,6 +172,30 @@ SELECT xmlparse(content 'x'); ERROR: unsupported XML feature DETAIL: This functionality requires the server to be built with libxml support. HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(content '&'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(content '&idontexist;'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(content ''); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(content ''); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(content '&idontexist;'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(content ''); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. SELECT xmlparse(document 'abc'); ERROR: unsupported XML feature DETAIL: This functionality requires the server to be built with libxml support. @@ -180,6 +204,30 @@ SELECT xmlparse(document 'x'); ERROR: unsupported XML feature DETAIL: This functionality requires the server to be built with libxml support. HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(document '&'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(document '&idontexist;'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(document ''); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(document ''); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(document '&idontexist;'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xmlparse(document ''); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. SELECT xmlpi(name foo); ERROR: unsupported XML feature DETAIL: This functionality requires the server to be built with libxml support. @@ -627,8 +675,57 @@ SELECT xml_is_well_formed('bar

&'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed('&idontexist;'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed(''); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed(''); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +SELECT xml_is_well_formed('&idontexist;'); +ERROR: unsupported XML feature +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. SET xmloption TO CONTENT; SELECT xml_is_well_formed('abc'); ERROR: unsupported XML feature DETAIL: This functionality requires the server to be built with libxml support. HINT: You need to rebuild PostgreSQL using --with-libxml. +-- Since xpath() deals with namespaces, it's a bit stricter about +-- what's well-formed and what's not. If we don't obey these rules +-- (i.e. ignore namespace-related errors from libxml), xpath() +-- fails in subtle ways. The following would for example produce +-- the xml value +-- +-- which is invalid because '<' may not appear un-escaped in +-- attribute values. +-- Since different libxml versions emit slightly different +-- error messages, we suppress the DETAIL in this test. +\set VERBOSITY terse +SELECT xpath('/*', ''); +ERROR: unsupported XML feature at character 20 +\set VERBOSITY default +-- Again, the XML isn't well-formed for namespace purposes +SELECT xpath('/*', ''); +ERROR: unsupported XML feature +LINE 1: SELECT xpath('/*', ''); + ^ +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. +-- XPath deprecates relative namespaces, but they're not supposed to +-- throw an error, only a warning. +SELECT xpath('/*', ''); +ERROR: unsupported XML feature +LINE 1: SELECT xpath('/*', ''); + ^ +DETAIL: This functionality requires the server to be built with libxml support. +HINT: You need to rebuild PostgreSQL using --with-libxml. diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql index 717a1e7170..f4e423618e 100644 --- a/src/test/regress/sql/xml.sql +++ b/src/test/regress/sql/xml.sql @@ -62,9 +62,21 @@ SELECT xmlelement(name foo, xmlattributes('<>&"''' as funny, xml 'br' as fun SELECT xmlparse(content 'abc'); SELECT xmlparse(content 'x'); +SELECT xmlparse(content '&'); +SELECT xmlparse(content '&idontexist;'); +SELECT xmlparse(content ''); +SELECT xmlparse(content ''); +SELECT xmlparse(content '&idontexist;'); +SELECT xmlparse(content ''); SELECT xmlparse(document 'abc'); SELECT xmlparse(document 'x'); +SELECT xmlparse(document '&'); +SELECT xmlparse(document '&idontexist;'); +SELECT xmlparse(document ''); +SELECT xmlparse(document ''); +SELECT xmlparse(document '&idontexist;'); +SELECT xmlparse(document ''); SELECT xmlpi(name foo); @@ -208,6 +220,32 @@ SELECT xml_is_well_formed('baz'); SELECT xml_is_well_formed('number one'); SELECT xml_is_well_formed('bar'); SELECT xml_is_well_formed('bar'); +SELECT xml_is_well_formed('&'); +SELECT xml_is_well_formed('&idontexist;'); +SELECT xml_is_well_formed(''); +SELECT xml_is_well_formed(''); +SELECT xml_is_well_formed('&idontexist;'); SET xmloption TO CONTENT; SELECT xml_is_well_formed('abc'); + +-- Since xpath() deals with namespaces, it's a bit stricter about +-- what's well-formed and what's not. If we don't obey these rules +-- (i.e. ignore namespace-related errors from libxml), xpath() +-- fails in subtle ways. The following would for example produce +-- the xml value +-- +-- which is invalid because '<' may not appear un-escaped in +-- attribute values. +-- Since different libxml versions emit slightly different +-- error messages, we suppress the DETAIL in this test. +\set VERBOSITY terse +SELECT xpath('/*', ''); +\set VERBOSITY default + +-- Again, the XML isn't well-formed for namespace purposes +SELECT xpath('/*', ''); + +-- XPath deprecates relative namespaces, but they're not supposed to +-- throw an error, only a warning. +SELECT xpath('/*', ''); -- 2.40.0