* XML data type support.
*
*
- * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
- * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.75 2008/07/03 00:04:24 tgl Exp $
+ * src/backend/utils/adt/xml.c
*
*-------------------------------------------------------------------------
*/
/*
* Notes on memory management:
*
- * Via callbacks, libxml is told to use palloc and friends for memory
- * management, within a context that we reset at transaction end (and also at
- * subtransaction abort) to prevent memory leaks. Resetting at transaction or
- * subtransaction abort is necessary since we might have thrown a longjmp
- * while some data structures were not linked from anywhere persistent.
- * Resetting at transaction commit might not be necessary, but seems a good
- * idea to forestall long-term leaks.
- *
* Sometimes libxml allocates global structures in the hope that it can reuse
- * them later on. Therefore, before resetting LibxmlContext, we must tell
- * libxml to discard any global data it has. The libxml API documentation is
- * not very good about specifying this, but for now we assume that
- * xmlCleanupParser() will get rid of anything we need to worry about.
- *
- * We use palloc --- which will throw a longjmp on error --- for allocation
- * callbacks that officially should act like malloc, ie, return NULL on
- * out-of-memory. This is a bit risky since there is a chance of leaving
- * persistent libxml data structures in an inconsistent partially-constructed
- * state, perhaps leading to crash in xmlCleanupParser(). However, as of
- * early 2008 it is *known* that libxml can crash on out-of-memory due to
- * inadequate checks for NULL returns, so this behavior seems the lesser
- * of two evils.
+ * them later on. This makes it impractical to change the xmlMemSetup
+ * functions on-the-fly; that is likely to lead to trying to pfree() chunks
+ * allocated with malloc() or vice versa. Since libxml might be used by
+ * loadable modules, eg libperl, our only safe choices are to change the
+ * functions at postmaster/backend launch or not at all. Since we'd rather
+ * not activate libxml in sessions that might never use it, the latter choice
+ * is the preferred one. However, for debugging purposes it can be awfully
+ * handy to constrain libxml's allocations to be done in a specific palloc
+ * context, where they're easy to track. Therefore there is code here that
+ * can be enabled in debug builds to redirect libxml's allocations into a
+ * special context LibxmlContext. It's not recommended to turn this on in
+ * a production build because of the possibility of bad interactions with
+ * external modules.
*/
+/* #define USE_LIBXMLCONTEXT */
#include "postgres.h"
#ifdef USE_LIBXML
#include <libxml/chvalid.h>
#include <libxml/parser.h>
+#include <libxml/parserInternals.h>
#include <libxml/tree.h>
#include <libxml/uri.h>
#include <libxml/xmlerror.h>
+#include <libxml/xmlversion.h>
#include <libxml/xmlwriter.h>
#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>
+
+/*
+ * We used to check for xmlStructuredErrorContext via a configure test; but
+ * that doesn't work on Windows, so instead use this grottier method of
+ * testing the library version number.
+ */
+#if LIBXML_VERSION >= 20704
+#define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
+#endif
#endif /* USE_LIBXML */
+#include "access/htup_details.h"
#include "catalog/namespace.h"
#include "catalog/pg_type.h"
#include "commands/dbcommands.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/execnodes.h"
-#include "parser/parse_expr.h"
+#include "nodes/nodeFuncs.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/date.h"
#include "utils/datetime.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
#include "utils/xml.h"
/* GUC variables */
-int xmlbinary;
-int xmloption;
+int xmlbinary;
+int xmloption;
#ifdef USE_LIBXML
-static StringInfo xml_err_buf = NULL;
+/* random number to identify PgXmlErrorContext */
+#define ERRCXT_MAGIC 68275028
+
+struct PgXmlErrorContext /* per-call libxml error state: built by pg_xml_init, released by pg_xml_done */
+{
+ int magic; /* ERRCXT_MAGIC while the struct is valid; pg_xml_done zeroes it before freeing */
+ /* strictness argument passed to pg_xml_init */
+ PgXmlStrictness strictness;
+ /* current error status and accumulated message, if any */
+ bool err_occurred; /* cleared by pg_xml_init and again by xml_ereport */
+ StringInfoData err_buf; /* xml_ereport uses this text as the error detail when non-empty */
+ /* previous libxml error handling state (saved by pg_xml_init) */
+ xmlStructuredErrorFunc saved_errfunc;
+ void *saved_errcxt;
+ /* previous libxml entity handler (saved by pg_xml_init) */
+ xmlExternalEntityLoader saved_entityfunc; /* reinstalled by pg_xml_done */
+};
+
+static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
+ xmlParserCtxtPtr ctxt);
+static void xml_errorHandler(void *data, xmlErrorPtr error);
+static void xml_ereport_by_code(int level, int sqlcode,
+ const char *msg, int errcode);
+static void chopStringInfoNewlines(StringInfo str);
+static void appendStringInfoLineSeparator(StringInfo str);
+
+#ifdef USE_LIBXMLCONTEXT
+
static MemoryContext LibxmlContext = NULL;
-static void xml_init(void);
static void xml_memory_init(void);
-static void xml_memory_cleanup(void);
static void *xml_palloc(size_t size);
static void *xml_repalloc(void *ptr, size_t size);
static void xml_pfree(void *ptr);
static char *xml_pstrdup(const char *string);
-static void xml_ereport(int level, int sqlcode, const char *msg);
-static void xml_errorHandler(void *ctxt, const char *msg,...);
-static void xml_ereport_by_code(int level, int sqlcode,
- const char *msg, int errcode);
+#endif /* USE_LIBXMLCONTEXT */
+
static xmlChar *xml_text2xmlChar(text *in);
-static int parse_xml_decl(const xmlChar * str, size_t *lenp,
- xmlChar ** version, xmlChar ** encoding, int *standalone);
-static bool print_xml_decl(StringInfo buf, const xmlChar * version,
+static int parse_xml_decl(const xmlChar *str, size_t *lenp,
+ xmlChar **version, xmlChar **encoding, int *standalone);
+static bool print_xml_decl(StringInfo buf, const xmlChar *version,
pg_enc encoding, int standalone);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
- bool preserve_whitespace, xmlChar * encoding);
+ bool preserve_whitespace, int encoding);
static text *xml_xmlnodetoxmltype(xmlNodePtr cur);
+static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
+ ArrayBuildState **astate);
#endif /* USE_LIBXML */
static StringInfo query_to_xml_internal(const char *query, char *tablename,
errhint("You need to rebuild PostgreSQL using --with-libxml.")))
-/* from SQL/XML:2003 section 4.7 */
+/* from SQL/XML:2008 section 4.9 */
#define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
#define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
#define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
#ifdef USE_LIBXML
/*
* Translate an encoding name, given as an xmlChar string, into the integer
* encoding ID produced by pg_char_to_encoding().
*
* If the lookup yields a negative value, the name is reported with
* ereport(ERROR, ERRCODE_INVALID_PARAMETER_VALUE); otherwise the encoding ID
* is returned unchanged.
*/
static int
-xmlChar_to_encoding(xmlChar * encoding_name)
+xmlChar_to_encoding(const xmlChar *encoding_name)
{
- int encoding = pg_char_to_encoding((char *) encoding_name);
+ int encoding = pg_char_to_encoding((const char *) encoding_name);
if (encoding < 0) /* negative result is treated as "name not recognized" */
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid encoding name \"%s\"",
- (char *) encoding_name)));
+ (const char *) encoding_name)));
return encoding;
}
#endif
* Parse the data to check if it is well-formed XML data. Assume that
* ERROR occurred if parsing failed.
*/
- doc = xml_parse(vardata, xmloption, true, NULL);
+ doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding());
xmlFreeDoc(doc);
PG_RETURN_XML_P(vardata);
static char *
xml_out_internal(xmltype *x, pg_enc target_encoding)
{
- char *str;
- size_t len;
+ char *str = text_to_cstring((text *) x);
#ifdef USE_LIBXML
+ size_t len = strlen(str);
xmlChar *version;
- xmlChar *encoding;
int standalone;
int res_code;
-#endif
-
- str = text_to_cstring((text *) x);
- len = strlen(str);
-#ifdef USE_LIBXML
if ((res_code = parse_xml_decl((xmlChar *) str,
- &len, &version, &encoding, &standalone)) == 0)
+ &len, &version, NULL, &standalone)) == 0)
{
StringInfoData buf;
}
appendStringInfoString(&buf, str + len);
+ pfree(str);
+
return buf.data;
}
char *newstr;
int nbytes;
xmlDocPtr doc;
- xmlChar *encoding = NULL;
+ xmlChar *encodingStr = NULL;
+ int encoding;
/*
* Read the data in raw format. We don't know yet what the encoding is, as
str = VARDATA(result);
str[nbytes] = '\0';
- parse_xml_decl((xmlChar *) str, NULL, NULL, &encoding, NULL);
+ parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
+
+ /*
+ * If encoding wasn't explicitly specified in the XML header, treat it as
+ * UTF-8, as that's the default in XML. This is different from xml_in(),
+ * where the input has to go through the normal client to server encoding
+ * conversion.
+ */
+ encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
/*
* Parse the data to check if it is well-formed XML data. Assume that
/* Now that we know what we're dealing with, convert to server encoding */
newstr = (char *) pg_do_encoding_conversion((unsigned char *) str,
nbytes,
- encoding ?
- xmlChar_to_encoding(encoding) :
- PG_UTF8,
+ encoding,
GetDatabaseEncoding());
if (newstr != str)
/*
* Build an xmltype value from the current contents of a libxml buffer.
*
* The bytes reported by xmlBufferContent()/xmlBufferLength() are passed to
* cstring_to_text_with_len(); the xmlBuffer itself is not freed here, so the
* caller remains responsible for it.
*/
static xmltype *
xmlBuffer_to_xmltype(xmlBufferPtr buf)
{
- return (xmltype *) cstring_to_text_with_len((char *) xmlBufferContent(buf),
+ return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
xmlBufferLength(buf));
}
#endif
errmsg("invalid XML comment")));
initStringInfo(&buf);
- appendStringInfo(&buf, "<!--");
+ appendStringInfoString(&buf, "<!--");
appendStringInfoText(&buf, arg);
- appendStringInfo(&buf, "-->");
+ appendStringInfoString(&buf, "-->");
PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
#else
if (!version)
global_version_no_value = true;
else if (!global_version)
- global_version = xmlStrdup(version);
+ global_version = version;
else if (xmlStrcmp(version, global_version) != 0)
global_version_no_value = true;
int i;
ListCell *arg;
ListCell *narg;
- xmlBufferPtr buf;
- xmlTextWriterPtr writer;
+ PgXmlErrorContext *xmlerrcxt;
+ volatile xmlBufferPtr buf = NULL;
+ volatile xmlTextWriterPtr writer = NULL;
/*
* We first evaluate all the arguments, then start up libxml and create
if (isnull)
str = NULL;
else
- str = OutputFunctionCall(&xmlExpr->named_outfuncs[i], value);
+ str = map_sql_value_to_xml_value(value, exprType((Node *) e->expr), false);
named_arg_strings = lappend(named_arg_strings, str);
i++;
}
if (!isnull)
{
str = map_sql_value_to_xml_value(value,
- exprType((Node *) e->expr));
+ exprType((Node *) e->expr), true);
arg_strings = lappend(arg_strings, str);
}
}
/* now safe to run libxml */
- xml_init();
+ xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
- buf = xmlBufferCreate();
- writer = xmlNewTextWriterMemory(buf, 0);
+ PG_TRY();
+ {
+ buf = xmlBufferCreate();
+ if (buf == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xmlBuffer");
+ writer = xmlNewTextWriterMemory(buf, 0);
+ if (writer == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xmlTextWriter");
- xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
+ xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
- forboth(arg, named_arg_strings, narg, xexpr->arg_names)
- {
- char *str = (char *) lfirst(arg);
- char *argname = strVal(lfirst(narg));
+ forboth(arg, named_arg_strings, narg, xexpr->arg_names)
+ {
+ char *str = (char *) lfirst(arg);
+ char *argname = strVal(lfirst(narg));
+
+ if (str)
+ xmlTextWriterWriteAttribute(writer,
+ (xmlChar *) argname,
+ (xmlChar *) str);
+ }
- if (str)
+ foreach(arg, arg_strings)
{
- xmlTextWriterWriteAttribute(writer,
- (xmlChar *) argname,
- (xmlChar *) str);
- pfree(str);
+ char *str = (char *) lfirst(arg);
+
+ xmlTextWriterWriteRaw(writer, (xmlChar *) str);
}
- }
- foreach(arg, arg_strings)
- {
- char *str = (char *) lfirst(arg);
+ xmlTextWriterEndElement(writer);
+
+ /* we MUST do this now to flush data out to the buffer ... */
+ xmlFreeTextWriter(writer);
+ writer = NULL;
- xmlTextWriterWriteRaw(writer, (xmlChar *) str);
+ result = xmlBuffer_to_xmltype(buf);
}
+ PG_CATCH();
+ {
+ if (writer)
+ xmlFreeTextWriter(writer);
+ if (buf)
+ xmlBufferFree(buf);
- xmlTextWriterEndElement(writer);
- xmlFreeTextWriter(writer);
+ pg_xml_done(xmlerrcxt, true);
+
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
- result = xmlBuffer_to_xmltype(buf);
xmlBufferFree(buf);
+ pg_xml_done(xmlerrcxt, false);
+
return result;
#else
NO_XML_SUPPORT();
#ifdef USE_LIBXML
xmlDocPtr doc;
- doc = xml_parse(data, xmloption_arg, preserve_whitespace, NULL);
+ doc = xml_parse(data, xmloption_arg, preserve_whitespace,
+ GetDatabaseEncoding());
xmlFreeDoc(doc);
return (xmltype *) data;
{
#ifdef USE_LIBXML
bool result;
- xmlDocPtr doc = NULL;
+ volatile xmlDocPtr doc = NULL;
MemoryContext ccxt = CurrentMemoryContext;
+ /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
PG_TRY();
{
- doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true, NULL);
+ doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
+ GetDatabaseEncoding());
result = true;
}
PG_CATCH();
}
-/*
- * xml cleanup function for transaction end. This is also called on
- * subtransaction abort; see notes at top of file for rationale.
- */
-void
-AtEOXact_xml(void)
-{
-#ifdef USE_LIBXML
- xml_memory_cleanup();
-#endif
-}
-
-
#ifdef USE_LIBXML
/*
- * Set up for use of libxml --- this should be called by each function that
- * is about to use libxml facilities.
+ * pg_xml_init_library --- set up for use of libxml
+ *
+ * This should be called by each function that is about to use libxml
+ * facilities but doesn't require error handling. It initializes libxml
+ * and verifies compatibility with the loaded libxml version. These are
+ * once-per-session activities.
*
* TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
* check)
*/
-static void
-xml_init(void)
+void
+pg_xml_init_library(void)
{
static bool first_time = true;
if (first_time)
{
/* Stuff we need do only once per session */
- MemoryContext oldcontext;
/*
* Currently, we have no pure UTF-8 support for internals -- check if
errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
(int) sizeof(char), (int) sizeof(xmlChar))));
- /* create error buffer in permanent context */
- oldcontext = MemoryContextSwitchTo(TopMemoryContext);
- xml_err_buf = makeStringInfo();
- MemoryContextSwitchTo(oldcontext);
-
- /* Now that xml_err_buf exists, safe to call xml_errorHandler */
- xmlSetGenericErrorFunc(NULL, xml_errorHandler);
-
- /* Set up memory allocation our way, too */
+#ifdef USE_LIBXMLCONTEXT
+ /* Set up libxml's memory allocation our way */
xml_memory_init();
+#endif
/* Check library compatibility */
LIBXML_TEST_VERSION;
first_time = false;
}
- else
- {
- /* Reset pre-existing buffer to empty */
- Assert(xml_err_buf != NULL);
- resetStringInfo(xml_err_buf);
+}
- /*
- * We re-establish the callback functions every time. This makes it
- * safe for other subsystems (PL/Perl, say) to also use libxml with
- * their own callbacks ... so long as they likewise set up the
- * callbacks on every use. It's cheap enough to not be worth worrying
- * about, anyway.
- */
- xmlSetGenericErrorFunc(NULL, xml_errorHandler);
- xml_memory_init();
- }
+/*
+ * pg_xml_init --- set up for use of libxml and register an error handler
+ *
+ * This should be called by each function that is about to use libxml
+ * facilities and requires error handling. It initializes libxml with
+ * pg_xml_init_library() and establishes our libxml error handler.
+ *
+ * strictness determines which errors are reported and which are ignored.
+ *
+ * Calls to this function MUST be followed by a PG_TRY block that guarantees
+ * that pg_xml_done() is called during either normal or error exit.
+ *
+ * This is exported for use by contrib/xml2, as well as other code that might
+ * wish to share use of this module's libxml error handler.
+ *
+ * Returns a palloc'd PgXmlErrorContext, which is also the pointer installed
+ * as libxml's structured-error context; pass it to pg_xml_done() when done.
+ * Besides the error handler, this also replaces libxml's external entity
+ * loader with xmlPgEntityLoader, saving the previous one in the context.
+ */
+PgXmlErrorContext *
+pg_xml_init(PgXmlStrictness strictness)
+{
+ PgXmlErrorContext *errcxt;
+ void *new_errcxt; /* context pointer libxml reports after our handler is installed */
+
+ /* Do one-time setup if needed */
+ pg_xml_init_library();
+
+ /* Create error handling context structure */
+ errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
+ errcxt->magic = ERRCXT_MAGIC;
+ errcxt->strictness = strictness;
+ errcxt->err_occurred = false;
+ initStringInfo(&errcxt->err_buf);
+
+ /*
+ * Save original error handler and install ours. libxml originally didn't
+ * distinguish between the contexts for generic and for structured error
+ * handlers. If we're using an old libxml version, we must thus save the
+ * generic error context, even though we're using a structured error
+ * handler.
+ */
+ errcxt->saved_errfunc = xmlStructuredError;
+
+#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
+ errcxt->saved_errcxt = xmlStructuredErrorContext;
+#else
+ errcxt->saved_errcxt = xmlGenericErrorContext;
+#endif
+
+ xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler);
+
+ /*
+ * Verify that xmlSetStructuredErrorFunc set the context variable we
+ * expected it to. If not, the error context pointer we just saved is not
+ * the correct thing to restore, and since that leaves us without a way to
+ * restore the context in pg_xml_done, we must fail.
+ *
+ * The only known situation in which this test fails is if we compile with
+ * headers from a libxml2 that doesn't track the structured error context
+ * separately (< 2.7.4), but at runtime use a version that does, or vice
+ * versa. The libxml2 authors did not treat that change as constituting
+ * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
+ * fails to protect us from this.
+ *
+ * NOTE(review): when this check fails, our handler has already been
+ * installed above and nothing in this function restores the saved one
+ * before raising the error --- confirm that this is acceptable.
+ */
+
+#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
+ new_errcxt = xmlStructuredErrorContext;
+#else
+ new_errcxt = xmlGenericErrorContext;
+#endif
+
+ if (new_errcxt != (void *) errcxt)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not set up XML error handler"),
+ errhint("This probably indicates that the version of libxml2"
+ " being used is not compatible with the libxml2"
+ " header files that PostgreSQL was built with.")));
+
+ /*
+ * Also, install an entity loader to prevent unwanted fetches of external
+ * files and URLs. The previous loader is remembered so that pg_xml_done
+ * can put it back.
+ */
+ errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
+ xmlSetExternalEntityLoader(xmlPgEntityLoader);
+
+ return errcxt;
+}
+
+
+/*
+ * pg_xml_done --- restore previous libxml error handling
+ *
+ * Resets libxml's global error-handling state to what it was before
+ * pg_xml_init() was called.
+ *
+ * This routine verifies that all pending errors have been dealt with
+ * (in assert-enabled builds, anyway).
+ *
+ * errcxt is the context returned by pg_xml_init(); both it and its message
+ * buffer are pfree'd here, so it must not be used afterwards. isError
+ * should be true when this is called during error recovery, which relaxes
+ * the "no pending libxml error" assertion. The external entity loader
+ * saved by pg_xml_init() is restored as well.
+ */
+void
+pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
+{
+ void *cur_errcxt; /* context pointer libxml currently has installed */
+
+ /* An assert seems like enough protection here */
+ Assert(errcxt->magic == ERRCXT_MAGIC);
+
+ /*
+ * In a normal exit, there should be no un-handled libxml errors. But we
+ * shouldn't try to enforce this during error recovery, since the longjmp
+ * could have been thrown before xml_ereport had a chance to run.
+ */
+ Assert(!errcxt->err_occurred || isError);
+
+ /*
+ * Check that libxml's global state is correct, warn if not. This is a
+ * real test and not an Assert because it has a higher probability of
+ * happening.
+ */
+#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
+ cur_errcxt = xmlStructuredErrorContext;
+#else
+ cur_errcxt = xmlGenericErrorContext;
+#endif
+
+ if (cur_errcxt != (void *) errcxt)
+ elog(WARNING, "libxml error handling state is out of sync with xml.c");
+
+ /* Restore the saved handlers (done even if the warning above fired) */
+ xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
+ xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
+
+ /*
+ * Mark the struct as invalid, just in case somebody somehow manages to
+ * call xml_errorHandler or xml_ereport with it.
+ */
+ errcxt->magic = 0;
+
+ /* Release memory */
+ pfree(errcxt->err_buf.data);
+ pfree(errcxt);
+}
+
+
+/*
+ * pg_xml_error_occurred() --- test the error flag
+ *
+ * Returns the current value of errcxt->err_occurred without changing it.
+ * Unlike xml_ereport() and pg_xml_done(), no ERRCXT_MAGIC check is made
+ * here, so the caller must pass a context that is still valid.
+ */
+bool
+pg_xml_error_occurred(PgXmlErrorContext *errcxt)
+{
+ return errcxt->err_occurred;
}
|| xmlIsCombiningQ(c) \
|| xmlIsExtender_ch(c))
+/*
+ * xmlChar flavor of pnstrdup: copy the first len xmlChars of str into a
+ * freshly palloc'd buffer of len + 1 xmlChars and zero-terminate it.
+ */
+static xmlChar *
+xml_pnstrdup(const xmlChar *str, size_t len)
+{
+ size_t nchars = len + 1;
+ xmlChar *copy = (xmlChar *) palloc(nchars * sizeof(xmlChar));
+
+ copy[len] = 0;
+ memcpy(copy, str, len * sizeof(xmlChar));
+ return copy;
+}
+
+/*
+ * str is the null-terminated input string. Remaining arguments are
+ * output arguments; each can be NULL if value is not wanted.
+ * version and encoding are returned as locally-palloc'd strings.
+ * Result is 0 if OK, an error code if not.
+ */
static int
-parse_xml_decl(const xmlChar * str, size_t *lenp,
- xmlChar ** version, xmlChar ** encoding, int *standalone)
+parse_xml_decl(const xmlChar *str, size_t *lenp,
+ xmlChar **version, xmlChar **encoding, int *standalone)
{
const xmlChar *p;
const xmlChar *save_p;
int utf8char;
int utf8len;
- xml_init();
+ /*
+ * Only initialize libxml. We don't need error handling here, but we do
+ * need to make sure libxml is initialized before calling any of its
+ * functions. Note that this is safe (and a no-op) if caller has already
+ * done pg_xml_init().
+ */
+ pg_xml_init_library();
+ /* Initialize output arguments to "not present" */
if (version)
*version = NULL;
if (encoding)
return XML_ERR_VERSION_MISSING;
if (version)
- *version = xmlStrndup(p + 1, q - p - 1);
+ *version = xml_pnstrdup(p + 1, q - p - 1);
p = q + 1;
}
else
return XML_ERR_MISSING_ENCODING;
if (encoding)
- *encoding = xmlStrndup(p + 1, q - p - 1);
+ *encoding = xml_pnstrdup(p + 1, q - p - 1);
p = q + 1;
}
else
if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
{
- *standalone = 1;
+ if (standalone)
+ *standalone = 1;
p += 5;
}
else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
{
- *standalone = 0;
+ if (standalone)
+ *standalone = 0;
p += 4;
}
else
* which is the default version specified in SQL:2003.
*/
static bool
-print_xml_decl(StringInfo buf, const xmlChar * version,
+print_xml_decl(StringInfo buf, const xmlChar *version,
pg_enc encoding, int standalone)
{
- xml_init();
-
- if ((version && strcmp((char *) version, PG_XML_DEFAULT_VERSION) != 0)
+ if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
|| (encoding && encoding != PG_UTF8)
|| standalone != -1)
{
/*
* Convert a C string to XML internal representation
*
- * TODO maybe, libxml2's xmlreader is better? (do not construct DOM,
+ * Note: it is caller's responsibility to xmlFreeDoc() the result,
+ * else a permanent memory leak will ensue!
+ *
+ * data is the text to parse and encoding is the PostgreSQL encoding ID it
+ * is in; the text is converted to UTF-8 before being handed to libxml.
+ * xmloption_arg selects DOCUMENT parsing versus content parsing, and
+ * preserve_whitespace controls XML_PARSE_NOBLANKS in the DOCUMENT case.
+ *
+ * Every libxml allocation made here is checked, and any failure is
+ * reported through ereport(ERROR) after the partially built document and
+ * the parser context have been released by the PG_CATCH block.
+ *
+ * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
* yet do not use SAX - see xmlreader.c)
*/
static xmlDocPtr
xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
- xmlChar * encoding)
+ int encoding)
{
int32 len;
xmlChar *string;
xmlChar *utf8string;
- xmlParserCtxtPtr ctxt;
- xmlDocPtr doc;
+ PgXmlErrorContext *xmlerrcxt;
+ volatile xmlParserCtxtPtr ctxt = NULL;
+ volatile xmlDocPtr doc = NULL;
len = VARSIZE(data) - VARHDRSZ; /* will be useful later */
string = xml_text2xmlChar(data);
utf8string = pg_do_encoding_conversion(string,
len,
- encoding ?
- xmlChar_to_encoding(encoding) :
- GetDatabaseEncoding(),
+ encoding,
PG_UTF8);
- xml_init();
- xmlInitParser();
- ctxt = xmlNewParserCtxt();
- if (ctxt == NULL)
- xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
- "could not allocate parser context");
+ /* Start up libxml and its parser */
+ xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
- if (xmloption_arg == XMLOPTION_DOCUMENT)
- {
- /*
- * Note, that here we try to apply DTD defaults
- * (XML_PARSE_DTDATTR) according to SQL/XML:10.16.7.d: 'Default
- * values defined by internal DTD are applied'. As for external
- * DTDs, we try to support them too, (see SQL/XML:10.16.7.e)
- */
- doc = xmlCtxtReadDoc(ctxt, utf8string,
- NULL,
- "UTF-8",
- XML_PARSE_NOENT | XML_PARSE_DTDATTR
- | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
- if (doc == NULL)
- xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
- "invalid XML document");
- }
- else
+ /* Use a TRY block to ensure we clean up correctly */
+ PG_TRY();
{
- int res_code;
- size_t count;
- xmlChar *version = NULL;
- int standalone = -1;
+ xmlInitParser();
- doc = xmlNewDoc(NULL);
+ ctxt = xmlNewParserCtxt();
+ if (ctxt == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate parser context");
- res_code = parse_xml_decl(utf8string,
- &count, &version, NULL, &standalone);
- if (res_code != 0)
- xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
- "invalid XML content: invalid XML declaration",
- res_code);
+ if (xmloption_arg == XMLOPTION_DOCUMENT)
+ {
+ /*
+ * Note that here we try to apply DTD defaults
+ * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
+ * 'Default values defined by internal DTD are applied'. As for
+ * external DTDs, we try to support them too (see SQL/XML:2008 GR
+ * 10.16.7.e).
+ */
+ doc = xmlCtxtReadDoc(ctxt, utf8string,
+ NULL,
+ "UTF-8",
+ XML_PARSE_NOENT | XML_PARSE_DTDATTR
+ | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
+ if (doc == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+ "invalid XML document");
+ }
+ else
+ {
+ int res_code;
+ size_t count;
+ xmlChar *version;
+ int standalone;
+
+ res_code = parse_xml_decl(utf8string,
+ &count, &version, NULL, &standalone);
+ if (res_code != 0)
+ xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
+ "invalid XML content: invalid XML declaration",
+ res_code);
+
+ /*
+ * xmlNewDoc() returns NULL on allocation failure; check before
+ * touching the document so we report an error instead of
+ * dereferencing a NULL pointer.
+ */
+ doc = xmlNewDoc(version);
+ if (doc == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate XML document");
+
+ Assert(doc->encoding == NULL);
+ doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
+ /* xmlStrdup() can fail too; doc is freed by the CATCH block */
+ if (doc->encoding == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate XML document");
+ doc->standalone = standalone;
+
+ res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
+ utf8string + count, NULL);
+ if (res_code != 0 || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
+ "invalid XML content");
+ }
+ }
+ PG_CATCH();
+ {
+ /* Release whatever libxml objects exist, then restore handlers */
+ if (doc != NULL)
+ xmlFreeDoc(doc);
+ if (ctxt != NULL)
+ xmlFreeParserCtxt(ctxt);
- res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
- utf8string + count, NULL);
- if (res_code != 0)
- xml_ereport(ERROR, ERRCODE_INVALID_XML_CONTENT,
- "invalid XML content");
+ pg_xml_done(xmlerrcxt, true);
- doc->version = xmlStrdup(version);
- doc->encoding = xmlStrdup((xmlChar *) "UTF-8");
- doc->standalone = standalone;
+ PG_RE_THROW();
}
+ PG_END_TRY();
xmlFreeParserCtxt(ctxt);
+ pg_xml_done(xmlerrcxt, false);
+
return doc;
}
}
+#ifdef USE_LIBXMLCONTEXT
+
/*
- * Manage the special context used for all libxml allocations
+ * Manage the special context used for all libxml allocations (but only
+ * in special debug builds; see notes at top of file)
*/
static void
xml_memory_init(void)
{
- /*
- * Create memory context if not there already. We make it a child of
- * TopMemoryContext, even though our current policy is that it doesn't
- * survive past transaction end, because we want to be really really
- * sure it doesn't go away before we've called xmlCleanupParser().
- */
+ /* Create memory context if not there already */
if (LibxmlContext == NULL)
LibxmlContext = AllocSetContextCreate(TopMemoryContext,
"LibxmlContext",
xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
}
-static void
-xml_memory_cleanup(void)
-{
- if (LibxmlContext != NULL)
- {
- /* Give libxml a chance to clean up dangling pointers */
- xmlCleanupParser();
-
- /* And flush the context */
- MemoryContextDelete(LibxmlContext);
- LibxmlContext = NULL;
- }
-}
-
/*
* Wrappers for memory management functions
*/
static void
xml_pfree(void *ptr)
{
- pfree(ptr);
+ /*
+ * At least some parts of libxml assume xmlFree(NULL) is allowed, so a
+ * NULL pointer is treated as a no-op rather than being handed to pfree().
+ * (This function is the free callback given to xmlMemSetup.)
+ */
+ if (ptr)
+ pfree(ptr);
}
{
return MemoryContextStrdup(LibxmlContext, string);
}
+#endif /* USE_LIBXMLCONTEXT */
/*
- * Wrapper for "ereport" function for XML-related errors. The "msg"
- * is the SQL-level message; some can be adopted from the SQL/XML
- * standard. This function adds libxml's native error messages, if
- * any, as detail.
+ * xmlPgEntityLoader --- entity loader callback function
+ *
+ * Silently prevent any external entity URL from being loaded. We don't want
+ * to throw an error, so instead make the entity appear to expand to an empty
+ * string.
+ *
+ * We would prefer to allow loading entities that exist in the system's
+ * global XML catalog; but the available libxml2 APIs make that a complex
+ * and fragile task. For now, just shut down all external access.
*/
-static void
-xml_ereport(int level, int sqlcode, const char *msg)
+static xmlParserInputPtr
+xmlPgEntityLoader(const char *URL, const char *ID,
+ xmlParserCtxtPtr ctxt)
+{ /* URL and ID are never examined: no external resource is fetched */
+ return xmlNewStringInputStream(ctxt, (const xmlChar *) ""); /* hand libxml an empty string as the entity's input */
+}
+
+
+/*
+ * xml_ereport --- report an XML-related error
+ *
+ * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
+ * standard. This function adds libxml's native error message, if any, as
+ * detail.
+ *
+ * This is exported for modules that want to share the core libxml error
+ * handler. Note that pg_xml_init() *must* have been called previously.
+ *
+ * errcxt is the context returned by pg_xml_init(); level and sqlcode are
+ * passed through to ereport() and errcode(). The context's err_occurred
+ * flag is cleared before reporting. The detail text is taken directly from
+ * errcxt->err_buf (not copied), and the buffer is not reset here.
+ */
+void
+xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
{
char *detail;
- if (xml_err_buf->len > 0)
- {
- detail = pstrdup(xml_err_buf->data);
- resetStringInfo(xml_err_buf);
- }
+ /* Defend against someone passing us a bogus context struct */
+ if (errcxt->magic != ERRCXT_MAGIC)
+ elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
+
+ /*
+ * Flag that the current libxml error has been reported. This is done
+ * before calling ereport(); presumably because ereport() does not return
+ * at ERROR level --- NOTE(review): confirm.
+ */
+ errcxt->err_occurred = false;
+
+ /* Include detail only if we have some text from libxml */
+ if (errcxt->err_buf.len > 0)
+ detail = errcxt->err_buf.data;
else
detail = NULL;
- /* libxml error messages end in '\n'; get rid of it */
- if (detail)
+ ereport(level,
+ (errcode(sqlcode),
+ errmsg_internal("%s", msg),
+ detail ? errdetail_internal("%s", detail) : 0));
+}
+
+
+/*
+ * Error handler for libxml errors and warnings
+ *
+ * "data" is interpreted as the PgXmlErrorContext to record into, and
+ * "error" is libxml's structured description of the problem.
+ *
+ * The message (prefixed by line number and element name when known, and
+ * followed by parser input context when available) is built in a local
+ * buffer. Depending on the context's strictness and the error level it is
+ * then either dropped, appended to the context's err_buf (setting
+ * err_occurred for real errors), or reported immediately as a WARNING or
+ * NOTICE.
+ */
+static void
+xml_errorHandler(void *data, xmlErrorPtr error)
+{
+ PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
+ xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
+ xmlNodePtr node = error->node;
+ const xmlChar *name = (node != NULL &&
+ node->type == XML_ELEMENT_NODE) ? node->name : NULL;
+ int domain = error->domain;
+ int level = error->level;
+ StringInfo errorBuf;
+
+ /*
+ * Defend against someone passing us a bogus context struct.
+ *
+ * We force a backend exit if this check fails because longjmp'ing out of
+ * libxml would likely render it unsafe to use further.
+ */
+ if (xmlerrcxt->magic != ERRCXT_MAGIC)
+ elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
+
+ /*----------
+ * Older libxml versions report some errors differently.
+ * First, some errors were previously reported as coming from the parser
+ * domain but are now reported as coming from the namespace domain.
+ * Second, some warnings were upgraded to errors.
+ * We attempt to compensate for that here.
+ *----------
+ */
+ switch (error->code)
{
- size_t len;
+ case XML_WAR_NS_URI:
+ level = XML_ERR_ERROR;
+ domain = XML_FROM_NAMESPACE;
+ break;
+
+ case XML_ERR_NS_DECL_ERROR:
+ case XML_WAR_NS_URI_RELATIVE:
+ case XML_WAR_NS_COLUMN:
+ case XML_NS_ERR_XML_NAMESPACE:
+ case XML_NS_ERR_UNDEFINED_NAMESPACE:
+ case XML_NS_ERR_QNAME:
+ case XML_NS_ERR_ATTRIBUTE_REDEFINED:
+ case XML_NS_ERR_EMPTY:
+ domain = XML_FROM_NAMESPACE;
+ break;
+ }
+
+ /* Decide whether to act on the error or not */
+ switch (domain)
+ {
+ case XML_FROM_PARSER:
+ case XML_FROM_NONE:
+ case XML_FROM_MEMORY:
+ case XML_FROM_IO:
- len = strlen(detail);
- if (len > 0 && detail[len - 1] == '\n')
- detail[len - 1] = '\0';
+ /*
+ * Suppress warnings about undeclared entities. We need to do
+ * this to avoid problems due to not loading DTD definitions.
+ */
+ if (error->code == XML_WAR_UNDECLARED_ENTITY)
+ return;
- ereport(level,
- (errcode(sqlcode),
- errmsg("%s", msg),
- errdetail("%s", detail)));
+ /* Otherwise, accept error regardless of the parsing purpose */
+ break;
+
+ default:
+ /* Ignore error if only doing well-formedness check */
+ if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
+ return;
+ break;
}
- else
+
+ /* Prepare error message in errorBuf */
+ errorBuf = makeStringInfo();
+
+ if (error->line > 0)
+ appendStringInfo(errorBuf, "line %d: ", error->line);
+ if (name != NULL)
+ appendStringInfo(errorBuf, "element %s: ", name);
+ /* libxml may supply no message text; appending NULL would crash */
+ if (error->message != NULL)
+ appendStringInfoString(errorBuf, error->message);
+ else
+ appendStringInfoString(errorBuf, "(no message provided)");
+
+ /*
+ * Append context information to errorBuf.
+ *
+ * xmlParserPrintFileContext() uses libxml's "generic" error handler to
+ * write the context. Since we don't want to duplicate libxml
+ * functionality here, we set up a generic error handler temporarily.
+ *
+ * We use appendStringInfo() directly as libxml's generic error handler.
+ * This should work because it has essentially the same signature as
+ * libxml expects, namely (void *ptr, const char *msg, ...).
+ */
+ if (input != NULL)
{
- ereport(level,
- (errcode(sqlcode),
- errmsg("%s", msg)));
+ xmlGenericErrorFunc errFuncSaved = xmlGenericError;
+ void *errCtxSaved = xmlGenericErrorContext;
+
+ xmlSetGenericErrorFunc((void *) errorBuf,
+ (xmlGenericErrorFunc) appendStringInfo);
+
+ /* Add context information to errorBuf */
+ appendStringInfoLineSeparator(errorBuf);
+
+ xmlParserPrintFileContext(input);
+
+ /* Restore generic error func */
+ xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
}
-}
+ /* Get rid of any trailing newlines in errorBuf */
+ chopStringInfoNewlines(errorBuf);
-/*
- * Error handler for libxml error messages
- */
-static void
-xml_errorHandler(void *ctxt, const char *msg,...)
-{
- /* Append the formatted text to xml_err_buf */
- for (;;)
+ /*
+ * Legacy error handling mode. err_occurred is never set, we just add the
+ * message to err_buf. This mode exists because the xml2 contrib module
+ * uses our error-handling infrastructure, but we don't want to change its
+ * behaviour since it's deprecated anyway. This is also why we don't
+ * distinguish between notices, warnings and errors here --- the old-style
+ * generic error handler wouldn't have done that either.
+ */
+ if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
{
- va_list args;
- bool success;
+ appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
+ appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
- /* Try to format the data. */
- va_start(args, msg);
- success = appendStringInfoVA(xml_err_buf, msg, args);
- va_end(args);
+ pfree(errorBuf->data);
+ pfree(errorBuf);
+ return;
+ }
- if (success)
- break;
+ /*
+ * We don't want to ereport() here because that'd probably leave libxml in
+ * an inconsistent state. Instead, we remember the error and ereport()
+ * from xml_ereport().
+ *
+ * Warnings and notices can be reported immediately since they won't cause
+ * a longjmp() out of libxml.
+ */
+ if (level >= XML_ERR_ERROR)
+ {
+ appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
+ appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
- /* Double the buffer size and try again. */
- enlargeStringInfo(xml_err_buf, xml_err_buf->maxlen);
+ xmlerrcxt->err_occurred = true;
+ }
+ else if (level >= XML_ERR_WARNING)
+ {
+ ereport(WARNING,
+ (errmsg_internal("%s", errorBuf->data)));
+ }
+ else
+ {
+ ereport(NOTICE,
+ (errmsg_internal("%s", errorBuf->data)));
}
+
+ pfree(errorBuf->data);
+ pfree(errorBuf);
}
ereport(level,
(errcode(sqlcode),
- errmsg("%s", msg),
+ errmsg_internal("%s", msg),
errdetail(det, code)));
}
+/*
+ * Strip every newline character found at the end of a StringInfo string,
+ * keeping the buffer NUL-terminated as its length shrinks.
+ */
+static void
+chopStringInfoNewlines(StringInfo str)
+{
+ for (;;)
+ {
+ if (str->len <= 0)
+ break;
+ if (str->data[str->len - 1] != '\n')
+ break;
+
+ str->len--;
+ str->data[str->len] = '\0';
+ }
+}
+
+
+/*
+ * Make the string end in exactly one newline: drop any trailing newlines,
+ * then add a single one unless the string is empty.
+ */
+static void
+appendStringInfoLineSeparator(StringInfo str)
+{
+ chopStringInfoNewlines(str);
+
+ /* An empty buffer has nothing to separate from */
+ if (str->len <= 0)
+ return;
+
+ appendStringInfoChar(str, '\n');
+}
+
+
/*
* Convert one char in the current server encoding to a Unicode codepoint.
*/
GetDatabaseEncoding(),
PG_UTF8);
- pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret, pg_mblen(s));
+ pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
+ pg_encoding_mblen(PG_UTF8, utf8string));
+
+ if (utf8string != s)
+ pfree(utf8string);
return ret[0];
}
/*
- * Map SQL identifier to XML name; see SQL/XML:2003 section 9.1.
+ * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
*/
char *
map_sql_identifier_to_xml_name(char *ident, bool fully_escaped,
for (p = ident; *p; p += pg_mblen(p))
{
if (*p == ':' && (p == ident || fully_escaped))
- appendStringInfo(&buf, "_x003A_");
+ appendStringInfoString(&buf, "_x003A_");
else if (*p == '_' && *(p + 1) == 'x')
- appendStringInfo(&buf, "_x005F_");
+ appendStringInfoString(&buf, "_x005F_");
else if (fully_escaped && p == ident &&
pg_strncasecmp(p, "xml", 3) == 0)
{
if (*p == 'x')
- appendStringInfo(&buf, "_x0078_");
+ appendStringInfoString(&buf, "_x0078_");
else
- appendStringInfo(&buf, "_x0058_");
+ appendStringInfoString(&buf, "_x0058_");
}
else if (escape_period && *p == '.')
- appendStringInfo(&buf, "_x002E_");
+ appendStringInfoString(&buf, "_x002E_");
else
{
pg_wchar u = sqlchar_to_unicode(p);
static char *
unicode_to_sqlchar(pg_wchar c)
{
- static unsigned char utf8string[5]; /* need trailing zero */
+ unsigned char utf8string[5]; /* local buffer; need room for trailing zero */
+ char *result;
- if (c <= 0x7F)
- {
- utf8string[0] = c;
- }
- else if (c <= 0x7FF)
- {
- utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
- utf8string[1] = 0x80 | (c & 0x3F);
- }
- else if (c <= 0xFFFF)
- {
- utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
- utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
- utf8string[2] = 0x80 | (c & 0x3F);
- }
- else
- {
- utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
- utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
- utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
- utf8string[3] = 0x80 | (c & 0x3F);
- }
+ memset(utf8string, 0, sizeof(utf8string));
+ unicode_to_utf8(c, utf8string);
- return (char *) pg_do_encoding_conversion(utf8string,
- pg_mblen((char *) utf8string),
- PG_UTF8,
- GetDatabaseEncoding());
+ result = (char *) pg_do_encoding_conversion(utf8string,
+ pg_encoding_mblen(PG_UTF8,
+ (char *) utf8string),
+ PG_UTF8,
+ GetDatabaseEncoding());
+ /*
+ * if pg_do_encoding_conversion didn't strdup, we must: in that case
+ * result still points at the local utf8string array, which must not be
+ * returned to the caller.
+ */
+ if (result == (char *) utf8string)
+ result = pstrdup(result);
+ return result;
}
/*
- * Map XML name to SQL identifier; see SQL/XML:2003 section 9.17.
+ * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
*/
char *
map_xml_name_to_sql_identifier(char *name)
}
/*
- * Map SQL value to XML value; see SQL/XML:2003 section 9.16.
+ * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
+ *
+ * When xml_escape_strings is true, then certain characters in string
+ * values are replaced by entity references (&lt; etc.), as specified
+ * in SQL/XML:2008 section 9.8 GR 9) a) iii). This is normally what is
+ * wanted. The false case is mainly useful when the resulting value
+ * is used with xmlTextWriterWriteAttribute() to write out an
+ * attribute, because that function does the escaping itself.
*/
char *
-map_sql_value_to_xml_value(Datum value, Oid type)
+map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
{
- StringInfoData buf;
-
- initStringInfo(&buf);
-
- if (type_is_array(type))
+ if (type_is_array_domain(type))
{
ArrayType *array;
Oid elmtype;
int num_elems;
Datum *elem_values;
bool *elem_nulls;
+ StringInfoData buf;
int i;
array = DatumGetArrayTypeP(value);
&elem_values, &elem_nulls,
&num_elems);
+ initStringInfo(&buf);
+
for (i = 0; i < num_elems; i++)
{
if (elem_nulls[i])
appendStringInfoString(&buf, "<element>");
appendStringInfoString(&buf,
map_sql_value_to_xml_value(elem_values[i],
- elmtype));
+ elmtype, true));
appendStringInfoString(&buf, "</element>");
}
pfree(elem_values);
pfree(elem_nulls);
+
+ return buf.data;
}
else
{
Oid typeOut;
bool isvarlena;
- char *p,
- *str;
+ char *str;
+
+ /*
+ * Flatten domains; the special-case treatments below should apply to,
+ * eg, domains over boolean not just boolean.
+ */
+ type = getBaseType(type);
/*
* Special XSD formatting for some data types
char buf[MAXDATELEN + 1];
date = DatumGetDateADT(value);
+ /* XSD doesn't support infinite values */
+ if (DATE_NOT_FINITE(date))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("date out of range"),
+ errdetail("XML does not support infinite date values.")));
j2date(date + POSTGRES_EPOCH_JDATE,
&(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
EncodeDateOnly(&tm, USE_XSD_DATES, buf);
Timestamp timestamp;
struct pg_tm tm;
fsec_t fsec;
- char *tzn = NULL;
char buf[MAXDATELEN + 1];
timestamp = DatumGetTimestamp(value);
if (TIMESTAMP_NOT_FINITE(timestamp))
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
- errmsg("timestamp out of range")));
+ errmsg("timestamp out of range"),
+ errdetail("XML does not support infinite timestamp values.")));
else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
- EncodeDateTime(&tm, fsec, NULL, &tzn, USE_XSD_DATES, buf);
+ EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
else
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
struct pg_tm tm;
int tz;
fsec_t fsec;
- char *tzn = NULL;
+ const char *tzn = NULL;
char buf[MAXDATELEN + 1];
timestamp = DatumGetTimestamp(value);
if (TIMESTAMP_NOT_FINITE(timestamp))
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
- errmsg("timestamp out of range")));
+ errmsg("timestamp out of range"),
+ errdetail("XML does not support infinite timestamp values.")));
else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
- EncodeDateTime(&tm, fsec, &tz, &tzn, USE_XSD_DATES, buf);
+ EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
else
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
return pstrdup(buf);
}
+
+#ifdef USE_LIBXML
+ case BYTEAOID:
+ {
+ bytea *bstr = DatumGetByteaPP(value);
+ PgXmlErrorContext *xmlerrcxt;
+ volatile xmlBufferPtr buf = NULL;
+ volatile xmlTextWriterPtr writer = NULL;
+ char *result;
+
+ xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
+
+ PG_TRY();
+ {
+ buf = xmlBufferCreate();
+ if (buf == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xmlBuffer");
+ writer = xmlNewTextWriterMemory(buf, 0);
+ if (writer == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xmlTextWriter");
+
+ if (xmlbinary == XMLBINARY_BASE64)
+ xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
+ 0, VARSIZE_ANY_EXHDR(bstr));
+ else
+ xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
+ 0, VARSIZE_ANY_EXHDR(bstr));
+
+ /* we MUST do this now to flush data out to the buffer */
+ xmlFreeTextWriter(writer);
+ writer = NULL;
+
+ result = pstrdup((const char *) xmlBufferContent(buf));
+ }
+ PG_CATCH();
+ {
+ if (writer)
+ xmlFreeTextWriter(writer);
+ if (buf)
+ xmlBufferFree(buf);
+
+ pg_xml_done(xmlerrcxt, true);
+
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+
+ xmlBufferFree(buf);
+
+ pg_xml_done(xmlerrcxt, false);
+
+ return result;
+ }
+#endif /* USE_LIBXML */
+
}
+ /*
+ * otherwise, just use the type's native text representation
+ */
getTypeOutputInfo(type, &typeOut, &isvarlena);
str = OidOutputFunctionCall(typeOut, value);
- if (type == XMLOID)
+ /* ... exactly as-is for XML, and when escaping is not wanted */
+ if (type == XMLOID || !xml_escape_strings)
return str;
-#ifdef USE_LIBXML
- if (type == BYTEAOID)
- {
- xmlBufferPtr buf;
- xmlTextWriterPtr writer;
- char *result;
-
- xml_init();
-
- buf = xmlBufferCreate();
- writer = xmlNewTextWriterMemory(buf, 0);
+ /* otherwise, translate special characters as needed */
+ return escape_xml(str);
+ }
+}
- if (xmlbinary == XMLBINARY_BASE64)
- xmlTextWriterWriteBase64(writer, VARDATA(value), 0, VARSIZE(value) - VARHDRSZ);
- else
- xmlTextWriterWriteBinHex(writer, VARDATA(value), 0, VARSIZE(value) - VARHDRSZ);
- xmlFreeTextWriter(writer);
- result = pstrdup((const char *) xmlBufferContent(buf));
- xmlBufferFree(buf);
- return result;
- }
-#endif /* USE_LIBXML */
+/*
+ * Escape characters in text that have special meanings in XML.
+ *
+ * '&', '<' and '>' are replaced by the entity references &amp;, &lt; and
+ * &gt;, and a carriage return by the character reference &#x0d;. Every
+ * other byte of "str" is copied through unchanged.
+ *
+ * Returns a palloc'd string.
+ *
+ * NB: this is intentionally not dependent on libxml.
+ */
+char *
+escape_xml(const char *str)
+{
+ StringInfoData buf;
+ const char *p;
- for (p = str; *p; p += pg_mblen(p))
+ initStringInfo(&buf);
+ for (p = str; *p; p++)
+ {
+ switch (*p)
{
- switch (*p)
- {
- case '&':
- appendStringInfo(&buf, "&amp;");
- break;
- case '<':
- appendStringInfo(&buf, "&lt;");
- break;
- case '>':
- appendStringInfo(&buf, "&gt;");
- break;
- case '\r':
- appendStringInfo(&buf, "&#x0d;");
- break;
- default:
- appendBinaryStringInfo(&buf, p, pg_mblen(p));
- break;
- }
+ case '&':
+ appendStringInfoString(&buf, "&amp;");
+ break;
+ case '<':
+ appendStringInfoString(&buf, "&lt;");
+ break;
+ case '>':
+ appendStringInfoString(&buf, "&gt;");
+ break;
+ case '\r':
+ appendStringInfoString(&buf, "&#x0d;");
+ break;
+ default:
+ appendStringInfoCharMacro(&buf, *p);
+ break;
}
}
-
return buf.data;
}
/*
* SQL to XML mapping functions
*
- * What follows below is intentionally organized so that you can read
- * along in the SQL/XML:2003 standard. The functions are mostly split
- * up and ordered they way the clauses lay out in the standards
+ * What follows below was at one point intentionally organized so that
+ * you can read along in the SQL/XML standard. The functions are
+ * mostly split up the way the clauses lay out in the standards
* document, and the identifiers are also aligned with the standard
- * text. (SQL/XML:2006 appears to be ordered differently,
- * unfortunately.)
+ * text. Unfortunately, SQL/XML:2006 reordered the clauses
+ * differently than SQL/XML:2003, so the order below doesn't make much
+ * sense anymore.
*
* There are many things going on there:
*
/*
- * Visibility of objects for XML mappings; see SQL/XML:2003 section
- * 4.8.5.
+ * Visibility of objects for XML mappings; see SQL/XML:2008 section
+ * 4.10.8.
*/
/*
StringInfoData query;
initStringInfo(&query);
- appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class WHERE relnamespace = %u AND relkind IN ('r', 'v') AND pg_catalog.has_table_privilege (oid, 'SELECT') ORDER BY relname;", nspid);
+ appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class WHERE relnamespace = %u AND relkind IN ('r', 'm', 'v') AND pg_catalog.has_table_privilege (oid, 'SELECT') ORDER BY relname;", nspid);
return query_to_oid_list(query.data);
}
database_get_xml_visible_tables(void)
{
/* At the moment there is no order required here. */
- return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class WHERE relkind IN ('r', 'v') AND pg_catalog.has_table_privilege (pg_class.oid, 'SELECT') AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
+ return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class WHERE relkind IN ('r', 'm', 'v') AND pg_catalog.has_table_privilege (pg_class.oid, 'SELECT') AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
}
/*
- * Map SQL table to XML and/or XML Schema document; see SQL/XML:2003
- * section 9.3.
+ * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
+ * section 9.11.
*/
static StringInfo
if (strlen(targetns) > 0)
appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
else
- appendStringInfo(result, " xsi:noNamespaceSchemaLocation=\"#\"");
+ appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
}
- appendStringInfo(result, ">\n\n");
+ appendStringInfoString(result, ">\n");
}
errmsg("invalid query")));
if (!tableforest)
+ {
xmldata_root_element_start(result, xmltn, xmlschema,
targetns, top_level);
+ appendStringInfoString(result, "\n");
+ }
if (xmlschema)
appendStringInfo(result, "%s\n\n", xmlschema);
/*
- * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2003
- * section 9.4.
+ * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
+ * sections 9.13, 9.14.
*/
static StringInfo
result = makeStringInfo();
xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
+ appendStringInfoString(result, "\n");
if (xmlschema)
appendStringInfo(result, "%s\n\n", xmlschema);
Oid nspid;
schemaname = NameStr(*name);
- nspid = LookupExplicitNamespace(schemaname);
+ nspid = LookupExplicitNamespace(schemaname, false);
PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
nulls, tableforest, targetns, true)));
result = makeStringInfo();
- nspid = LookupExplicitNamespace(schemaname);
+ nspid = LookupExplicitNamespace(schemaname, false);
xsd_schema_element_start(result, targetns);
StringInfo xmlschema;
schemaname = NameStr(*name);
- nspid = LookupExplicitNamespace(schemaname);
+ nspid = LookupExplicitNamespace(schemaname, false);
xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
tableforest, targetns);
/*
- * Map SQL database to XML and/or XML Schema document; see SQL/XML:2003
- * section 9.5.
+ * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
+ * sections 9.16, 9.17.
*/
static StringInfo
result = makeStringInfo();
xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
+ appendStringInfoString(result, "\n");
if (xmlschema)
appendStringInfo(result, "%s\n\n", xmlschema);
/*
- * Map a multi-part SQL name to an XML name; see SQL/XML:2003 section
+ * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
* 9.2.
*/
static char *
initStringInfo(&result);
if (a)
- appendStringInfo(&result, "%s",
+ appendStringInfoString(&result,
map_sql_identifier_to_xml_name(a, true, true));
if (b)
appendStringInfo(&result, ".%s",
/*
- * Map an SQL table to an XML Schema document; see SQL/XML:2003
- * section 9.3.
+ * Map an SQL table to an XML Schema document; see SQL/XML:2008
+ * section 9.11.
*
- * Map an SQL table to XML Schema data types; see SQL/XML:2003 section
- * 9.6.
+ * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
+ * 9.9.
*/
static const char *
map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
HeapTuple tuple;
Form_pg_class reltuple;
- tuple = SearchSysCache(RELOID,
- ObjectIdGetDatum(relid),
- 0, 0, 0);
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
if (!HeapTupleIsValid(tuple))
elog(ERROR, "cache lookup failed for relation %u", relid);
reltuple = (Form_pg_class) GETSTRUCT(tuple);
rowtypename);
for (i = 0; i < tupdesc->natts; i++)
+ {
+ if (tupdesc->attrs[i]->attisdropped)
+ continue;
appendStringInfo(&result,
" <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
map_sql_identifier_to_xml_name(NameStr(tupdesc->attrs[i]->attname),
true, false),
map_sql_type_to_xml_name(tupdesc->attrs[i]->atttypid, -1),
nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
+ }
appendStringInfoString(&result,
" </xsd:sequence>\n"
/*
- * Map an SQL schema to XML Schema data types; see SQL/XML section
- * 9.7.
+ * Map an SQL schema to XML Schema data types; see SQL/XML:2008
+ * section 9.12.
*/
static const char *
map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
/*
- * Map an SQL catalog to XML Schema data types; see SQL/XML section
- * 9.8.
+ * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
+ * section 9.15.
*/
static const char *
map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
/*
- * Map an SQL data type to an XML name; see SQL/XML:2003 section 9.9.
+ * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
*/
static const char *
map_sql_type_to_xml_name(Oid typeoid, int typmod)
{
case BPCHAROID:
if (typmod == -1)
- appendStringInfo(&result, "CHAR");
+ appendStringInfoString(&result, "CHAR");
else
appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
break;
case VARCHAROID:
if (typmod == -1)
- appendStringInfo(&result, "VARCHAR");
+ appendStringInfoString(&result, "VARCHAR");
else
appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
break;
case NUMERICOID:
if (typmod == -1)
- appendStringInfo(&result, "NUMERIC");
+ appendStringInfoString(&result, "NUMERIC");
else
appendStringInfo(&result, "NUMERIC_%d_%d",
((typmod - VARHDRSZ) >> 16) & 0xffff,
(typmod - VARHDRSZ) & 0xffff);
break;
case INT4OID:
- appendStringInfo(&result, "INTEGER");
+ appendStringInfoString(&result, "INTEGER");
break;
case INT2OID:
- appendStringInfo(&result, "SMALLINT");
+ appendStringInfoString(&result, "SMALLINT");
break;
case INT8OID:
- appendStringInfo(&result, "BIGINT");
+ appendStringInfoString(&result, "BIGINT");
break;
case FLOAT4OID:
- appendStringInfo(&result, "REAL");
+ appendStringInfoString(&result, "REAL");
break;
case FLOAT8OID:
- appendStringInfo(&result, "DOUBLE");
+ appendStringInfoString(&result, "DOUBLE");
break;
case BOOLOID:
- appendStringInfo(&result, "BOOLEAN");
+ appendStringInfoString(&result, "BOOLEAN");
break;
case TIMEOID:
if (typmod == -1)
- appendStringInfo(&result, "TIME");
+ appendStringInfoString(&result, "TIME");
else
appendStringInfo(&result, "TIME_%d", typmod);
break;
case TIMETZOID:
if (typmod == -1)
- appendStringInfo(&result, "TIME_WTZ");
+ appendStringInfoString(&result, "TIME_WTZ");
else
appendStringInfo(&result, "TIME_WTZ_%d", typmod);
break;
case TIMESTAMPOID:
if (typmod == -1)
- appendStringInfo(&result, "TIMESTAMP");
+ appendStringInfoString(&result, "TIMESTAMP");
else
appendStringInfo(&result, "TIMESTAMP_%d", typmod);
break;
case TIMESTAMPTZOID:
if (typmod == -1)
- appendStringInfo(&result, "TIMESTAMP_WTZ");
+ appendStringInfoString(&result, "TIMESTAMP_WTZ");
else
appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
break;
case DATEOID:
- appendStringInfo(&result, "DATE");
+ appendStringInfoString(&result, "DATE");
break;
case XMLOID:
- appendStringInfo(&result, "XML");
+ appendStringInfoString(&result, "XML");
break;
default:
{
HeapTuple tuple;
Form_pg_type typtuple;
- tuple = SearchSysCache(TYPEOID,
- ObjectIdGetDatum(typeoid),
- 0, 0, 0);
+ tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
if (!HeapTupleIsValid(tuple))
elog(ERROR, "cache lookup failed for type %u", typeoid);
typtuple = (Form_pg_type) GETSTRUCT(tuple);
/*
* Map a collection of SQL data types to XML Schema data types; see
- * SQL/XML:2002 section 9.10.
+ * SQL/XML:2008 section 9.7.
*/
static const char *
map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
/*
- * Map an SQL data type to a named XML Schema data type; see SQL/XML
- * sections 9.11 and 9.15.
+ * Map an SQL data type to a named XML Schema data type; see
+ * SQL/XML:2008 sections 9.5 and 9.6.
*
- * (The distinction between 9.11 and 9.15 is basically that 9.15 adds
+ * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
* a name attribute, which this function does. The name-less version
- * 9.11 doesn't appear to be required anywhere.)
+ * 9.5 doesn't appear to be required anywhere.)
*/
static const char *
map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
if (typeoid == XMLOID)
{
- appendStringInfo(&result,
+ appendStringInfoString(&result,
"<xsd:complexType mixed=\"true\">\n"
" <xsd:sequence>\n"
" <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
appendStringInfo(&result,
" <xsd:maxLength value=\"%d\"/>\n",
typmod - VARHDRSZ);
- appendStringInfo(&result,
- " </xsd:restriction>\n");
+ appendStringInfoString(&result, " </xsd:restriction>\n");
break;
case BYTEAOID:
break;
case FLOAT4OID:
- appendStringInfo(&result,
+ appendStringInfoString(&result,
" <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
break;
case FLOAT8OID:
- appendStringInfo(&result,
+ appendStringInfoString(&result,
" <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
break;
case BOOLOID:
- appendStringInfo(&result,
+ appendStringInfoString(&result,
" <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
break;
}
case DATEOID:
- appendStringInfo(&result,
+ appendStringInfoString(&result,
" <xsd:restriction base=\"xsd:date\">\n"
" <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
" </xsd:restriction>\n");
}
break;
}
- appendStringInfo(&result,
- "</xsd:simpleType>\n");
+ appendStringInfoString(&result, "</xsd:simpleType>\n");
}
return result.data;
/*
* Map an SQL row to an XML element, taking the row from the active
- * SPI cursor. See also SQL/XML:2003 section 9.12.
+ * SPI cursor. See also SQL/XML:2008 section 9.10.
*/
static void
SPI_sql_row_to_xmlelement(int rownum, StringInfo result, char *tablename,
appendStringInfo(result, " <%s>%s</%s>\n",
colname,
map_sql_value_to_xml_value(colval,
- SPI_gettypeid(SPI_tuptable->tupdesc, i)),
+ SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
colname);
}
*/
#ifdef USE_LIBXML
+
/*
* Convert XML node to text (dump subtree in case of element,
* return value otherwise)
static text *
xml_xmlnodetoxmltype(xmlNodePtr cur)
{
- xmlChar *str;
xmltype *result;
- xmlBufferPtr buf;
if (cur->type == XML_ELEMENT_NODE)
{
+ xmlBufferPtr buf; /* freed on both the normal and the error path */
+
buf = xmlBufferCreate();
- xmlNodeDump(buf, NULL, cur, 0, 1);
- result = xmlBuffer_to_xmltype(buf);
+ PG_TRY();
+ {
+ xmlNodeDump(buf, NULL, cur, 0, 1); /* NOTE(review): buf is not checked for NULL after xmlBufferCreate() -- confirm this is acceptable */
+ result = xmlBuffer_to_xmltype(buf);
+ }
+ PG_CATCH();
+ {
+ xmlBufferFree(buf);
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
xmlBufferFree(buf);
}
else
{
+ xmlChar *str; /* libxml-allocated; released with xmlFree() on every path */
+
str = xmlXPathCastNodeToString(cur);
- result = (xmltype *) cstring_to_text((char *) str);
+ PG_TRY();
+ {
+ /*
+ * The node's string value is passed through escape_xml() before
+ * being stored. Here we rely on XML having the same
+ * representation as TEXT
+ */
+ char *escaped = escape_xml((char *) str);
+
+ result = (xmltype *) cstring_to_text(escaped);
+ pfree(escaped);
+ }
+ PG_CATCH();
+ {
+ xmlFree(str);
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
xmlFree(str);
}
return result;
}
-#endif
-
/*
- * Evaluate XPath expression and return array of XML values.
+ * Convert an XML XPath object (the result of evaluating an XPath expression)
+ * to an array of xml values, which is returned at *astate. The function
+ * result value is the number of elements in the array.
*
- * As we have no support of XQuery sequences yet, this function seems
- * to be the most useful one (array of XML functions plays a role of
- * some kind of substitution for XQuery sequences).
+ * If "astate" is NULL then we don't generate the array value, but we still
+ * return the number of elements it would have had.
*
- * Workaround here: we parse XML data in different way to allow XPath for
- * fragments (see "XPath for fragment" TODO comment inside).
+ * Nodesets are converted to an array containing the nodes' textual
+ * representations. Primitive values (boolean, number, string) are converted
+ * to a single-element array containing the value's string representation.
+ * Any other XPath result type raises an error.
*/
-Datum
-xpath(PG_FUNCTION_ARGS)
+static int
+xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
+ ArrayBuildState **astate)
{
-#ifdef USE_LIBXML
- text *xpath_expr_text = PG_GETARG_TEXT_P(0);
- xmltype *data = PG_GETARG_XML_P(1);
- ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
- ArrayBuildState *astate = NULL;
- xmlParserCtxtPtr ctxt;
- xmlDocPtr doc;
- xmlXPathContextPtr xpathctx;
- xmlXPathCompExprPtr xpathcomp;
- xmlXPathObjectPtr xpathobj;
+ int result = 0;
+ Datum datum;
+ Oid datumtype;
+ char *result_str;
+
+ if (astate != NULL)
+ *astate = NULL;
+
+ switch (xpathobj->type)
+ {
+ case XPATH_NODESET:
+ if (xpathobj->nodesetval != NULL)
+ {
+ result = xpathobj->nodesetval->nodeNr;
+ if (astate != NULL)
+ {
+ int i;
+
+ for (i = 0; i < result; i++)
+ {
+ datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i]));
+ *astate = accumArrayResult(*astate, datum,
+ false, XMLOID,
+ CurrentMemoryContext);
+ }
+ }
+ }
+ return result;
+
+ case XPATH_BOOLEAN:
+ if (astate == NULL)
+ return 1;
+ datum = BoolGetDatum(xpathobj->boolval);
+ datumtype = BOOLOID;
+ break;
+
+ case XPATH_NUMBER:
+ if (astate == NULL)
+ return 1;
+ datum = Float8GetDatum(xpathobj->floatval);
+ datumtype = FLOAT8OID;
+ break;
+
+ case XPATH_STRING:
+ if (astate == NULL)
+ return 1;
+ datum = CStringGetDatum((char *) xpathobj->stringval);
+ datumtype = CSTRINGOID;
+ break;
+
+ default:
+ elog(ERROR, "xpath expression result type %d is unsupported",
+ xpathobj->type);
+ return 0; /* keep compiler quiet */
+ }
+
+ /*
+ * Common code for scalar-value cases. Only reached with a non-NULL
+ * astate, because each scalar case above returns early otherwise.
+ */
+ result_str = map_sql_value_to_xml_value(datum, datumtype, true);
+ datum = PointerGetDatum(cstring_to_xmltype(result_str));
+ *astate = accumArrayResult(*astate, datum,
+ false, XMLOID,
+ CurrentMemoryContext);
+ return 1;
+}
+
+
+/*
+ * Common code for xpath() and xmlexists()
+ *
+ * Evaluate XPath expression and return number of nodes in res_nitems
+ * and array of XML values in astate. Either of those pointers can be
+ * NULL if the corresponding result isn't wanted.
+ *
+ * It is up to the user to ensure that the XML passed is in fact
+ * an XML document - XPath doesn't work easily on fragments without
+ * a context node being known.
+ */
+static void
+xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
+ int *res_nitems, ArrayBuildState **astate)
+{
+ PgXmlErrorContext *xmlerrcxt;
+ volatile xmlParserCtxtPtr ctxt = NULL;
+ volatile xmlDocPtr doc = NULL;
+ volatile xmlXPathContextPtr xpathctx = NULL;
+ volatile xmlXPathCompExprPtr xpathcomp = NULL;
+ volatile xmlXPathObjectPtr xpathobj = NULL;
char *datastr;
int32 len;
int32 xpath_len;
xmlChar *string;
xmlChar *xpath_expr;
int i;
- int res_nitems;
int ndim;
Datum *ns_names_uris;
bool *ns_names_uris_nulls;
* ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
* 'http://example2.com']].
*/
- ndim = ARR_NDIM(namespaces);
+ ndim = namespaces ? ARR_NDIM(namespaces) : 0;
if (ndim != 0)
{
int *dims;
(errcode(ERRCODE_DATA_EXCEPTION),
errmsg("empty XPath expression")));
- xml_init();
+ string = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
+ memcpy(string, datastr, len);
+ string[len] = '\0';
- /*
- * To handle both documents and fragments, regardless of the fact whether
- * the XML datum has a single root (XML well-formedness), we wrap the XML
- * datum in a dummy element (<x>...</x>) and extend the XPath expression
- * accordingly. To do it, throw away the XML prolog, if any.
- */
- if (len >= 5 &&
- xmlStrncmp((xmlChar *) datastr, (xmlChar *) "<?xml", 5) == 0)
- {
- i = 5;
- while (i < len &&
- !(datastr[i - 1] == '?' && datastr[i] == '>'))
- i++;
-
- if (i == len)
- xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
- "could not parse XML data");
-
- ++i;
-
- datastr += i;
- len -= i;
- }
-
- string = (xmlChar *) palloc((len + 8) * sizeof(xmlChar));
- memcpy(string, "<x>", 3);
- memcpy(string + 3, datastr, len);
- memcpy(string + 3 + len, "</x>", 5);
- len += 7;
+ xpath_expr = (xmlChar *) palloc((xpath_len + 1) * sizeof(xmlChar));
+ memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
+ xpath_expr[xpath_len] = '\0';
- xpath_expr = (xmlChar *) palloc((xpath_len + 3) * sizeof(xmlChar));
- memcpy(xpath_expr, "/x", 2);
- memcpy(xpath_expr + 2, VARDATA(xpath_expr_text), xpath_len);
- xpath_expr[xpath_len + 2] = '\0';
- xpath_len += 2;
+ xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
- xmlInitParser();
+ PG_TRY();
+ {
+ xmlInitParser();
- /*
- * redundant XML parsing (two parsings for the same value during one
- * command execution are possible)
- */
- ctxt = xmlNewParserCtxt();
- if (ctxt == NULL)
- xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
- "could not allocate parser context");
- doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
- if (doc == NULL)
- xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
- "could not parse XML data");
- xpathctx = xmlXPathNewContext(doc);
- if (xpathctx == NULL)
- xml_ereport(ERROR, ERRCODE_OUT_OF_MEMORY,
- "could not allocate XPath context");
- xpathctx->node = xmlDocGetRootElement(doc);
- if (xpathctx->node == NULL)
- xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
- "could not find root XML element");
-
- /* register namespaces, if any */
- if (ns_count > 0)
- {
- for (i = 0; i < ns_count; i++)
+ /*
+ * redundant XML parsing (two parsings for the same value during one
+ * command execution are possible)
+ */
+ ctxt = xmlNewParserCtxt();
+ if (ctxt == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate parser context");
+ doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
+ if (doc == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+ "could not parse XML document");
+ xpathctx = xmlXPathNewContext(doc);
+ if (xpathctx == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate XPath context");
+ xpathctx->node = xmlDocGetRootElement(doc);
+ if (xpathctx->node == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "could not find root XML element");
+
+ /* register namespaces, if any */
+ if (ns_count > 0)
{
- char *ns_name;
- char *ns_uri;
-
- if (ns_names_uris_nulls[i * 2] ||
- ns_names_uris_nulls[i * 2 + 1])
- ereport(ERROR,
- (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
- errmsg("neither namespace name nor URI may be null")));
- ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
- ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
- if (xmlXPathRegisterNs(xpathctx,
- (xmlChar *) ns_name,
- (xmlChar *) ns_uri) != 0)
- ereport(ERROR, /* is this an internal error??? */
- (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
- ns_name, ns_uri)));
+ for (i = 0; i < ns_count; i++)
+ {
+ char *ns_name;
+ char *ns_uri;
+
+ if (ns_names_uris_nulls[i * 2] ||
+ ns_names_uris_nulls[i * 2 + 1])
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("neither namespace name nor URI may be null")));
+ ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
+ ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
+ if (xmlXPathRegisterNs(xpathctx,
+ (xmlChar *) ns_name,
+ (xmlChar *) ns_uri) != 0)
+ ereport(ERROR, /* is this an internal error??? */
+ (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
+ ns_name, ns_uri)));
+ }
}
- }
-
- xpathcomp = xmlXPathCompile(xpath_expr);
- if (xpathcomp == NULL) /* TODO: show proper XPath error details */
- xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
- "invalid XPath expression");
-
- xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
- if (xpathobj == NULL) /* TODO: reason? */
- ereport(ERROR,
- (errmsg("could not create XPath object")));
- xmlXPathFreeCompExpr(xpathcomp);
+ xpathcomp = xmlXPathCompile(xpath_expr);
+ if (xpathcomp == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "invalid XPath expression");
- /* return empty array in cases when nothing is found */
- if (xpathobj->nodesetval == NULL)
- res_nitems = 0;
- else
- res_nitems = xpathobj->nodesetval->nodeNr;
+ /*
+ * Version 2.6.27 introduces a function named
+ * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
+ * but we can derive existence from whether any nodes are returned,
+ * thereby avoiding a dependency on the newer library version and
+ * keeping the code the same.
+ */
+ xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
+ if (xpathobj == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "could not create XPath object");
- if (res_nitems)
+ /*
+ * Extract the results as requested.
+ */
+ if (res_nitems != NULL)
+ *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate);
+ else
+ (void) xml_xpathobjtoxmlarray(xpathobj, astate);
+ }
+ PG_CATCH();
{
- for (i = 0; i < xpathobj->nodesetval->nodeNr; i++)
- {
- Datum elem;
- bool elemisnull = false;
+ if (xpathobj)
+ xmlXPathFreeObject(xpathobj);
+ if (xpathcomp)
+ xmlXPathFreeCompExpr(xpathcomp);
+ if (xpathctx)
+ xmlXPathFreeContext(xpathctx);
+ if (doc)
+ xmlFreeDoc(doc);
+ if (ctxt)
+ xmlFreeParserCtxt(ctxt);
- elem = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i]));
- astate = accumArrayResult(astate, elem,
- elemisnull, XMLOID,
- CurrentMemoryContext);
- }
+ pg_xml_done(xmlerrcxt, true);
+
+ PG_RE_THROW();
}
+ PG_END_TRY();
xmlXPathFreeObject(xpathobj);
+ xmlXPathFreeCompExpr(xpathcomp);
xmlXPathFreeContext(xpathctx);
xmlFreeDoc(doc);
xmlFreeParserCtxt(ctxt);
+ pg_xml_done(xmlerrcxt, false);
+}
+#endif /* USE_LIBXML */
+
+/*
+ * Evaluate XPath expression and return array of XML values.
+ *
+ * As we have no support for XQuery sequences yet, this function seems
+ * to be the most useful one (an array of XML values plays the role of
+ * a substitute for XQuery sequences).
+ */
+Datum
+xpath(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+ text *xpath_expr_text = PG_GETARG_TEXT_P(0);
+ xmltype *data = PG_GETARG_XML_P(1);
+ ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
+ int res_nitems;
+ ArrayBuildState *astate;
+
+ xpath_internal(xpath_expr_text, data, namespaces,
+ &res_nitems, &astate);
+
if (res_nitems == 0)
PG_RETURN_ARRAYTYPE_P(construct_empty_array(XMLOID));
else
return 0;
#endif
}
+
+/*
+ * xmlexists
+ *
+ * Report, as a boolean, whether evaluating the XPath expression given as
+ * argument 0 against the XML value given as argument 1 produces at least
+ * one result item.  No namespace array is passed to xpath_internal.
+ */
+Datum
+xmlexists(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	text	   *query = PG_GETARG_TEXT_P(0);
+	xmltype    *document = PG_GETARG_XML_P(1);
+	int			match_count;
+	bool		found;
+
+	/* Only the item count is wanted, so no array build state is supplied. */
+	xpath_internal(query, document, NULL, &match_count, NULL);
+
+	found = (match_count > 0);
+	PG_RETURN_BOOL(found);
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif
+}
+
+/*
+ * xpath_exists
+ *
+ * Report, as a boolean, whether evaluating the XPath expression given as
+ * argument 0 against the XML value given as argument 1 produces at least
+ * one result item.  Unlike xmlexists, the namespace array given as
+ * argument 2 is forwarded to xpath_internal; this function is not defined
+ * in SQL/XML.
+ */
+Datum
+xpath_exists(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	text	   *query = PG_GETARG_TEXT_P(0);
+	xmltype    *document = PG_GETARG_XML_P(1);
+	ArrayType  *ns_array = PG_GETARG_ARRAYTYPE_P(2);
+	int			match_count;
+	bool		found;
+
+	/* Only the item count is wanted, so no array build state is supplied. */
+	xpath_internal(query, document, ns_array, &match_count, NULL);
+
+	found = (match_count > 0);
+	PG_RETURN_BOOL(found);
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif
+}
+
+/*
+ * Functions for checking well-formed-ness
+ */
+
+#ifdef USE_LIBXML
+/*
+ * wellformed_xml
+ *
+ * Try to parse "data" with xml_parse using the given XmlOptionType and the
+ * database encoding.  Returns true if the parse completes without raising
+ * an error; if an error is raised it is discarded with FlushErrorState and
+ * false is returned.  Any document produced by the parse is freed before
+ * returning.
+ */
+static bool
+wellformed_xml(text *data, XmlOptionType xmloption_arg)
+{
+	/* volatile: assigned inside PG_TRY and read after PG_END_TRY */
+	volatile xmlDocPtr parsed = NULL;
+	bool		ok;
+
+	PG_TRY();
+	{
+		parsed = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());
+		ok = true;
+	}
+	PG_CATCH();
+	{
+		/* Swallow the error: a failed parse simply means "not well-formed". */
+		FlushErrorState();
+		ok = false;
+	}
+	PG_END_TRY();
+
+	if (parsed != NULL)
+		xmlFreeDoc(parsed);
+
+	return ok;
+}
+#endif
+
+/*
+ * xml_is_well_formed
+ *
+ * Return a boolean telling whether the text given as argument 0 passes
+ * wellformed_xml, with the parse mode taken from the xmloption variable
+ * (declared outside this function).
+ */
+Datum
+xml_is_well_formed(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	text	   *input = PG_GETARG_TEXT_P(0);
+	bool		wellformed;
+
+	wellformed = wellformed_xml(input, xmloption);
+	PG_RETURN_BOOL(wellformed);
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif   /* not USE_LIBXML */
+}
+
+/*
+ * xml_is_well_formed_document
+ *
+ * Return a boolean telling whether the text given as argument 0 passes
+ * wellformed_xml when checked with XMLOPTION_DOCUMENT.
+ */
+Datum
+xml_is_well_formed_document(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	text	   *input = PG_GETARG_TEXT_P(0);
+	bool		wellformed;
+
+	wellformed = wellformed_xml(input, XMLOPTION_DOCUMENT);
+	PG_RETURN_BOOL(wellformed);
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif   /* not USE_LIBXML */
+}
+
+/*
+ * xml_is_well_formed_content
+ *
+ * Return a boolean telling whether the text given as argument 0 passes
+ * wellformed_xml when checked with XMLOPTION_CONTENT.
+ */
+Datum
+xml_is_well_formed_content(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+	text	   *input = PG_GETARG_TEXT_P(0);
+	bool		wellformed;
+
+	wellformed = wellformed_xml(input, XMLOPTION_CONTENT);
+	PG_RETURN_BOOL(wellformed);
+#else
+	NO_XML_SUPPORT();
+	return 0;
+#endif   /* not USE_LIBXML */
+}