1 /*-------------------------------------------------------------------------
4 * XML data type support.
7 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
10 * src/backend/utils/adt/xml.c
12 *-------------------------------------------------------------------------
16 * Generally, XML type support is only available when libxml use was
17 * configured during the build. But even if that is not done, the
18 * type and all the functions are available, but most of them will
19 * fail. For one thing, this avoids having to manage variant catalog
20 * installations. But it also has nice effects such as that you can
21 * dump a database containing XML type data even if the server is not
22 * linked with libxml. Thus, make sure xml_out() works even if nothing
27 * Notes on memory management:
29 * Sometimes libxml allocates global structures in the hope that it can reuse
30 * them later on. This makes it impractical to change the xmlMemSetup
31 * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32 * allocated with malloc() or vice versa. Since libxml might be used by
33 * loadable modules, eg libperl, our only safe choices are to change the
34 * functions at postmaster/backend launch or not at all. Since we'd rather
35 * not activate libxml in sessions that might never use it, the latter choice
36 * is the preferred one. However, for debugging purposes it can be awfully
37 * handy to constrain libxml's allocations to be done in a specific palloc
38 * context, where they're easy to track. Therefore there is code here that
39 * can be enabled in debug builds to redirect libxml's allocations into a
40 * special context LibxmlContext. It's not recommended to turn this on in
41 * a production build because of the possibility of bad interactions with
44 /* #define USE_LIBXMLCONTEXT */
49 #include <libxml/chvalid.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/tree.h>
53 #include <libxml/uri.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/xmlversion.h>
56 #include <libxml/xmlwriter.h>
57 #include <libxml/xpath.h>
58 #include <libxml/xpathInternals.h>
61 * We used to check for xmlStructuredErrorContext via a configure test; but
62 * that doesn't work on Windows, so instead use this grottier method of
63 * testing the library version number.
65 #if LIBXML_VERSION >= 20704
66 #define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
68 #endif /* USE_LIBXML */
70 #include "access/htup_details.h"
71 #include "catalog/namespace.h"
72 #include "catalog/pg_type.h"
73 #include "commands/dbcommands.h"
74 #include "executor/executor.h"
75 #include "executor/spi.h"
76 #include "executor/tablefunc.h"
78 #include "lib/stringinfo.h"
79 #include "libpq/pqformat.h"
80 #include "mb/pg_wchar.h"
81 #include "miscadmin.h"
82 #include "nodes/execnodes.h"
83 #include "nodes/nodeFuncs.h"
84 #include "utils/array.h"
85 #include "utils/builtins.h"
86 #include "utils/date.h"
87 #include "utils/datetime.h"
88 #include "utils/lsyscache.h"
89 #include "utils/memutils.h"
90 #include "utils/rel.h"
91 #include "utils/syscache.h"
92 #include "utils/xml.h"
101 /* random number to identify PgXmlErrorContext */
102 #define ERRCXT_MAGIC 68275028
104 struct PgXmlErrorContext
107 /* strictness argument passed to pg_xml_init */
108 PgXmlStrictness strictness;
109 /* current error status and accumulated message, if any */
111 StringInfoData err_buf;
112 /* previous libxml error handling state (saved by pg_xml_init) */
113 xmlStructuredErrorFunc saved_errfunc;
115 /* previous libxml entity handler (saved by pg_xml_init) */
116 xmlExternalEntityLoader saved_entityfunc;
119 static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
120 xmlParserCtxtPtr ctxt);
121 static void xml_errorHandler(void *data, xmlErrorPtr error);
122 static void xml_ereport_by_code(int level, int sqlcode,
123 const char *msg, int errcode);
124 static void chopStringInfoNewlines(StringInfo str);
125 static void appendStringInfoLineSeparator(StringInfo str);
127 #ifdef USE_LIBXMLCONTEXT
129 static MemoryContext LibxmlContext = NULL;
131 static void xml_memory_init(void);
132 static void *xml_palloc(size_t size);
133 static void *xml_repalloc(void *ptr, size_t size);
134 static void xml_pfree(void *ptr);
135 static char *xml_pstrdup(const char *string);
136 #endif /* USE_LIBXMLCONTEXT */
138 static xmlChar *xml_text2xmlChar(text *in);
139 static int parse_xml_decl(const xmlChar *str, size_t *lenp,
140 xmlChar **version, xmlChar **encoding, int *standalone);
141 static bool print_xml_decl(StringInfo buf, const xmlChar *version,
142 pg_enc encoding, int standalone);
143 static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
144 bool preserve_whitespace, int encoding);
145 static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
146 static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
147 ArrayBuildState *astate,
148 PgXmlErrorContext *xmlerrcxt);
149 static xmlChar *pg_xmlCharStrndup(char *str, size_t len);
150 #endif /* USE_LIBXML */
152 static StringInfo query_to_xml_internal(const char *query, char *tablename,
153 const char *xmlschema, bool nulls, bool tableforest,
154 const char *targetns, bool top_level);
155 static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
156 bool nulls, bool tableforest, const char *targetns);
157 static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
158 List *relid_list, bool nulls,
159 bool tableforest, const char *targetns);
160 static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
161 bool nulls, bool tableforest,
162 const char *targetns);
163 static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
164 static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
165 static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
166 static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
167 char *tablename, bool nulls, bool tableforest,
168 const char *targetns, bool top_level);
170 /* XMLTABLE support */
172 /* random number to identify XmlTableContext */
173 #define XMLTABLE_CONTEXT_MAGIC 46922182
174 typedef struct XmlTableBuilderData
179 PgXmlErrorContext *xmlerrcxt;
180 xmlParserCtxtPtr ctxt;
182 xmlXPathContextPtr xpathcxt;
183 xmlXPathCompExprPtr xpathcomp;
184 xmlXPathObjectPtr xpathobj;
185 xmlXPathCompExprPtr *xpathscomp;
186 } XmlTableBuilderData;
189 static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
190 static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
191 static void XmlTableSetNamespace(struct TableFuncScanState *state, char *name,
193 static void XmlTableSetRowFilter(struct TableFuncScanState *state, char *path);
194 static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
195 char *path, int colnum);
196 static bool XmlTableFetchRow(struct TableFuncScanState *state);
197 static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
198 Oid typid, int32 typmod, bool *isnull);
199 static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
201 const TableFuncRoutine XmlTableRoutine =
205 XmlTableSetNamespace,
206 XmlTableSetRowFilter,
207 XmlTableSetColumnFilter,
210 XmlTableDestroyOpaque
213 #define NO_XML_SUPPORT() \
215 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
216 errmsg("unsupported XML feature"), \
217 errdetail("This functionality requires the server to be built with libxml support."), \
218 errhint("You need to rebuild PostgreSQL using --with-libxml.")))
221 /* from SQL/XML:2008 section 4.9 */
222 #define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
223 #define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
224 #define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
/*
 * xmlChar_to_encoding --- translate an encoding name held in an xmlChar
 * string into an encoding id by calling pg_char_to_encoding().
 *
 * The error arguments visible below report "invalid encoding name" with
 * ERRCODE_INVALID_PARAMETER_VALUE.  The test that triggers that report and
 * the function's return statement are not part of this listing.
 */
230 xmlChar_to_encoding(const xmlChar *encoding_name)
232 int encoding = pg_char_to_encoding((const char *) encoding_name);
236 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
237 errmsg("invalid encoding name \"%s\"",
238 (const char *) encoding_name)));
245 * xml_in uses a plain C string to VARDATA conversion, so for the time being
246 * we use the conversion function for the text datatype.
248 * This is only acceptable so long as xmltype and text use the same
/*
 * xml_in --- text input function for the xml type.
 *
 * Argument 0 is a C string.  It is converted with cstring_to_text() and the
 * converted value is what gets returned.  Before returning, the value is run
 * through xml_parse() with the current xmloption setting,
 * preserve_whitespace = true and the database encoding; the parse result is
 * only used as a well-formedness check here (what is done with "doc"
 * afterwards is not visible in this listing).
 */
252 xml_in(PG_FUNCTION_ARGS)
255 char *s = PG_GETARG_CSTRING(0);
259 vardata = (xmltype *) cstring_to_text(s);
262 * Parse the data to check if it is well-formed XML data. Assume that
263 * ERROR occurred if parsing failed.
265 doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding());
268 PG_RETURN_XML_P(vardata);
276 #define PG_XML_DEFAULT_VERSION "1.0"
280 * xml_out_internal uses a plain VARDATA to C string conversion, so for the
281 * time being we use the conversion function for the text datatype.
283 * This is only acceptable so long as xmltype and text use the same
/*
 * xml_out_internal --- produce the C-string form of an xml value.
 *
 * x: the stored value, converted to a C string with text_to_cstring().
 * target_encoding: passed through to print_xml_decl() to decide whether an
 * encoding attribute is written.
 *
 * The stored XML declaration (if any) is parsed with parse_xml_decl(), which
 * also updates len to the offset where the remaining content starts.  When
 * parsing succeeds (result 0), a new declaration is written with
 * print_xml_decl() and the content from str + len is appended.  When
 * print_xml_decl() reports that it wrote nothing, the code tests for a
 * newline at str + len (the action taken is on a line not shown here).
 * When parsing fails, a WARNING is raised through xml_ereport_by_code();
 * the value returned in that case is not visible in this listing.
 */
287 xml_out_internal(xmltype *x, pg_enc target_encoding)
289 char *str = text_to_cstring((text *) x);
292 size_t len = strlen(str);
297 if ((res_code = parse_xml_decl((xmlChar *) str,
298 &len, &version, NULL, &standalone)) == 0)
302 initStringInfo(&buf);
304 if (!print_xml_decl(&buf, version, target_encoding, standalone))
307 * If we are not going to produce an XML declaration, eat a single
308 * newline in the original string to prevent empty first lines in
311 if (*(str + len) == '\n')
314 appendStringInfoString(&buf, str + len);
321 xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
322 "could not parse XML declaration in stored value",
/*
 * xml_out --- text output function for the xml type.
 *
 * Returns xml_out_internal(x, 0) as a C string.  The 0 passed as
 * target_encoding means print_xml_decl() never writes an encoding attribute,
 * because it only does so when the encoding argument is nonzero and not
 * PG_UTF8.
 */
330 xml_out(PG_FUNCTION_ARGS)
332 xmltype *x = PG_GETARG_XML_P(0);
335 * xml_out removes the encoding property in all cases. This is because we
336 * cannot control from here whether the datum will be converted to a
337 * different client encoding, so we'd do more harm than good by including
340 PG_RETURN_CSTRING(xml_out_internal(x, 0));
345 xml_recv(PG_FUNCTION_ARGS)
348 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
354 xmlChar *encodingStr = NULL;
358 * Read the data in raw format. We don't know yet what the encoding is, as
359 * that information is embedded in the xml declaration; so we have to
360 * parse that before converting to server encoding.
362 nbytes = buf->len - buf->cursor;
363 str = (char *) pq_getmsgbytes(buf, nbytes);
366 * We need a null-terminated string to pass to parse_xml_decl(). Rather
367 * than make a separate copy, make the temporary result one byte bigger
368 * than it needs to be.
370 result = palloc(nbytes + 1 + VARHDRSZ);
371 SET_VARSIZE(result, nbytes + VARHDRSZ);
372 memcpy(VARDATA(result), str, nbytes);
373 str = VARDATA(result);
376 parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
379 * If encoding wasn't explicitly specified in the XML header, treat it as
380 * UTF-8, as that's the default in XML. This is different from xml_in(),
381 * where the input has to go through the normal client to server encoding
384 encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
387 * Parse the data to check if it is well-formed XML data. Assume that
388 * xml_parse will throw ERROR if not.
390 doc = xml_parse(result, xmloption, true, encoding);
393 /* Now that we know what we're dealing with, convert to server encoding */
394 newstr = pg_any_to_server(str, nbytes, encoding);
399 result = (xmltype *) cstring_to_text(newstr);
403 PG_RETURN_XML_P(result);
/*
 * xml_send --- binary send function for the xml type.
 *
 * Renders argument 0 with xml_out_internal(), passing the client encoding so
 * that the emitted declaration names it, then writes the string into a
 * message buffer with pq_sendtext() and returns the result of
 * pq_endtypsend() as a bytea.
 */
412 xml_send(PG_FUNCTION_ARGS)
414 xmltype *x = PG_GETARG_XML_P(0);
419 * xml_out_internal doesn't convert the encoding, it just prints the right
420 * declaration. pq_sendtext will do the conversion.
422 outval = xml_out_internal(x, pg_get_client_encoding());
424 pq_begintypsend(&buf);
425 pq_sendtext(&buf, outval, strlen(outval));
427 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
/*
 * appendStringInfoText --- append the payload bytes of a text datum to str.
 *
 * The length is computed as VARSIZE(t) - VARHDRSZ and the data is read from
 * VARDATA(t).
 * NOTE(review): this presumably requires t to be a detoasted value with a
 * full-size header; confirm that every caller passes such a value.
 */
433 appendStringInfoText(StringInfo str, const text *t)
435 appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ);
/*
 * stringinfo_to_xmltype --- build an xml value from the first buf->len bytes
 * of buf->data, using cstring_to_text_with_len() and a cast to xmltype.
 */
441 stringinfo_to_xmltype(StringInfo buf)
443 return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
/*
 * cstring_to_xmltype --- build an xml value from a null-terminated C string,
 * using cstring_to_text() and a cast to xmltype.  No parsing or validation
 * is done here.
 */
448 cstring_to_xmltype(const char *string)
450 return (xmltype *) cstring_to_text(string);
/*
 * xmlBuffer_to_xmltype --- build an xml value from the contents of a libxml
 * buffer: xmlBufferLength(buf) bytes starting at xmlBufferContent(buf) are
 * copied via cstring_to_text_with_len().
 */
456 xmlBuffer_to_xmltype(xmlBufferPtr buf)
458 return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
459 xmlBufferLength(buf));
/*
 * xmlcomment --- wrap a text argument in XML comment delimiters.
 *
 * Argument 0 is the comment text.  Two checks are made on it, each paired
 * with an ERRCODE_INVALID_XML_COMMENT report ("invalid XML comment"):
 *   - the loop starts at index 1 and compares each byte with its
 *     predecessor, so it detects any two consecutive '-' characters;
 *   - a non-empty argument must not end in '-'.
 * The result is "<!--" followed by the argument followed by "-->", returned
 * as an xml value.
 */
465 xmlcomment(PG_FUNCTION_ARGS)
468 text *arg = PG_GETARG_TEXT_P(0);
469 char *argdata = VARDATA(arg);
470 int len = VARSIZE(arg) - VARHDRSZ;
474 /* check for "--" in string or "-" at the end */
475 for (i = 1; i < len; i++)
477 if (argdata[i] == '-' && argdata[i - 1] == '-')
479 (errcode(ERRCODE_INVALID_XML_COMMENT),
480 errmsg("invalid XML comment")));
482 if (len > 0 && argdata[len - 1] == '-')
484 (errcode(ERRCODE_INVALID_XML_COMMENT),
485 errmsg("invalid XML comment")));
487 initStringInfo(&buf);
488 appendStringInfoString(&buf, "<!--");
489 appendStringInfoText(&buf, arg);
490 appendStringInfoString(&buf, "-->");
492 PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
502 * TODO: xmlconcat needs to merge the notations and unparsed entities
503 * of the argument values. Not very important in practice, though.
506 xmlconcat(List *args)
509 int global_standalone = 1;
510 xmlChar *global_version = NULL;
511 bool global_version_no_value = false;
515 initStringInfo(&buf);
518 xmltype *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
524 len = VARSIZE(x) - VARHDRSZ;
525 str = text_to_cstring((text *) x);
527 parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
529 if (standalone == 0 && global_standalone == 1)
530 global_standalone = 0;
532 global_standalone = -1;
535 global_version_no_value = true;
536 else if (!global_version)
537 global_version = version;
538 else if (xmlStrcmp(version, global_version) != 0)
539 global_version_no_value = true;
541 appendStringInfoString(&buf, str + len);
545 if (!global_version_no_value || global_standalone >= 0)
549 initStringInfo(&buf2);
551 print_xml_decl(&buf2,
552 (!global_version_no_value) ? global_version : NULL,
556 appendStringInfoString(&buf2, buf.data);
560 return stringinfo_to_xmltype(&buf);
/*
 * xmlconcat2 --- two-argument, SQL-callable front end for xmlconcat().
 *
 * Visible branches: one returns argument 1 unchanged; when argument 1 is
 * NULL, argument 0 is returned unchanged; otherwise both arguments are put
 * in a two-element list and handed to xmlconcat().
 * NOTE(review): the condition guarding the first branch is not shown in
 * this listing -- presumably it tests argument 0 for NULL; confirm.
 */
572 xmlconcat2(PG_FUNCTION_ARGS)
579 PG_RETURN_XML_P(PG_GETARG_XML_P(1));
581 else if (PG_ARGISNULL(1))
582 PG_RETURN_XML_P(PG_GETARG_XML_P(0));
584 PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
585 PG_GETARG_XML_P(1))));
/*
 * texttoxml --- convert a text argument to xml by calling xmlparse() with
 * the current xmloption setting and preserve_whitespace = true.
 */
590 texttoxml(PG_FUNCTION_ARGS)
592 text *data = PG_GETARG_TEXT_P(0);
594 PG_RETURN_XML_P(xmlparse(data, xmloption, true));
/*
 * xmltotext --- convert an xml argument to text.  The same pointer is
 * returned with a cast; no copy and no check is made.
 */
599 xmltotext(PG_FUNCTION_ARGS)
601 xmltype *data = PG_GETARG_XML_P(0);
603 /* It's actually binary compatible. */
604 PG_RETURN_TEXT_P((text *) data);
/*
 * xmltotext_with_xmloption --- convert xml to text, honoring an explicit
 * xmloption.
 *
 * When xmloption_arg is XMLOPTION_DOCUMENT and xml_is_document(data) is
 * false, the visible error arguments report ERRCODE_NOT_AN_XML_DOCUMENT
 * ("not an XML document").  Otherwise the input pointer is returned with a
 * cast to text.
 */
609 xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg)
611 if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data))
613 (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
614 errmsg("not an XML document")));
616 /* It's actually binary compatible, save for the above check. */
617 return (text *) data;
622 xmlelement(XmlExprState *xmlExpr, ExprContext *econtext)
625 XmlExpr *xexpr = (XmlExpr *) xmlExpr->xprstate.expr;
627 List *named_arg_strings;
632 PgXmlErrorContext *xmlerrcxt;
633 volatile xmlBufferPtr buf = NULL;
634 volatile xmlTextWriterPtr writer = NULL;
637 * We first evaluate all the arguments, then start up libxml and create
638 * the result. This avoids issues if one of the arguments involves a call
639 * to some other function or subsystem that wants to use libxml on its own
642 named_arg_strings = NIL;
644 foreach(arg, xmlExpr->named_args)
646 ExprState *e = (ExprState *) lfirst(arg);
651 value = ExecEvalExpr(e, econtext, &isnull);
655 str = map_sql_value_to_xml_value(value, exprType((Node *) e->expr), false);
656 named_arg_strings = lappend(named_arg_strings, str);
661 foreach(arg, xmlExpr->args)
663 ExprState *e = (ExprState *) lfirst(arg);
668 value = ExecEvalExpr(e, econtext, &isnull);
669 /* here we can just forget NULL elements immediately */
672 str = map_sql_value_to_xml_value(value,
673 exprType((Node *) e->expr), true);
674 arg_strings = lappend(arg_strings, str);
678 /* now safe to run libxml */
679 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
683 buf = xmlBufferCreate();
684 if (buf == NULL || xmlerrcxt->err_occurred)
685 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
686 "could not allocate xmlBuffer");
687 writer = xmlNewTextWriterMemory(buf, 0);
688 if (writer == NULL || xmlerrcxt->err_occurred)
689 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
690 "could not allocate xmlTextWriter");
692 xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
694 forboth(arg, named_arg_strings, narg, xexpr->arg_names)
696 char *str = (char *) lfirst(arg);
697 char *argname = strVal(lfirst(narg));
700 xmlTextWriterWriteAttribute(writer,
705 foreach(arg, arg_strings)
707 char *str = (char *) lfirst(arg);
709 xmlTextWriterWriteRaw(writer, (xmlChar *) str);
712 xmlTextWriterEndElement(writer);
714 /* we MUST do this now to flush data out to the buffer ... */
715 xmlFreeTextWriter(writer);
718 result = xmlBuffer_to_xmltype(buf);
723 xmlFreeTextWriter(writer);
727 pg_xml_done(xmlerrcxt, true);
735 pg_xml_done(xmlerrcxt, false);
/*
 * xmlparse --- check a text value as XML and return it as an xml value.
 *
 * data is passed to xml_parse() with the caller's xmloption_arg and
 * preserve_whitespace, using the database encoding.  The value returned is
 * the original data pointer cast to xmltype, not a re-serialization of the
 * parsed document.  What is done with "doc" after parsing is not visible in
 * this listing.
 */
746 xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
751 doc = xml_parse(data, xmloption_arg, preserve_whitespace,
752 GetDatabaseEncoding());
755 return (xmltype *) data;
/*
 * xmlpi --- build an XML processing instruction.
 *
 * target: the PI target name.  A target equal to "xml" under a
 *		case-insensitive comparison is paired with an ERRCODE_SYNTAX_ERROR
 *		report.
 * arg: optional PI content.  It is converted to a C string; content that
 *		contains "?>" is paired with an
 *		ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION report.  Leading spaces of
 *		the content are skipped (strspn with " ") and a single space is
 *		written between target and content.
 * arg_is_null: copied to *result_is_null.
 *
 * The output has the form "<?" target [" " content] "?>".  The tests that
 * decide whether to return early for a NULL argument, and whether arg is
 * processed at all, are on lines not shown in this listing.
 */
764 xmlpi(char *target, text *arg, bool arg_is_null, bool *result_is_null)
770 if (pg_strcasecmp(target, "xml") == 0)
772 (errcode(ERRCODE_SYNTAX_ERROR), /* really */
773 errmsg("invalid XML processing instruction"),
774 errdetail("XML processing instruction target name cannot be \"%s\".", target)));
777 * Following the SQL standard, the null check comes after the syntax check
780 *result_is_null = arg_is_null;
784 initStringInfo(&buf);
786 appendStringInfo(&buf, "<?%s", target);
792 string = text_to_cstring(arg);
793 if (strstr(string, "?>") != NULL)
795 (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
796 errmsg("invalid XML processing instruction"),
797 errdetail("XML processing instruction cannot contain \"?>\".")));
799 appendStringInfoChar(&buf, ' ');
800 appendStringInfoString(&buf, string + strspn(string, " "));
803 appendStringInfoString(&buf, "?>");
805 result = stringinfo_to_xmltype(&buf);
816 xmlroot(xmltype *data, text *version, int standalone)
821 xmlChar *orig_version;
825 len = VARSIZE(data) - VARHDRSZ;
826 str = text_to_cstring((text *) data);
828 parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
831 orig_version = xml_text2xmlChar(version);
837 case XML_STANDALONE_YES:
840 case XML_STANDALONE_NO:
843 case XML_STANDALONE_NO_VALUE:
844 orig_standalone = -1;
846 case XML_STANDALONE_OMITTED:
847 /* leave original value */
851 initStringInfo(&buf);
852 print_xml_decl(&buf, orig_version, 0, orig_standalone);
853 appendStringInfoString(&buf, str + len);
855 return stringinfo_to_xmltype(&buf);
864 * Validate document (given as string) against DTD (given as external link)
866 * This has been removed because it is a security hole: unprivileged users
867 * should not be able to use Postgres to fetch arbitrary external files,
868 * which unfortunately is exactly what libxml is willing to do with the DTD
/*
 * xmlvalidate --- placeholder for the removed DTD validation function.
 *
 * The only visible body content is an error report with
 * ERRCODE_FEATURE_NOT_SUPPORTED and the message
 * "xmlvalidate is not implemented".
 */
872 xmlvalidate(PG_FUNCTION_ARGS)
875 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
876 errmsg("xmlvalidate is not implemented")));
882 xml_is_document(xmltype *arg)
886 volatile xmlDocPtr doc = NULL;
887 MemoryContext ccxt = CurrentMemoryContext;
889 /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
892 doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
893 GetDatabaseEncoding());
901 ecxt = MemoryContextSwitchTo(ccxt);
902 errdata = CopyErrorData();
903 if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT)
910 MemoryContextSwitchTo(ecxt);
920 #else /* not USE_LIBXML */
923 #endif /* not USE_LIBXML */
930 * pg_xml_init_library --- set up for use of libxml
932 * This should be called by each function that is about to use libxml
933 * facilities but doesn't require error handling. It initializes libxml
934 * and verifies compatibility with the loaded libxml version. These are
935 * once-per-session activities.
937 * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
/*
 * pg_xml_init_library --- one-time libxml setup for this session.
 *
 * A function-local static flag (first_time) marks whether the setup has
 * already been done.  The visible setup steps are: a check that char and
 * xmlChar have the same size (reported as "could not initialize XML
 * library" otherwise), optional memory-allocator setup under
 * USE_LIBXMLCONTEXT, and a library compatibility check.  The statements
 * behind the last two comments are not shown in this listing.
 *
 * NOTE(review): the errdetail format uses two %u conversions, but both
 * arguments are cast to int; the conversion and the cast should agree
 * (either %d with int or %u with unsigned int).
 */
941 pg_xml_init_library(void)
943 static bool first_time = true;
947 /* Stuff we need do only once per session */
950 * Currently, we have no pure UTF-8 support for internals -- check if
953 if (sizeof(char) != sizeof(xmlChar))
955 (errmsg("could not initialize XML library"),
956 errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
957 (int) sizeof(char), (int) sizeof(xmlChar))));
959 #ifdef USE_LIBXMLCONTEXT
960 /* Set up libxml's memory allocation our way */
964 /* Check library compatibility */
972 * pg_xml_init --- set up for use of libxml and register an error handler
974 * This should be called by each function that is about to use libxml
975 * facilities and requires error handling. It initializes libxml with
976 * pg_xml_init_library() and establishes our libxml error handler.
978 * strictness determines which errors are reported and which are ignored.
980 * Calls to this function MUST be followed by a PG_TRY block that guarantees
981 * that pg_xml_done() is called during either normal or error exit.
983 * This is exported for use by contrib/xml2, as well as other code that might
984 * wish to share use of this module's libxml error handler.
987 pg_xml_init(PgXmlStrictness strictness)
989 PgXmlErrorContext *errcxt;
992 /* Do one-time setup if needed */
993 pg_xml_init_library();
995 /* Create error handling context structure */
996 errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
997 errcxt->magic = ERRCXT_MAGIC;
998 errcxt->strictness = strictness;
999 errcxt->err_occurred = false;
1000 initStringInfo(&errcxt->err_buf);
1003 * Save original error handler and install ours. libxml originally didn't
1004 * distinguish between the contexts for generic and for structured error
1005 * handlers. If we're using an old libxml version, we must thus save the
1006 * generic error context, even though we're using a structured error
1009 errcxt->saved_errfunc = xmlStructuredError;
1011 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1012 errcxt->saved_errcxt = xmlStructuredErrorContext;
1014 errcxt->saved_errcxt = xmlGenericErrorContext;
1017 xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler);
1020 * Verify that xmlSetStructuredErrorFunc set the context variable we
1021 * expected it to. If not, the error context pointer we just saved is not
1022 * the correct thing to restore, and since that leaves us without a way to
1023 * restore the context in pg_xml_done, we must fail.
1025 * The only known situation in which this test fails is if we compile with
1026 * headers from a libxml2 that doesn't track the structured error context
1027 * separately (< 2.7.4), but at runtime use a version that does, or vice
1028 * versa. The libxml2 authors did not treat that change as constituting
1029 * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1030 * fails to protect us from this.
1033 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1034 new_errcxt = xmlStructuredErrorContext;
1036 new_errcxt = xmlGenericErrorContext;
1039 if (new_errcxt != (void *) errcxt)
1041 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1042 errmsg("could not set up XML error handler"),
1043 errhint("This probably indicates that the version of libxml2"
1044 " being used is not compatible with the libxml2"
1045 " header files that PostgreSQL was built with.")));
1048 * Also, install an entity loader to prevent unwanted fetches of external
1051 errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1052 xmlSetExternalEntityLoader(xmlPgEntityLoader);
1059 * pg_xml_done --- restore previous libxml error handling
1061 * Resets libxml's global error-handling state to what it was before
1062 * pg_xml_init() was called.
1064 * This routine verifies that all pending errors have been dealt with
1065 * (in assert-enabled builds, anyway).
1068 pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1072 /* An assert seems like enough protection here */
1073 Assert(errcxt->magic == ERRCXT_MAGIC);
1076 * In a normal exit, there should be no un-handled libxml errors. But we
1077 * shouldn't try to enforce this during error recovery, since the longjmp
1078 * could have been thrown before xml_ereport had a chance to run.
1080 Assert(!errcxt->err_occurred || isError);
1083 * Check that libxml's global state is correct, warn if not. This is a
1084 * real test and not an Assert because it has a higher probability of
1087 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1088 cur_errcxt = xmlStructuredErrorContext;
1090 cur_errcxt = xmlGenericErrorContext;
1093 if (cur_errcxt != (void *) errcxt)
1094 elog(WARNING, "libxml error handling state is out of sync with xml.c");
1096 /* Restore the saved handlers */
1097 xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1098 xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1101 * Mark the struct as invalid, just in case somebody somehow manages to
1102 * call xml_errorHandler or xml_ereport with it.
1106 /* Release memory */
1107 pfree(errcxt->err_buf.data);
1113 * pg_xml_error_occurred() --- test the error flag
/*
 * Returns the err_occurred flag stored in the given error context.  The
 * flag is only read here; it is not reset.
 */
1116 pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1118 return errcxt->err_occurred;
1123 * SQL/XML allows storing "XML documents" or "XML content". "XML
1124 * documents" are specified by the XML specification and are parsed
1125 * easily by libxml. "XML content" is specified by SQL/XML as the
1126 * production "XMLDecl? content". But libxml can only parse the
1127 * "content" part, so we have to parse the XML declaration ourselves
1131 #define CHECK_XML_SPACE(p) \
1133 if (!xmlIsBlank_ch(*(p))) \
1134 return XML_ERR_SPACE_REQUIRED; \
1137 #define SKIP_XML_SPACE(p) \
1138 while (xmlIsBlank_ch(*(p))) (p)++
1140 /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
1141 /* Beware of multiple evaluations of argument! */
1142 #define PG_XMLISNAMECHAR(c) \
1143 (xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
1144 || xmlIsDigit_ch(c) \
1145 || c == '.' || c == '-' || c == '_' || c == ':' \
1146 || xmlIsCombiningQ(c) \
1147 || xmlIsExtender_ch(c))
1149 /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
/*
 * xml_pnstrdup --- copy the first len xmlChars of str into a new palloc'd
 * buffer sized for len + 1 xmlChars.  Both the allocation and the copy are
 * scaled by sizeof(xmlChar).  The store of the terminator into the extra
 * slot and the return statement are on lines not shown in this listing.
 */
1151 xml_pnstrdup(const xmlChar *str, size_t len)
1155 result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1156 memcpy(result, str, len * sizeof(xmlChar));
1161 /* Ditto, except input is char* */
/*
 * pg_xmlCharStrndup --- copy the first len bytes of a char string into a
 * new palloc'd xmlChar buffer sized for len + 1 xmlChars.
 *
 * NOTE(review): unlike xml_pnstrdup(), the memcpy length here is len bytes
 * and is not multiplied by sizeof(xmlChar).  The two are equivalent only
 * while sizeof(char) == sizeof(xmlChar), which is the condition
 * pg_xml_init_library() tests for.  The terminator store and the return
 * statement are on lines not shown in this listing.
 */
1163 pg_xmlCharStrndup(char *str, size_t len)
1167 result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1168 memcpy(result, str, len);
1175 * str is the null-terminated input string. Remaining arguments are
1176 * output arguments; each can be NULL if value is not wanted.
1177 * version and encoding are returned as locally-palloc'd strings.
1178 * Result is 0 if OK, an error code if not.
1181 parse_xml_decl(const xmlChar *str, size_t *lenp,
1182 xmlChar **version, xmlChar **encoding, int *standalone)
1185 const xmlChar *save_p;
1191 * Only initialize libxml. We don't need error handling here, but we do
1192 * need to make sure libxml is initialized before calling any of its
1193 * functions. Note that this is safe (and a no-op) if caller has already
1194 * done pg_xml_init().
1196 pg_xml_init_library();
1198 /* Initialize output arguments to "not present" */
1208 if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1211 /* if next char is name char, it's a PI like <?xml-stylesheet ...?> */
1212 utf8len = strlen((const char *) (p + 5));
1213 utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1214 if (PG_XMLISNAMECHAR(utf8char))
1222 if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1223 return XML_ERR_VERSION_MISSING;
1227 return XML_ERR_VERSION_MISSING;
1231 if (*p == '\'' || *p == '"')
1235 q = xmlStrchr(p + 1, *p);
1237 return XML_ERR_VERSION_MISSING;
1240 *version = xml_pnstrdup(p + 1, q - p - 1);
1244 return XML_ERR_VERSION_MISSING;
1249 if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1251 CHECK_XML_SPACE(save_p);
1255 return XML_ERR_MISSING_ENCODING;
1259 if (*p == '\'' || *p == '"')
1263 q = xmlStrchr(p + 1, *p);
1265 return XML_ERR_MISSING_ENCODING;
1268 *encoding = xml_pnstrdup(p + 1, q - p - 1);
1272 return XML_ERR_MISSING_ENCODING;
1282 if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1284 CHECK_XML_SPACE(save_p);
1288 return XML_ERR_STANDALONE_VALUE;
1291 if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1292 xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1298 else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1299 xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1306 return XML_ERR_STANDALONE_VALUE;
1314 if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1315 return XML_ERR_XMLDECL_NOT_FINISHED;
1321 for (p = str; p < str + len; p++)
1323 return XML_ERR_INVALID_CHAR;
1333 * Write an XML declaration. On output, we adjust the XML declaration
1334 * as follows. (These rules are the moral equivalent of the clause
1335 * "Serialization of an XML value" in the SQL standard.)
1337 * We try to avoid generating an XML declaration if possible. This is
1338 * so that you don't get trivial things like xml '<foo/>' resulting in
1339 * '<?xml version="1.0"?><foo/>', which would surely be annoying. We
1340 * must provide a declaration if the standalone property is specified
1341 * or if we include an encoding declaration. If we have a
1342 * declaration, we must specify a version (XML requires this).
1343 * Otherwise we only make a declaration if the version is not "1.0",
1344 * which is the default version specified in SQL:2003.
/*
 * print_xml_decl --- conditionally append an XML declaration to buf.
 *
 * version: version string, or NULL for "not specified".
 * encoding: encoding id; 0 and PG_UTF8 both mean "write no encoding
 *		attribute".
 * standalone: 1 writes standalone="yes", 0 writes standalone="no"; the
 *		value -1 counts as "not specified" in the entry condition.
 *
 * A declaration is written when any of these holds: version is given and
 * differs from PG_XML_DEFAULT_VERSION, an encoding attribute is to be
 * written, or standalone is not -1.  When written, it always carries a
 * version attribute (the given one, or PG_XML_DEFAULT_VERSION).
 *
 * The function is declared to return bool; the return statements are not
 * shown in this listing.  xml_out_internal() treats a false result as "no
 * declaration was produced".
 */
1347 print_xml_decl(StringInfo buf, const xmlChar *version,
1348 pg_enc encoding, int standalone)
1350 if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1351 || (encoding && encoding != PG_UTF8)
1352 || standalone != -1)
1354 appendStringInfoString(buf, "<?xml");
1357 appendStringInfo(buf, " version=\"%s\"", version);
1359 appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1361 if (encoding && encoding != PG_UTF8)
1364 * XXX might be useful to convert this to IANA names (ISO-8859-1
1365 * instead of LATIN1 etc.); needs field experience
1367 appendStringInfo(buf, " encoding=\"%s\"",
1368 pg_encoding_to_char(encoding));
1371 if (standalone == 1)
1372 appendStringInfoString(buf, " standalone=\"yes\"");
1373 else if (standalone == 0)
1374 appendStringInfoString(buf, " standalone=\"no\"");
1375 appendStringInfoString(buf, "?>");
1385 * Convert a C string to XML internal representation
1387 * Note: it is caller's responsibility to xmlFreeDoc() the result,
1388 * else a permanent memory leak will ensue!
1390 * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1391 * yet do not use SAX - see xmlreader.c)
1394 xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1399 xmlChar *utf8string;
1400 PgXmlErrorContext *xmlerrcxt;
1401 volatile xmlParserCtxtPtr ctxt = NULL;
1402 volatile xmlDocPtr doc = NULL;
1404 len = VARSIZE(data) - VARHDRSZ; /* will be useful later */
1405 string = xml_text2xmlChar(data);
1407 utf8string = pg_do_encoding_conversion(string,
1412 /* Start up libxml and its parser */
1413 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1415 /* Use a TRY block to ensure we clean up correctly */
1420 ctxt = xmlNewParserCtxt();
1421 if (ctxt == NULL || xmlerrcxt->err_occurred)
1422 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1423 "could not allocate parser context");
1425 if (xmloption_arg == XMLOPTION_DOCUMENT)
1428 * Note, that here we try to apply DTD defaults
1429 * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
1430 * 'Default values defined by internal DTD are applied'. As for
1431 * external DTDs, we try to support them too, (see SQL/XML:2008 GR
1434 doc = xmlCtxtReadDoc(ctxt, utf8string,
1437 XML_PARSE_NOENT | XML_PARSE_DTDATTR
1438 | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
1439 if (doc == NULL || xmlerrcxt->err_occurred)
1440 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
1441 "invalid XML document");
1450 res_code = parse_xml_decl(utf8string,
1451 &count, &version, NULL, &standalone);
1453 xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
1454 "invalid XML content: invalid XML declaration",
1457 doc = xmlNewDoc(version);
1458 Assert(doc->encoding == NULL);
1459 doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1460 doc->standalone = standalone;
1462 /* allow empty content */
1463 if (*(utf8string + count))
1465 res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1466 utf8string + count, NULL);
1467 if (res_code != 0 || xmlerrcxt->err_occurred)
1468 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1469 "invalid XML content");
1478 xmlFreeParserCtxt(ctxt);
1480 pg_xml_done(xmlerrcxt, true);
1486 xmlFreeParserCtxt(ctxt);
1488 pg_xml_done(xmlerrcxt, false);
1495 * xmlChar<->text conversions
1498 xml_text2xmlChar(text *in)
1500 return (xmlChar *) text_to_cstring(in);
1504 #ifdef USE_LIBXMLCONTEXT
1507 * Manage the special context used for all libxml allocations (but only
1508 * in special debug builds; see notes at top of file)
1511 xml_memory_init(void)
1513 /* Create memory context if not there already */
1514 if (LibxmlContext == NULL)
1515 LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1517 ALLOCSET_DEFAULT_SIZES);
1519 /* Re-establish the callbacks even if already set */
1520 xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1524 * Wrappers for memory management functions
1527 xml_palloc(size_t size)
1529 return MemoryContextAlloc(LibxmlContext, size);
1534 xml_repalloc(void *ptr, size_t size)
1536 return repalloc(ptr, size);
1541 xml_pfree(void *ptr)
1543 /* At least some parts of libxml assume xmlFree(NULL) is allowed */
1550 xml_pstrdup(const char *string)
1552 return MemoryContextStrdup(LibxmlContext, string);
1554 #endif /* USE_LIBXMLCONTEXT */
1558 * xmlPgEntityLoader --- entity loader callback function
1560 * Silently prevent any external entity URL from being loaded. We don't want
1561 * to throw an error, so instead make the entity appear to expand to an empty
1564 * We would prefer to allow loading entities that exist in the system's
1565 * global XML catalog; but the available libxml2 APIs make that a complex
1566 * and fragile task. For now, just shut down all external access.
1568 static xmlParserInputPtr
1569 xmlPgEntityLoader(const char *URL, const char *ID,
1570 xmlParserCtxtPtr ctxt)
1572 return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
1577 * xml_ereport --- report an XML-related error
1579 * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
1580 * standard. This function adds libxml's native error message, if any, as
1583 * This is exported for modules that want to share the core libxml error
1584 * handler. Note that pg_xml_init() *must* have been called previously.
1587 xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
1591 /* Defend against someone passing us a bogus context struct */
1592 if (errcxt->magic != ERRCXT_MAGIC)
1593 elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
1595 /* Flag that the current libxml error has been reported */
1596 errcxt->err_occurred = false;
1598 /* Include detail only if we have some text from libxml */
1599 if (errcxt->err_buf.len > 0)
1600 detail = errcxt->err_buf.data;
1606 errmsg_internal("%s", msg),
1607 detail ? errdetail_internal("%s", detail) : 0));
1612 * Error handler for libxml errors and warnings
1615 xml_errorHandler(void *data, xmlErrorPtr error)
1617 PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
1618 xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
1619 xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
1620 xmlNodePtr node = error->node;
1621 const xmlChar *name = (node != NULL &&
1622 node->type == XML_ELEMENT_NODE) ? node->name : NULL;
1623 int domain = error->domain;
1624 int level = error->level;
1625 StringInfo errorBuf;
1628 * Defend against someone passing us a bogus context struct.
1630 * We force a backend exit if this check fails because longjmp'ing out of
1631 * libxml would likely render it unsafe to use further.
1633 if (xmlerrcxt->magic != ERRCXT_MAGIC)
1634 elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
1637 * Older libxml versions report some errors differently.
1638 * First, some errors were previously reported as coming from the parser
1639 * domain but are now reported as coming from the namespace domain.
1640 * Second, some warnings were upgraded to errors.
1641 * We attempt to compensate for that here.
1644 switch (error->code)
1646 case XML_WAR_NS_URI:
1647 level = XML_ERR_ERROR;
1648 domain = XML_FROM_NAMESPACE;
1651 case XML_ERR_NS_DECL_ERROR:
1652 case XML_WAR_NS_URI_RELATIVE:
1653 case XML_WAR_NS_COLUMN:
1654 case XML_NS_ERR_XML_NAMESPACE:
1655 case XML_NS_ERR_UNDEFINED_NAMESPACE:
1656 case XML_NS_ERR_QNAME:
1657 case XML_NS_ERR_ATTRIBUTE_REDEFINED:
1658 case XML_NS_ERR_EMPTY:
1659 domain = XML_FROM_NAMESPACE;
1663 /* Decide whether to act on the error or not */
1666 case XML_FROM_PARSER:
1668 case XML_FROM_MEMORY:
1672 * Suppress warnings about undeclared entities. We need to do
1673 * this to avoid problems due to not loading DTD definitions.
1675 if (error->code == XML_WAR_UNDECLARED_ENTITY)
1678 /* Otherwise, accept error regardless of the parsing purpose */
1682 /* Ignore error if only doing well-formedness check */
1683 if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
1688 /* Prepare error message in errorBuf */
1689 errorBuf = makeStringInfo();
1691 if (error->line > 0)
1692 appendStringInfo(errorBuf, "line %d: ", error->line);
1694 appendStringInfo(errorBuf, "element %s: ", name);
1695 appendStringInfoString(errorBuf, error->message);
1698 * Append context information to errorBuf.
1700 * xmlParserPrintFileContext() uses libxml's "generic" error handler to
1701 * write the context. Since we don't want to duplicate libxml
1702 * functionality here, we set up a generic error handler temporarily.
1704 * We use appendStringInfo() directly as libxml's generic error handler.
1705 * This should work because it has essentially the same signature as
1706 * libxml expects, namely (void *ptr, const char *msg, ...).
1710 xmlGenericErrorFunc errFuncSaved = xmlGenericError;
1711 void *errCtxSaved = xmlGenericErrorContext;
1713 xmlSetGenericErrorFunc((void *) errorBuf,
1714 (xmlGenericErrorFunc) appendStringInfo);
1716 /* Add context information to errorBuf */
1717 appendStringInfoLineSeparator(errorBuf);
1719 xmlParserPrintFileContext(input);
1721 /* Restore generic error func */
1722 xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
1725 /* Get rid of any trailing newlines in errorBuf */
1726 chopStringInfoNewlines(errorBuf);
1729 * Legacy error handling mode. err_occurred is never set, we just add the
1730 * message to err_buf. This mode exists because the xml2 contrib module
1731 * uses our error-handling infrastructure, but we don't want to change its
1732 * behaviour since it's deprecated anyway. This is also why we don't
1733 * distinguish between notices, warnings and errors here --- the old-style
1734 * generic error handler wouldn't have done that either.
1736 if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
1738 appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1739 appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
1741 pfree(errorBuf->data);
1747 * We don't want to ereport() here because that'd probably leave libxml in
1748 * an inconsistent state. Instead, we remember the error and ereport()
1749 * from xml_ereport().
1751 * Warnings and notices can be reported immediately since they won't cause
1752 * a longjmp() out of libxml.
1754 if (level >= XML_ERR_ERROR)
1756 appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1757 appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
1759 xmlerrcxt->err_occurred = true;
1761 else if (level >= XML_ERR_WARNING)
1764 (errmsg_internal("%s", errorBuf->data)));
1769 (errmsg_internal("%s", errorBuf->data)));
1772 pfree(errorBuf->data);
1778 * Wrapper for "ereport" function for XML-related errors. The "msg"
1779 * is the SQL-level message; some can be adopted from the SQL/XML
1780 * standard. This function uses "code" to create a textual detail
1781 * message. At the moment, we only need to cover those codes that we
1782 * may raise in this file.
1785 xml_ereport_by_code(int level, int sqlcode,
1786 const char *msg, int code)
1792 case XML_ERR_INVALID_CHAR:
1793 det = gettext_noop("Invalid character value.");
1795 case XML_ERR_SPACE_REQUIRED:
1796 det = gettext_noop("Space required.");
1798 case XML_ERR_STANDALONE_VALUE:
1799 det = gettext_noop("standalone accepts only 'yes' or 'no'.");
1801 case XML_ERR_VERSION_MISSING:
1802 det = gettext_noop("Malformed declaration: missing version.");
1804 case XML_ERR_MISSING_ENCODING:
1805 det = gettext_noop("Missing encoding in text declaration.");
1807 case XML_ERR_XMLDECL_NOT_FINISHED:
1808 det = gettext_noop("Parsing XML declaration: '?>' expected.");
1811 det = gettext_noop("Unrecognized libxml error code: %d.");
1817 errmsg_internal("%s", msg),
1818 errdetail(det, code)));
1823 * Remove all trailing newlines from a StringInfo string
1826 chopStringInfoNewlines(StringInfo str)
1828 while (str->len > 0 && str->data[str->len - 1] == '\n')
1829 str->data[--str->len] = '\0';
1834 * Append a newline after removing any existing trailing newlines
1837 appendStringInfoLineSeparator(StringInfo str)
1839 chopStringInfoNewlines(str);
1841 appendStringInfoChar(str, '\n');
1846 * Convert one char in the current server encoding to a Unicode codepoint.
1849 sqlchar_to_unicode(char *s)
1852 pg_wchar ret[2]; /* need space for trailing zero */
1854 /* note we're not assuming s is null-terminated */
1855 utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
1857 pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
1858 pg_encoding_mblen(PG_UTF8, utf8string));
1860 if (utf8string != s)
1868 is_valid_xml_namefirst(pg_wchar c)
1870 /* (Letter | '_' | ':') */
1871 return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1872 || c == '_' || c == ':');
1877 is_valid_xml_namechar(pg_wchar c)
1879 /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
1880 return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1882 || c == '.' || c == '-' || c == '_' || c == ':'
1883 || xmlIsCombiningQ(c)
1884 || xmlIsExtenderQ(c));
1886 #endif /* USE_LIBXML */
1890 * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
1893 map_sql_identifier_to_xml_name(char *ident, bool fully_escaped,
1901 * SQL/XML doesn't make use of this case anywhere, so it's probably a
1904 Assert(fully_escaped || !escape_period);
1906 initStringInfo(&buf);
1908 for (p = ident; *p; p += pg_mblen(p))
1910 if (*p == ':' && (p == ident || fully_escaped))
1911 appendStringInfoString(&buf, "_x003A_");
1912 else if (*p == '_' && *(p + 1) == 'x')
1913 appendStringInfoString(&buf, "_x005F_");
1914 else if (fully_escaped && p == ident &&
1915 pg_strncasecmp(p, "xml", 3) == 0)
1918 appendStringInfoString(&buf, "_x0078_");
1920 appendStringInfoString(&buf, "_x0058_");
1922 else if (escape_period && *p == '.')
1923 appendStringInfoString(&buf, "_x002E_");
1926 pg_wchar u = sqlchar_to_unicode(p);
1929 ? !is_valid_xml_namefirst(u)
1930 : !is_valid_xml_namechar(u))
1931 appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
1933 appendBinaryStringInfo(&buf, p, pg_mblen(p));
1938 #else /* not USE_LIBXML */
1941 #endif /* not USE_LIBXML */
1946 * Map a Unicode codepoint into the current server encoding.
1949 unicode_to_sqlchar(pg_wchar c)
1951 char utf8string[8]; /* need room for trailing zero */
1954 memset(utf8string, 0, sizeof(utf8string));
1955 unicode_to_utf8(c, (unsigned char *) utf8string);
1957 result = pg_any_to_server(utf8string, strlen(utf8string), PG_UTF8);
1958 /* if pg_any_to_server didn't strdup, we must */
1959 if (result == utf8string)
1960 result = pstrdup(result);
1966 * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
1969 map_xml_name_to_sql_identifier(char *name)
1974 initStringInfo(&buf);
1976 for (p = name; *p; p += pg_mblen(p))
1978 if (*p == '_' && *(p + 1) == 'x'
1979 && isxdigit((unsigned char) *(p + 2))
1980 && isxdigit((unsigned char) *(p + 3))
1981 && isxdigit((unsigned char) *(p + 4))
1982 && isxdigit((unsigned char) *(p + 5))
1987 sscanf(p + 2, "%X", &u);
1988 appendStringInfoString(&buf, unicode_to_sqlchar(u));
1992 appendBinaryStringInfo(&buf, p, pg_mblen(p));
1999 * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
2001 * When xml_escape_strings is true, then certain characters in string
2002 * values are replaced by entity references (&lt; etc.), as specified
2003 * in SQL/XML:2008 section 9.8 GR 9) a) iii). This is normally what is
2004 * wanted. The false case is mainly useful when the resulting value
2005 * is used with xmlTextWriterWriteAttribute() to write out an
2006 * attribute, because that function does the escaping itself.
2009 map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
2011 if (type_is_array_domain(type))
2024 array = DatumGetArrayTypeP(value);
2025 elmtype = ARR_ELEMTYPE(array);
2026 get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
2028 deconstruct_array(array, elmtype,
2029 elmlen, elmbyval, elmalign,
2030 &elem_values, &elem_nulls,
2033 initStringInfo(&buf);
2035 for (i = 0; i < num_elems; i++)
2039 appendStringInfoString(&buf, "<element>");
2040 appendStringInfoString(&buf,
2041 map_sql_value_to_xml_value(elem_values[i],
2043 appendStringInfoString(&buf, "</element>");
2058 * Flatten domains; the special-case treatments below should apply to,
2059 * eg, domains over boolean not just boolean.
2061 type = getBaseType(type);
2064 * Special XSD formatting for some data types
2069 if (DatumGetBool(value))
2078 char buf[MAXDATELEN + 1];
2080 date = DatumGetDateADT(value);
2081 /* XSD doesn't support infinite values */
2082 if (DATE_NOT_FINITE(date))
2084 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2085 errmsg("date out of range"),
2086 errdetail("XML does not support infinite date values.")));
2087 j2date(date + POSTGRES_EPOCH_JDATE,
2088 &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
2089 EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2091 return pstrdup(buf);
2096 Timestamp timestamp;
2099 char buf[MAXDATELEN + 1];
2101 timestamp = DatumGetTimestamp(value);
2103 /* XSD doesn't support infinite values */
2104 if (TIMESTAMP_NOT_FINITE(timestamp))
2106 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2107 errmsg("timestamp out of range"),
2108 errdetail("XML does not support infinite timestamp values.")));
2109 else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2110 EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
2113 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2114 errmsg("timestamp out of range")));
2116 return pstrdup(buf);
2119 case TIMESTAMPTZOID:
2121 TimestampTz timestamp;
2125 const char *tzn = NULL;
2126 char buf[MAXDATELEN + 1];
2128 timestamp = DatumGetTimestamp(value);
2130 /* XSD doesn't support infinite values */
2131 if (TIMESTAMP_NOT_FINITE(timestamp))
2133 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2134 errmsg("timestamp out of range"),
2135 errdetail("XML does not support infinite timestamp values.")));
2136 else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2137 EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2140 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2141 errmsg("timestamp out of range")));
2143 return pstrdup(buf);
2149 bytea *bstr = DatumGetByteaPP(value);
2150 PgXmlErrorContext *xmlerrcxt;
2151 volatile xmlBufferPtr buf = NULL;
2152 volatile xmlTextWriterPtr writer = NULL;
2155 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2159 buf = xmlBufferCreate();
2160 if (buf == NULL || xmlerrcxt->err_occurred)
2161 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2162 "could not allocate xmlBuffer");
2163 writer = xmlNewTextWriterMemory(buf, 0);
2164 if (writer == NULL || xmlerrcxt->err_occurred)
2165 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2166 "could not allocate xmlTextWriter");
2168 if (xmlbinary == XMLBINARY_BASE64)
2169 xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2170 0, VARSIZE_ANY_EXHDR(bstr));
2172 xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2173 0, VARSIZE_ANY_EXHDR(bstr));
2175 /* we MUST do this now to flush data out to the buffer */
2176 xmlFreeTextWriter(writer);
2179 result = pstrdup((const char *) xmlBufferContent(buf));
2184 xmlFreeTextWriter(writer);
2188 pg_xml_done(xmlerrcxt, true);
2196 pg_xml_done(xmlerrcxt, false);
2200 #endif /* USE_LIBXML */
2205 * otherwise, just use the type's native text representation
2207 getTypeOutputInfo(type, &typeOut, &isvarlena);
2208 str = OidOutputFunctionCall(typeOut, value);
2210 /* ... exactly as-is for XML, and when escaping is not wanted */
2211 if (type == XMLOID || !xml_escape_strings)
2214 /* otherwise, translate special characters as needed */
2215 return escape_xml(str);
2221 * Escape characters in text that have special meanings in XML.
2223 * Returns a palloc'd string.
2225 * NB: this is intentionally not dependent on libxml.
2228 escape_xml(const char *str)
2233 initStringInfo(&buf);
2234 for (p = str; *p; p++)
2239 appendStringInfoString(&buf, "&");
2242 appendStringInfoString(&buf, "<");
2245 appendStringInfoString(&buf, ">");
2248 appendStringInfoString(&buf, "
");
2251 appendStringInfoCharMacro(&buf, *p);
/*
 * Duplicate a NUL-terminated string into memory obtained from SPI_palloc().
 * The terminator is copied along with the characters.
 */
static char *
_SPI_strdup(const char *s)
{
	size_t		nbytes;
	char	   *copy;

	nbytes = strlen(s) + 1;		/* include the trailing NUL */
	copy = SPI_palloc(nbytes);
	memcpy(copy, s, nbytes);

	return copy;
}
2271 * SQL to XML mapping functions
2273 * What follows below was at one point intentionally organized so that
2274 * you can read along in the SQL/XML standard. The functions are
2275 * mostly split up the way the clauses lay out in the standards
2276 * document, and the identifiers are also aligned with the standard
2277 * text. Unfortunately, SQL/XML:2006 reordered the clauses
2278 * differently than SQL/XML:2003, so the order below doesn't make much
2281 * There are many things going on there:
2283 * There are two kinds of mappings: Mapping SQL data (table contents)
2284 * to XML documents, and mapping SQL structure (the "schema") to XML
2285 * Schema. And there are functions that do both at the same time.
2287 * Then you can map a database, a schema, or a table, each in both
2288 * ways. This breaks down recursively: Mapping a database invokes
2289 * mapping schemas, which invokes mapping tables, which invokes
2290 * mapping rows, which invokes mapping columns, although you can't
2291 * call the last two from the outside. Because of this, there are a
2292 * number of xyz_internal() functions which are to be called both from
2293 * the function manager wrapper and from some upper layer in a
2296 * See the documentation about what the common function arguments
2297 * nulls, tableforest, and targetns mean.
2299 * Some style guidelines for XML output: Use double quotes for quoting
2300 * XML attributes. Indent XML elements by two spaces, but remember
2301 * that a lot of code is called recursively at different levels, so
2302 * it's better not to indent rather than create output that indents
2303 * and outdents weirdly. Add newlines to make the output look nice.
2308 * Visibility of objects for XML mappings; see SQL/XML:2008 section
2313 * Given a query, which must return type oid as first column, produce
2314 * a list of Oids with the query results.
2317 query_to_oid_list(const char *query)
2322 SPI_execute(query, true, 0);
2324 for (i = 0; i < SPI_processed; i++)
2329 oid = SPI_getbinval(SPI_tuptable->vals[i],
2330 SPI_tuptable->tupdesc,
2334 list = lappend_oid(list, DatumGetObjectId(oid));
2342 schema_get_xml_visible_tables(Oid nspid)
2344 StringInfoData query;
2346 initStringInfo(&query);
2347 appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class WHERE relnamespace = %u AND relkind IN ('r', 'm', 'v') AND pg_catalog.has_table_privilege (oid, 'SELECT') ORDER BY relname;", nspid);
2349 return query_to_oid_list(query.data);
2354 * Including the system schemas is probably not useful for a database
2357 #define XML_VISIBLE_SCHEMAS_EXCLUDE "(nspname ~ '^pg_' OR nspname = 'information_schema')"
2359 #define XML_VISIBLE_SCHEMAS "SELECT oid FROM pg_catalog.pg_namespace WHERE pg_catalog.has_schema_privilege (oid, 'USAGE') AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2363 database_get_xml_visible_schemas(void)
2365 return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2370 database_get_xml_visible_tables(void)
2372 /* At the moment there is no order required here. */
2373 return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class WHERE relkind IN ('r', 'm', 'v') AND pg_catalog.has_table_privilege (pg_class.oid, 'SELECT') AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2378 * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2383 table_to_xml_internal(Oid relid,
2384 const char *xmlschema, bool nulls, bool tableforest,
2385 const char *targetns, bool top_level)
2387 StringInfoData query;
2389 initStringInfo(&query);
2390 appendStringInfo(&query, "SELECT * FROM %s",
2391 DatumGetCString(DirectFunctionCall1(regclassout,
2392 ObjectIdGetDatum(relid))));
2393 return query_to_xml_internal(query.data, get_rel_name(relid),
2394 xmlschema, nulls, tableforest,
2395 targetns, top_level);
2400 table_to_xml(PG_FUNCTION_ARGS)
2402 Oid relid = PG_GETARG_OID(0);
2403 bool nulls = PG_GETARG_BOOL(1);
2404 bool tableforest = PG_GETARG_BOOL(2);
2405 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2407 PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2414 query_to_xml(PG_FUNCTION_ARGS)
2416 char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2417 bool nulls = PG_GETARG_BOOL(1);
2418 bool tableforest = PG_GETARG_BOOL(2);
2419 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2421 PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2422 NULL, nulls, tableforest,
2428 cursor_to_xml(PG_FUNCTION_ARGS)
2430 char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2431 int32 count = PG_GETARG_INT32(1);
2432 bool nulls = PG_GETARG_BOOL(2);
2433 bool tableforest = PG_GETARG_BOOL(3);
2434 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2436 StringInfoData result;
2440 initStringInfo(&result);
2443 portal = SPI_cursor_find(name);
2446 (errcode(ERRCODE_UNDEFINED_CURSOR),
2447 errmsg("cursor \"%s\" does not exist", name)));
2449 SPI_cursor_fetch(portal, true, count);
2450 for (i = 0; i < SPI_processed; i++)
2451 SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2452 tableforest, targetns, true);
2456 PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2461 * Write the start tag of the root element of a data mapping.
2463 * top_level means that this is the very top level of the eventual
2464 * output. For example, when the user calls table_to_xml, then a call
2465 * with a table name to this function is the top level. When the user
2466 * calls database_to_xml, then a call with a schema name to this
2467 * function is not the top level. If top_level is false, then the XML
2468 * namespace declarations are omitted, because they supposedly already
2469 * appeared earlier in the output. Repeating them is not wrong, but
2473 xmldata_root_element_start(StringInfo result, const char *eltname,
2474 const char *xmlschema, const char *targetns,
2477 /* This isn't really wrong but currently makes no sense. */
2478 Assert(top_level || !xmlschema);
2480 appendStringInfo(result, "<%s", eltname);
2483 appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
2484 if (strlen(targetns) > 0)
2485 appendStringInfo(result, " xmlns=\"%s\"", targetns);
2489 /* FIXME: better targets */
2490 if (strlen(targetns) > 0)
2491 appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
2493 appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
2495 appendStringInfoString(result, ">\n");
2500 xmldata_root_element_end(StringInfo result, const char *eltname)
2502 appendStringInfo(result, "</%s>\n", eltname);
2507 query_to_xml_internal(const char *query, char *tablename,
2508 const char *xmlschema, bool nulls, bool tableforest,
2509 const char *targetns, bool top_level)
2516 xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
2520 result = makeStringInfo();
2523 if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
2525 (errcode(ERRCODE_DATA_EXCEPTION),
2526 errmsg("invalid query")));
2530 xmldata_root_element_start(result, xmltn, xmlschema,
2531 targetns, top_level);
2532 appendStringInfoChar(result, '\n');
2536 appendStringInfo(result, "%s\n\n", xmlschema);
2538 for (i = 0; i < SPI_processed; i++)
2539 SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
2540 tableforest, targetns, top_level);
2543 xmldata_root_element_end(result, xmltn);
2552 table_to_xmlschema(PG_FUNCTION_ARGS)
2554 Oid relid = PG_GETARG_OID(0);
2555 bool nulls = PG_GETARG_BOOL(1);
2556 bool tableforest = PG_GETARG_BOOL(2);
2557 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2561 rel = heap_open(relid, AccessShareLock);
2562 result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2563 tableforest, targetns);
2564 heap_close(rel, NoLock);
2566 PG_RETURN_XML_P(cstring_to_xmltype(result));
2571 query_to_xmlschema(PG_FUNCTION_ARGS)
2573 char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2574 bool nulls = PG_GETARG_BOOL(1);
2575 bool tableforest = PG_GETARG_BOOL(2);
2576 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2583 if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2584 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2586 if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2587 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2589 result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2591 tableforest, targetns));
2592 SPI_cursor_close(portal);
2595 PG_RETURN_XML_P(cstring_to_xmltype(result));
2600 cursor_to_xmlschema(PG_FUNCTION_ARGS)
2602 char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2603 bool nulls = PG_GETARG_BOOL(1);
2604 bool tableforest = PG_GETARG_BOOL(2);
2605 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2606 const char *xmlschema;
2610 portal = SPI_cursor_find(name);
2613 (errcode(ERRCODE_UNDEFINED_CURSOR),
2614 errmsg("cursor \"%s\" does not exist", name)));
2616 xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2618 tableforest, targetns));
2621 PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
2626 table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2628 Oid relid = PG_GETARG_OID(0);
2629 bool nulls = PG_GETARG_BOOL(1);
2630 bool tableforest = PG_GETARG_BOOL(2);
2631 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2633 const char *xmlschema;
2635 rel = heap_open(relid, AccessShareLock);
2636 xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2637 tableforest, targetns);
2638 heap_close(rel, NoLock);
2640 PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
2641 xmlschema, nulls, tableforest,
2647 query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2649 char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2650 bool nulls = PG_GETARG_BOOL(1);
2651 bool tableforest = PG_GETARG_BOOL(2);
2652 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2654 const char *xmlschema;
2660 if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2661 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2663 if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2664 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2666 xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2667 InvalidOid, nulls, tableforest, targetns));
2668 SPI_cursor_close(portal);
2671 PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2672 xmlschema, nulls, tableforest,
2678 * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
2679 * sections 9.13, 9.14.
2683 schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
2684 bool tableforest, const char *targetns, bool top_level)
2691 xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
2693 result = makeStringInfo();
2695 xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
2696 appendStringInfoChar(result, '\n');
2699 appendStringInfo(result, "%s\n\n", xmlschema);
2703 relid_list = schema_get_xml_visible_tables(nspid);
2705 foreach(cell, relid_list)
2707 Oid relid = lfirst_oid(cell);
2710 subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
2713 appendStringInfoString(result, subres->data);
2714 appendStringInfoChar(result, '\n');
2719 xmldata_root_element_end(result, xmlsn);
2726 schema_to_xml(PG_FUNCTION_ARGS)
2728 Name name = PG_GETARG_NAME(0);
2729 bool nulls = PG_GETARG_BOOL(1);
2730 bool tableforest = PG_GETARG_BOOL(2);
2731 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2736 schemaname = NameStr(*name);
2737 nspid = LookupExplicitNamespace(schemaname, false);
2739 PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
2740 nulls, tableforest, targetns, true)));
2745 * Write the start element of the root element of an XML Schema mapping.
2748 xsd_schema_element_start(StringInfo result, const char *targetns)
2750 appendStringInfoString(result,
2752 " xmlns:xsd=\"" NAMESPACE_XSD "\"");
2753 if (strlen(targetns) > 0)
2754 appendStringInfo(result,
2756 " targetNamespace=\"%s\"\n"
2757 " elementFormDefault=\"qualified\"",
2759 appendStringInfoString(result,
2765 xsd_schema_element_end(StringInfo result)
2767 appendStringInfoString(result, "</xsd:schema>");
2772 schema_to_xmlschema_internal(const char *schemaname, bool nulls,
2773 bool tableforest, const char *targetns)
2781 result = makeStringInfo();
2783 nspid = LookupExplicitNamespace(schemaname, false);
2785 xsd_schema_element_start(result, targetns);
2789 relid_list = schema_get_xml_visible_tables(nspid);
2792 foreach(cell, relid_list)
2796 rel = heap_open(lfirst_oid(cell), AccessShareLock);
2797 tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
2798 heap_close(rel, NoLock);
2801 appendStringInfoString(result,
2802 map_sql_typecoll_to_xmlschema_types(tupdesc_list));
2804 appendStringInfoString(result,
2805 map_sql_schema_to_xmlschema_types(nspid, relid_list,
2806 nulls, tableforest, targetns));
2808 xsd_schema_element_end(result);
2817 schema_to_xmlschema(PG_FUNCTION_ARGS)
2819 Name name = PG_GETARG_NAME(0);
2820 bool nulls = PG_GETARG_BOOL(1);
2821 bool tableforest = PG_GETARG_BOOL(2);
2822 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2824 PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
2825 nulls, tableforest, targetns)));
2830 schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2832 Name name = PG_GETARG_NAME(0);
2833 bool nulls = PG_GETARG_BOOL(1);
2834 bool tableforest = PG_GETARG_BOOL(2);
2835 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2838 StringInfo xmlschema;
2840 schemaname = NameStr(*name);
2841 nspid = LookupExplicitNamespace(schemaname, false);
2843 xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
2844 tableforest, targetns);
2846 PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
2847 xmlschema->data, nulls,
2848 tableforest, targetns, true)));
2853 * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
2854 * sections 9.16, 9.17.
2858 database_to_xml_internal(const char *xmlschema, bool nulls,
2859 bool tableforest, const char *targetns)
/*
 * Build the XML rendering of the current database (MyDatabaseId).
 *
 * xmlschema    XML Schema text to place in the output, or NULL; within this
 *              file database_to_xml() passes NULL and
 *              database_to_xml_and_xmlschema() passes the generated schema.
 * nulls, tableforest, targetns
 *              passed unchanged to schema_to_xml_internal() for every
 *              schema; targetns is also given to the root element.
 *
 * The root element is named after the database.  Inside it, the output of
 * schema_to_xml_internal() for each OID returned by
 * database_get_xml_visible_schemas() is appended, each followed by a
 * newline.  Those per-schema calls pass NULL for xmlschema and false as
 * their last argument.
 */
2866 xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
2868 result = makeStringInfo();
2870 xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
2871 appendStringInfoChar(result, '\n');
2874 appendStringInfo(result, "%s\n\n", xmlschema);
2878 nspid_list = database_get_xml_visible_schemas();
2880 foreach(cell, nspid_list)
2882 Oid nspid = lfirst_oid(cell);
2885 subres = schema_to_xml_internal(nspid, NULL, nulls,
2886 tableforest, targetns, false);
2888 appendStringInfoString(result, subres->data);
2889 appendStringInfoChar(result, '\n');
2894 xmldata_root_element_end(result, xmlcn);
2901 database_to_xml(PG_FUNCTION_ARGS)
/*
 * SQL-callable wrapper: XML rendering of the current database without an
 * accompanying XML Schema (NULL is passed as xmlschema).
 *
 * Arguments: 0 = nulls flag, 1 = tableforest flag, 2 = target namespace.
 */
2903 bool nulls = PG_GETARG_BOOL(0);
2904 bool tableforest = PG_GETARG_BOOL(1);
2905 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
2907 PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
2908 tableforest, targetns)));
2913 database_to_xmlschema_internal(bool nulls, bool tableforest,
2914 const char *targetns)
/*
 * Build the XML Schema document for the current database.
 *
 * Between the schema start and end elements two pieces are emitted:
 *   1. the type definitions for all column types of the tables returned by
 *      database_get_xml_visible_tables(), via
 *      map_sql_typecoll_to_xmlschema_types();
 *   2. the catalog-level types for the schemas returned by
 *      database_get_xml_visible_schemas(), via
 *      map_sql_catalog_to_xmlschema_types().
 *
 * nulls, tableforest and targetns are forwarded to the catalog mapping;
 * targetns is also given to xsd_schema_element_start().
 */
2922 result = makeStringInfo();
2924 xsd_schema_element_start(result, targetns);
2928 relid_list = database_get_xml_visible_tables();
2929 nspid_list = database_get_xml_visible_schemas();
2932 foreach(cell, relid_list)
2936 rel = heap_open(lfirst_oid(cell), AccessShareLock);
2937 tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
/*
 * Only a copy of the tuple descriptor is kept.  The relation is closed
 * with NoLock, i.e. the AccessShareLock taken at open is not released
 * here (presumably it is held until end of transaction -- confirm).
 */
2938 heap_close(rel, NoLock);
2941 appendStringInfoString(result,
2942 map_sql_typecoll_to_xmlschema_types(tupdesc_list));
2944 appendStringInfoString(result,
2945 map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
2947 xsd_schema_element_end(result);
2956 database_to_xmlschema(PG_FUNCTION_ARGS)
/*
 * SQL-callable wrapper around database_to_xmlschema_internal().
 *
 * Arguments: 0 = nulls flag, 1 = tableforest flag, 2 = target namespace.
 * Returns the generated schema text as an xml value.
 */
2958 bool nulls = PG_GETARG_BOOL(0);
2959 bool tableforest = PG_GETARG_BOOL(1);
2960 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
2962 PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
2963 tableforest, targetns)));
2968 database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
/*
 * SQL-callable function combining both database mappings.
 *
 * Arguments: 0 = nulls flag, 1 = tableforest flag, 2 = target namespace.
 * The XML Schema text is generated first and then passed to
 * database_to_xml_internal() as its xmlschema argument.
 */
2970 bool nulls = PG_GETARG_BOOL(0);
2971 bool tableforest = PG_GETARG_BOOL(1);
2972 const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
2973 StringInfo xmlschema;
2975 xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
2977 PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
2978 nulls, tableforest, targetns)));
2983 * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
2987 map_multipart_sql_identifier_to_xml_name(char *a, char *b, char *c, char *d)
/*
 * Build a dotted XML name from up to four SQL identifier parts.
 *
 * Every part is mapped with map_sql_identifier_to_xml_name(part, true,
 * true); parts b, c and d are each appended with a leading '.'.  Callers in
 * this file pass a fixed prefix such as "TableType", "RowType",
 * "SchemaType", "CatalogType", "Domain" or "UDT" as the first part.
 */
2989 StringInfoData result;
2991 initStringInfo(&result);
2994 appendStringInfoString(&result,
2995 map_sql_identifier_to_xml_name(a, true, true));
2997 appendStringInfo(&result, ".%s",
2998 map_sql_identifier_to_xml_name(b, true, true));
3000 appendStringInfo(&result, ".%s",
3001 map_sql_identifier_to_xml_name(c, true, true));
3003 appendStringInfo(&result, ".%s",
3004 map_sql_identifier_to_xml_name(d, true, true));
3011 * Map an SQL table to an XML Schema document; see SQL/XML:2008
3014 * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3018 map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
3019 bool tableforest, const char *targetns)
/*
 * Produce an XML Schema document describing one row source.
 *
 * tupdesc      column definitions; the attisdropped flag of each attribute
 *              is tested in the column loop.
 * relid        when valid, the relation's pg_class entry is fetched from the
 *              RELOID syscache and the table/row type names become
 *              "TableType"/"RowType" qualified by database, schema and
 *              relation name; otherwise the bare names "TableType" and
 *              "RowType" are used.
 * nulls        selects the attribute written for each column element:
 *              nillable="true" when set, minOccurs="0" otherwise.
 * tableforest  NOTE(review): presumably selects which of the two trailing
 *              <xsd:element> declarations (table type or row type) is
 *              emitted -- confirm.
 * targetns     passed to xsd_schema_element_start().
 *
 * Column element types are named by map_sql_type_to_xml_name() with a
 * typmod of -1.
 */
3023 char *tabletypename;
3025 StringInfoData result;
3027 initStringInfo(&result);
3029 if (OidIsValid(relid))
3032 Form_pg_class reltuple;
3034 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
3035 if (!HeapTupleIsValid(tuple))
3036 elog(ERROR, "cache lookup failed for relation %u", relid);
3037 reltuple = (Form_pg_class) GETSTRUCT(tuple);
3039 xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
3042 tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
3043 get_database_name(MyDatabaseId),
3044 get_namespace_name(reltuple->relnamespace),
3045 NameStr(reltuple->relname));
3047 rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
3048 get_database_name(MyDatabaseId),
3049 get_namespace_name(reltuple->relnamespace),
3050 NameStr(reltuple->relname));
/* All names derived from the tuple are built before it is released. */
3052 ReleaseSysCache(tuple);
3061 tabletypename = "TableType";
3062 rowtypename = "RowType";
3065 xsd_schema_element_start(&result, targetns);
3067 appendStringInfoString(&result,
3068 map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3070 appendStringInfo(&result,
3071 "<xsd:complexType name=\"%s\">\n"
3072 " <xsd:sequence>\n",
3075 for (i = 0; i < tupdesc->natts; i++)
3077 if (tupdesc->attrs[i]->attisdropped)
3079 appendStringInfo(&result,
3080 " <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3081 map_sql_identifier_to_xml_name(NameStr(tupdesc->attrs[i]->attname),
3083 map_sql_type_to_xml_name(tupdesc->attrs[i]->atttypid, -1),
3084 nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3087 appendStringInfoString(&result,
3088 " </xsd:sequence>\n"
3089 "</xsd:complexType>\n\n");
3093 appendStringInfo(&result,
3094 "<xsd:complexType name=\"%s\">\n"
3096 " <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3097 " </xsd:sequence>\n"
3098 "</xsd:complexType>\n\n",
3099 tabletypename, rowtypename);
3101 appendStringInfo(&result,
3102 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3103 xmltn, tabletypename);
3106 appendStringInfo(&result,
3107 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3108 xmltn, rowtypename);
3110 xsd_schema_element_end(&result);
3117 * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3121 map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3122 bool tableforest, const char *targetns)
/*
 * Emit the XML Schema complexType for one SQL schema plus the element
 * declaration that uses it.
 *
 * nspid        namespace OID; its name gives the element name and is part
 *              of the "SchemaType" type name.
 * relid_list   OID list of the tables to reference.  Each table contributes
 *              one <xsd:element> whose type name starts with "RowType" when
 *              tableforest is set and with "TableType" otherwise.
 * tableforest  see above.  Two element forms are written in the loop, one
 *              plain and one carrying minOccurs="0" maxOccurs="unbounded";
 *              NOTE(review): presumably the unbounded form belongs to
 *              tableforest mode -- confirm.
 */
3127 char *schematypename;
3128 StringInfoData result;
3131 dbname = get_database_name(MyDatabaseId);
3132 nspname = get_namespace_name(nspid);
3134 initStringInfo(&result);
3136 xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3138 schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3143 appendStringInfo(&result,
3144 "<xsd:complexType name=\"%s\">\n", schematypename);
3146 appendStringInfoString(&result,
3149 appendStringInfoString(&result,
3150 " <xsd:sequence>\n");
3152 foreach(cell, relid_list)
3154 Oid relid = lfirst_oid(cell);
3155 char *relname = get_rel_name(relid);
3156 char *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3157 char *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3163 appendStringInfo(&result,
3164 " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3165 xmltn, tabletypename);
3167 appendStringInfo(&result,
3168 " <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3169 xmltn, tabletypename);
3173 appendStringInfoString(&result,
3176 appendStringInfoString(&result,
3177 " </xsd:sequence>\n");
3178 appendStringInfoString(&result,
3179 "</xsd:complexType>\n\n");
3181 appendStringInfo(&result,
3182 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3183 xmlsn, schematypename);
3190 * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3194 map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3195 bool tableforest, const char *targetns)
/*
 * Emit the XML Schema complexType for the current database (the SQL
 * "catalog") plus the element declaration that uses it.
 *
 * nspid_list is an OID list of namespaces.  Each one contributes an
 * <xsd:element> named after the schema and typed with that schema's
 * "SchemaType" name.  The closing element declaration is named after the
 * database and typed with the "CatalogType" name.
 */
3199 char *catalogtypename;
3200 StringInfoData result;
3203 dbname = get_database_name(MyDatabaseId);
3205 initStringInfo(&result);
3207 xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3209 catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3214 appendStringInfo(&result,
3215 "<xsd:complexType name=\"%s\">\n", catalogtypename);
3216 appendStringInfoString(&result,
3219 foreach(cell, nspid_list)
3221 Oid nspid = lfirst_oid(cell);
3222 char *nspname = get_namespace_name(nspid);
3223 char *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3224 char *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3229 appendStringInfo(&result,
3230 " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3231 xmlsn, schematypename);
3234 appendStringInfoString(&result,
3236 appendStringInfoString(&result,
3237 "</xsd:complexType>\n\n");
3239 appendStringInfo(&result,
3240 "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3241 xmlcn, catalogtypename);
3248 * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3251 map_sql_type_to_xml_name(Oid typeoid, int typmod)
/*
 * Return the XML name used for an SQL data type.
 *
 * Built-in types map to fixed names (CHAR, VARCHAR, NUMERIC, INTEGER,
 * SMALLINT, BIGINT, REAL, DOUBLE, BOOLEAN, TIME, TIME_WTZ, TIMESTAMP,
 * TIMESTAMP_WTZ, DATE, XML).  Several of them also have a form with a
 * suffix derived from typmod: CHAR_n and VARCHAR_n print
 * typmod - VARHDRSZ, NUMERIC_p_s prints the upper and lower 16 bits of
 * typmod - VARHDRSZ, and the time/timestamp forms print typmod unchanged.
 *
 * For the remaining case the type is looked up in the TYPEOID syscache and
 * named "Domain" (typtype is TYPTYPE_DOMAIN) or "UDT" (anything else),
 * qualified by database, schema and type name.
 */
3253 StringInfoData result;
3255 initStringInfo(&result);
3261 appendStringInfoString(&result, "CHAR");
3263 appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3267 appendStringInfoString(&result, "VARCHAR");
3269 appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3273 appendStringInfoString(&result, "NUMERIC");
3275 appendStringInfo(&result, "NUMERIC_%d_%d",
3276 ((typmod - VARHDRSZ) >> 16) & 0xffff,
3277 (typmod - VARHDRSZ) & 0xffff);
3280 appendStringInfoString(&result, "INTEGER");
3283 appendStringInfoString(&result, "SMALLINT");
3286 appendStringInfoString(&result, "BIGINT");
3289 appendStringInfoString(&result, "REAL");
3292 appendStringInfoString(&result, "DOUBLE");
3295 appendStringInfoString(&result, "BOOLEAN");
3299 appendStringInfoString(&result, "TIME");
3301 appendStringInfo(&result, "TIME_%d", typmod);
3305 appendStringInfoString(&result, "TIME_WTZ");
3307 appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3311 appendStringInfoString(&result, "TIMESTAMP");
3313 appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3315 case TIMESTAMPTZOID:
3317 appendStringInfoString(&result, "TIMESTAMP_WTZ");
3319 appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3322 appendStringInfoString(&result, "DATE");
3325 appendStringInfoString(&result, "XML");
3330 Form_pg_type typtuple;
3332 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3333 if (!HeapTupleIsValid(tuple))
3334 elog(ERROR, "cache lookup failed for type %u", typeoid);
3335 typtuple = (Form_pg_type) GETSTRUCT(tuple);
3337 appendStringInfoString(&result,
3338 map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3339 get_database_name(MyDatabaseId),
3340 get_namespace_name(typtuple->typnamespace),
3341 NameStr(typtuple->typname)));
3343 ReleaseSysCache(tuple);
3352 * Map a collection of SQL data types to XML Schema data types; see
3353 * SQL/XML:2008 section 9.7.
3356 map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
/*
 * Emit the XML Schema type definitions needed by a set of tuple
 * descriptors.
 *
 * tupdesc_list is a List of TupleDesc.  The distinct column type OIDs are
 * gathered with list_append_unique_oid(); for every entry whose
 * getBaseType() differs from the entry itself, that base type is added as
 * well; finally each collected type is rendered with
 * map_sql_type_to_xmlschema_type() and followed by a newline.
 *
 * Note that the second loop appends to the same list it is iterating over.
 */
3358 List *uniquetypes = NIL;
3360 StringInfoData result;
3363 /* extract all column types used in the set of TupleDescs */
3364 foreach(cell0, tupdesc_list)
3366 TupleDesc tupdesc = (TupleDesc) lfirst(cell0);
3368 for (i = 0; i < tupdesc->natts; i++)
3370 if (tupdesc->attrs[i]->attisdropped)
3372 uniquetypes = list_append_unique_oid(uniquetypes,
3373 tupdesc->attrs[i]->atttypid);
3377 /* add base types of domains */
3378 foreach(cell0, uniquetypes)
3380 Oid typid = lfirst_oid(cell0);
3381 Oid basetypid = getBaseType(typid);
3383 if (basetypid != typid)
3384 uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3387 /* Convert to textual form */
3388 initStringInfo(&result);
3390 foreach(cell0, uniquetypes)
3392 appendStringInfo(&result, "%s\n",
3393 map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3402 * Map an SQL data type to a named XML Schema data type; see
3403 * SQL/XML:2008 sections 9.5 and 9.6.
3405 * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3406 * a name attribute, which this function does. The name-less version
3407 * 9.5 doesn't appear to be required anywhere.)
3410 map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
/*
 * Build the named XML Schema type definition for one SQL type as a string.
 *
 * typeoid/typmod identify the SQL type; the name is produced by
 * map_sql_type_to_xml_name().  XMLOID yields a mixed-content
 * <xsd:complexType>; everything else yields an <xsd:simpleType> wrapper
 * whose restriction is one of: xsd:string plus a maxLength facet,
 * base64Binary or hexBinary (chosen by xmlbinary), xsd:decimal with
 * totalDigits/fractionDigits unpacked from typmod - VARHDRSZ, xsd:short,
 * xsd:int and xsd:long with explicit bounds, xsd:float, xsd:double,
 * xsd:boolean, xsd:time / xsd:dateTime / xsd:date with lexical patterns,
 * or, for a domain, a restriction on the XML name of its base type.
 *
 * NOTE(review): the time and dateTime patterns with an explicit precision
 * print typmod - VARHDRSZ, whereas map_sql_type_to_xml_name() prints the
 * bare typmod for the same types ("TIME_%d", "TIMESTAMP_%d") -- confirm
 * which of the two is intended.
 * NOTE(review): the fractional-seconds separator in those patterns is an
 * unescaped '.', which presumably matches any character in an XML Schema
 * regular expression -- confirm whether an escaped dot was meant.
 * NOTE(review): the xsd:long bounds are computed as uint64 expressions but
 * printed with INT64_FORMAT -- confirm the minimum renders as intended.
 */
3412 StringInfoData result;
3413 const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3415 initStringInfo(&result);
3417 if (typeoid == XMLOID)
3419 appendStringInfoString(&result,
3420 "<xsd:complexType mixed=\"true\">\n"
3422 " <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3423 " </xsd:sequence>\n"
3424 "</xsd:complexType>\n");
3428 appendStringInfo(&result,
3429 "<xsd:simpleType name=\"%s\">\n", typename);
3436 appendStringInfo(&result,
3437 " <xsd:restriction base=\"xsd:string\">\n");
3439 appendStringInfo(&result,
3440 " <xsd:maxLength value=\"%d\"/>\n",
3442 appendStringInfoString(&result, " </xsd:restriction>\n");
3446 appendStringInfo(&result,
3447 " <xsd:restriction base=\"xsd:%s\">\n"
3448 " </xsd:restriction>\n",
3449 xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3454 appendStringInfo(&result,
3455 " <xsd:restriction base=\"xsd:decimal\">\n"
3456 " <xsd:totalDigits value=\"%d\"/>\n"
3457 " <xsd:fractionDigits value=\"%d\"/>\n"
3458 " </xsd:restriction>\n",
3459 ((typmod - VARHDRSZ) >> 16) & 0xffff,
3460 (typmod - VARHDRSZ) & 0xffff);
3464 appendStringInfo(&result,
3465 " <xsd:restriction base=\"xsd:short\">\n"
3466 " <xsd:maxInclusive value=\"%d\"/>\n"
3467 " <xsd:minInclusive value=\"%d\"/>\n"
3468 " </xsd:restriction>\n",
3469 SHRT_MAX, SHRT_MIN);
3473 appendStringInfo(&result,
3474 " <xsd:restriction base=\"xsd:int\">\n"
3475 " <xsd:maxInclusive value=\"%d\"/>\n"
3476 " <xsd:minInclusive value=\"%d\"/>\n"
3477 " </xsd:restriction>\n",
3482 appendStringInfo(&result,
3483 " <xsd:restriction base=\"xsd:long\">\n"
3484 " <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
3485 " <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
3486 " </xsd:restriction>\n",
3487 (((uint64) 1) << (sizeof(int64) * 8 - 1)) - 1,
3488 (((uint64) 1) << (sizeof(int64) * 8 - 1)));
3492 appendStringInfoString(&result,
3493 " <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
3497 appendStringInfoString(&result,
3498 " <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
3502 appendStringInfoString(&result,
3503 " <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
3509 const char *tz = (typeoid == TIMETZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3512 appendStringInfo(&result,
3513 " <xsd:restriction base=\"xsd:time\">\n"
3514 " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3515 " </xsd:restriction>\n", tz);
3516 else if (typmod == 0)
3517 appendStringInfo(&result,
3518 " <xsd:restriction base=\"xsd:time\">\n"
3519 " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3520 " </xsd:restriction>\n", tz);
3522 appendStringInfo(&result,
3523 " <xsd:restriction base=\"xsd:time\">\n"
3524 " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3525 " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3530 case TIMESTAMPTZOID:
3532 const char *tz = (typeoid == TIMESTAMPTZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3535 appendStringInfo(&result,
3536 " <xsd:restriction base=\"xsd:dateTime\">\n"
3537 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3538 " </xsd:restriction>\n", tz);
3539 else if (typmod == 0)
3540 appendStringInfo(&result,
3541 " <xsd:restriction base=\"xsd:dateTime\">\n"
3542 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3543 " </xsd:restriction>\n", tz);
3545 appendStringInfo(&result,
3546 " <xsd:restriction base=\"xsd:dateTime\">\n"
3547 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3548 " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3553 appendStringInfoString(&result,
3554 " <xsd:restriction base=\"xsd:date\">\n"
3555 " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
3556 " </xsd:restriction>\n");
3560 if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
3563 int32 base_typmod = -1;
3565 base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
3567 appendStringInfo(&result,
3568 " <xsd:restriction base=\"%s\"/>\n",
3569 map_sql_type_to_xml_name(base_typeoid, base_typmod));
3573 appendStringInfoString(&result, "</xsd:simpleType>\n");
3581 * Map an SQL row to an XML element, taking the row from the active
3582 * SPI cursor. See also SQL/XML:2008 section 9.10.
3585 SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
3586 bool nulls, bool tableforest,
3587 const char *targetns, bool top_level)
/*
 * Append the XML rendering of one row of the current SPI result to
 * "result".
 *
 * rownum       index into SPI_tuptable->vals.
 * result       output buffer, appended to in place.
 * tablename    SQL table name; its XML form names the wrapper element that
 *              xmldata_root_element_start/end() write.
 * nulls        NOTE(review): presumably decides whether a NULL column is
 *              written as an element with xsi:nil="true" or left out --
 *              confirm.
 * tableforest  NOTE(review): presumably selects between the table-named
 *              wrapper element and the plain <row> wrapper -- confirm.
 * targetns, top_level
 *              forwarded to xmldata_root_element_start().
 *
 * Columns are visited by their 1-based SPI attribute number.  Values are
 * fetched with SPI_getbinval() and converted with
 * map_sql_value_to_xml_value() using the column's SPI type OID.
 */
3593 xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3603 xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
3605 appendStringInfoString(result, "<row>\n");
3607 for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
3613 colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
3615 colval = SPI_getbinval(SPI_tuptable->vals[rownum],
3616 SPI_tuptable->tupdesc,
3622 appendStringInfo(result, " <%s xsi:nil=\"true\"/>\n", colname);
3625 appendStringInfo(result, " <%s>%s</%s>\n",
3627 map_sql_value_to_xml_value(colval,
3628 SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
3634 xmldata_root_element_end(result, xmltn);
3635 appendStringInfoChar(result, '\n');
3638 appendStringInfoString(result, "</row>\n\n");
3643 * XPath related functions
3649 * Convert XML node to text (dump subtree in case of element,
3650 * return value otherwise)
3653 xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
/*
 * Convert one libxml2 node into an xml value.
 *
 * Element nodes are deep-copied with xmlCopyNode(cur, 1), dumped into a
 * fresh xmlBuffer with xmlNodeDump() and converted with
 * xmlBuffer_to_xmltype(); a failed copy is reported through xmlerrcxt as
 * out-of-memory.  The copy is freed with xmlFreeNode() at two places,
 * presumably once on the error path and once on the normal path.
 *
 * Any other node is turned into a string with xmlXPathCastNodeToString(),
 * passed through escape_xml() and stored via cstring_to_text().
 */
3657 if (cur->type == XML_ELEMENT_NODE)
3660 xmlNodePtr cur_copy;
3662 buf = xmlBufferCreate();
3665 * The result of xmlNodeDump() won't contain namespace definitions
3666 * from parent nodes, but xmlCopyNode() duplicates a node along with
3667 * its required namespace definitions.
3669 cur_copy = xmlCopyNode(cur, 1);
3671 if (cur_copy == NULL)
3672 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3673 "could not copy node");
3677 xmlNodeDump(buf, NULL, cur_copy, 0, 1);
3678 result = xmlBuffer_to_xmltype(buf);
3682 xmlFreeNode(cur_copy);
3687 xmlFreeNode(cur_copy);
3694 str = xmlXPathCastNodeToString(cur);
3697 /* Here we rely on XML having the same representation as TEXT */
3698 char *escaped = escape_xml((char *) str);
3700 result = (xmltype *) cstring_to_text(escaped);
3716 * Convert an XML XPath object (the result of evaluating an XPath expression)
3717 * to an array of xml values, which are appended to astate. The function
3718 * result value is the number of elements in the array.
3720 * If "astate" is NULL then we don't generate the array value, but we still
3721 * return the number of elements it would have had.
3723 * Nodesets are converted to an array containing the nodes' textual
3724 * representations. Primitive values (boolean, number, string) are converted
3725 * to a single-element array containing the value's string representation.
3728 xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
3729 ArrayBuildState *astate,
3730 PgXmlErrorContext *xmlerrcxt)
/*
 * Dispatches on xpathobj->type.
 *
 * Node sets: the result is nodesetval->nodeNr (when nodesetval is not
 * NULL) and every node is converted with xml_xmlnodetoxmltype() and
 * accumulated into astate as an XMLOID element.
 *
 * Scalars: the value is wrapped as a Datum of type BOOLOID (boolval),
 * FLOAT8OID (floatval) or CSTRINGOID (stringval), rendered with
 * map_sql_value_to_xml_value(), converted with cstring_to_xmltype() and
 * accumulated as a single XMLOID element.
 *
 * Any other result type raises "xpath expression result type %d is
 * unsupported".
 */
3737 switch (xpathobj->type)
3740 if (xpathobj->nodesetval != NULL)
3742 result = xpathobj->nodesetval->nodeNr;
3747 for (i = 0; i < result; i++)
3749 datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
3751 (void) accumArrayResult(astate, datum, false,
3752 XMLOID, CurrentMemoryContext);
3761 datum = BoolGetDatum(xpathobj->boolval);
3762 datumtype = BOOLOID;
3768 datum = Float8GetDatum(xpathobj->floatval);
3769 datumtype = FLOAT8OID;
3775 datum = CStringGetDatum((char *) xpathobj->stringval);
3776 datumtype = CSTRINGOID;
3780 elog(ERROR, "xpath expression result type %d is unsupported",
3782 return 0; /* keep compiler quiet */
3785 /* Common code for scalar-value cases */
3786 result_str = map_sql_value_to_xml_value(datum, datumtype, true);
3787 datum = PointerGetDatum(cstring_to_xmltype(result_str));
3788 (void) accumArrayResult(astate, datum, false,
3789 XMLOID, CurrentMemoryContext);
3795 * Common code for xpath() and xmlexists()
3797 * Evaluate XPath expression and return number of nodes in res_items
3798 * and array of XML values in astate. Either of those pointers can be
3799 * NULL if the corresponding result isn't wanted.
3801 * It is up to the user to ensure that the XML passed is in fact
3802 * an XML document - XPath doesn't work easily on fragments without
3803 * a context node being known.
3806 xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
3807 int *res_nitems, ArrayBuildState *astate)
/*
 * xpath_expr_text  XPath expression text; an "empty XPath expression"
 *                  error exists for it.
 * data             xml value to evaluate against.  It is parsed with
 *                  xmlCtxtReadMemory(); a document without a root element
 *                  is rejected ("could not find root XML element").
 * namespaces       NULL, an empty (0-dimensional) array, or a
 *                  two-dimensional text array of (name, URI) pairs.  Each
 *                  pair is registered with xmlXPathRegisterNs(); a NULL
 *                  name or URI is an error.
 * res_nitems       if not NULL, receives the element count returned by
 *                  xml_xpathobjtoxmlarray().
 * astate           passed through to xml_xpathobjtoxmlarray().
 *
 * The libxml2 objects are tracked in volatile locals initialised to NULL.
 * Two release sequences appear below: the first ends with
 * pg_xml_done(xmlerrcxt, true), the second with pg_xml_done(xmlerrcxt,
 * false) -- presumably the error exit and the normal exit respectively.
 *
 * NOTE(review): VARDATA()/VARSIZE() are applied to both inputs directly;
 * the callers in this file fetch them with PG_GETARG_TEXT_P and
 * PG_GETARG_XML_P -- confirm any other caller passes an equivalent form.
 */
3809 PgXmlErrorContext *xmlerrcxt;
3810 volatile xmlParserCtxtPtr ctxt = NULL;
3811 volatile xmlDocPtr doc = NULL;
3812 volatile xmlXPathContextPtr xpathctx = NULL;
3813 volatile xmlXPathCompExprPtr xpathcomp = NULL;
3814 volatile xmlXPathObjectPtr xpathobj = NULL;
3819 xmlChar *xpath_expr;
3822 Datum *ns_names_uris;
3823 bool *ns_names_uris_nulls;
3827 * Namespace mappings are passed as text[]. If an empty array is passed
3828 * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
3829 * Else, a 2-dimensional array with length of the second axis being equal
3830 * to 2 should be passed, i.e., every subarray contains 2 elements, the
3831 * first element defining the name, the second one the URI. Example:
3832 * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
3833 * 'http://example2.com']].
3835 ndim = namespaces ? ARR_NDIM(namespaces) : 0;
3840 dims = ARR_DIMS(namespaces);
3842 if (ndim != 2 || dims[1] != 2)
3844 (errcode(ERRCODE_DATA_EXCEPTION),
3845 errmsg("invalid array for XML namespace mapping"),
3846 errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
3848 Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
3850 deconstruct_array(namespaces, TEXTOID, -1, false, 'i',
3851 &ns_names_uris, &ns_names_uris_nulls,
3854 Assert((ns_count % 2) == 0); /* checked above */
3855 ns_count /= 2; /* count pairs only */
3859 ns_names_uris = NULL;
3860 ns_names_uris_nulls = NULL;
3864 datastr = VARDATA(data);
3865 len = VARSIZE(data) - VARHDRSZ;
3866 xpath_len = VARSIZE(xpath_expr_text) - VARHDRSZ;
3869 (errcode(ERRCODE_DATA_EXCEPTION),
3870 errmsg("empty XPath expression")));
3872 string = pg_xmlCharStrndup(datastr, len);
3873 xpath_expr = pg_xmlCharStrndup(VARDATA(xpath_expr_text), xpath_len);
3875 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
3882 * redundant XML parsing (two parsings for the same value during one
3883 * command execution are possible)
3885 ctxt = xmlNewParserCtxt();
3886 if (ctxt == NULL || xmlerrcxt->err_occurred)
3887 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3888 "could not allocate parser context");
3889 doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
3890 if (doc == NULL || xmlerrcxt->err_occurred)
3891 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
3892 "could not parse XML document");
3893 xpathctx = xmlXPathNewContext(doc);
3894 if (xpathctx == NULL || xmlerrcxt->err_occurred)
3895 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3896 "could not allocate XPath context");
3897 xpathctx->node = xmlDocGetRootElement(doc);
3898 if (xpathctx->node == NULL || xmlerrcxt->err_occurred)
3899 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
3900 "could not find root XML element");
3902 /* register namespaces, if any */
3905 for (i = 0; i < ns_count; i++)
3910 if (ns_names_uris_nulls[i * 2] ||
3911 ns_names_uris_nulls[i * 2 + 1])
3913 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
3914 errmsg("neither namespace name nor URI may be null")));
3915 ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
3916 ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
3917 if (xmlXPathRegisterNs(xpathctx,
3918 (xmlChar *) ns_name,
3919 (xmlChar *) ns_uri) != 0)
3920 ereport(ERROR, /* is this an internal error??? */
3921 (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
3926 xpathcomp = xmlXPathCompile(xpath_expr);
3927 if (xpathcomp == NULL || xmlerrcxt->err_occurred)
3928 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
3929 "invalid XPath expression");
3932 * Version 2.6.27 introduces a function named
3933 * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
3934 * but we can derive the existence by whether any nodes are returned,
3935 * thereby preventing a library version upgrade and keeping the code
3938 xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
3939 if (xpathobj == NULL || xmlerrcxt->err_occurred)
3940 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
3941 "could not create XPath object");
3944 * Extract the results as requested.
3946 if (res_nitems != NULL)
3947 *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
3949 (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
3954 xmlXPathFreeObject(xpathobj);
3956 xmlXPathFreeCompExpr(xpathcomp);
3958 xmlXPathFreeContext(xpathctx);
3962 xmlFreeParserCtxt(ctxt);
3964 pg_xml_done(xmlerrcxt, true);
3970 xmlXPathFreeObject(xpathobj);
3971 xmlXPathFreeCompExpr(xpathcomp);
3972 xmlXPathFreeContext(xpathctx);
3974 xmlFreeParserCtxt(ctxt);
3976 pg_xml_done(xmlerrcxt, false);
3978 #endif /* USE_LIBXML */
3981 * Evaluate XPath expression and return array of XML values.
3983 * As we have no support for XQuery sequences yet, this function seems
3984 * to be the most useful one (an array of XML values serves as a
3985 * substitute for XQuery sequences).
3988 xpath(PG_FUNCTION_ARGS)
/*
 * SQL-callable: arguments are 0 = XPath expression (text), 1 = xml value,
 * 2 = namespace mapping array.  An array build state for XMLOID elements
 * is created in CurrentMemoryContext, filled by xpath_internal(), and
 * returned as an array via makeArrayResult().
 */
3991 text *xpath_expr_text = PG_GETARG_TEXT_P(0);
3992 xmltype *data = PG_GETARG_XML_P(1);
3993 ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
3994 ArrayBuildState *astate;
3996 astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
3997 xpath_internal(xpath_expr_text, data, namespaces,
3999 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
4007 * Determines if the node specified by the supplied XPath exists
4008 * in a given XML document, returning a boolean.
4011 xmlexists(PG_FUNCTION_ARGS)
/*
 * SQL-callable: arguments are 0 = XPath expression (text), 1 = xml value.
 * Calls xpath_internal() with no namespace array and returns true when
 * the resulting item count is greater than zero.
 */
4014 text *xpath_expr_text = PG_GETARG_TEXT_P(0);
4015 xmltype *data = PG_GETARG_XML_P(1);
4018 xpath_internal(xpath_expr_text, data, NULL,
4021 PG_RETURN_BOOL(res_nitems > 0);
4029 * Determines if the node specified by the supplied XPath exists
4030 * in a given XML document, returning a boolean. Differs from
4031 * xmlexists as it supports namespaces and is not defined in SQL/XML.
4034 xpath_exists(PG_FUNCTION_ARGS)
/*
 * SQL-callable: arguments are 0 = XPath expression (text), 1 = xml value,
 * 2 = namespace mapping array.  Calls xpath_internal() with that array and
 * returns true when the resulting item count is greater than zero.
 */
4037 text *xpath_expr_text = PG_GETARG_TEXT_P(0);
4038 xmltype *data = PG_GETARG_XML_P(1);
4039 ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4042 xpath_internal(xpath_expr_text, data, namespaces,
4045 PG_RETURN_BOOL(res_nitems > 0);
4053 * Functions for checking well-formed-ness
4058 wellformed_xml(text *data, XmlOptionType xmloption_arg)
/*
 * Shared helper for the xml_is_well_formed* functions: tries to parse
 * "data" with xml_parse() under the given XML option (document or content)
 * in the database encoding.  The parse result is held in a volatile local
 * that starts out NULL.
 */
4061 volatile xmlDocPtr doc = NULL;
4063 /* We want to catch any exceptions and return false */
4066 doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());
4084 xml_is_well_formed(PG_FUNCTION_ARGS)
/*
 * SQL-callable: checks argument 0 (text) with wellformed_xml() using the
 * current value of xmloption.
 */
4087 text *data = PG_GETARG_TEXT_P(0);
4089 PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4093 #endif /* not USE_LIBXML */
4097 xml_is_well_formed_document(PG_FUNCTION_ARGS)
/*
 * SQL-callable: checks argument 0 (text) with wellformed_xml(), always
 * using XMLOPTION_DOCUMENT.
 */
4100 text *data = PG_GETARG_TEXT_P(0);
4102 PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4106 #endif /* not USE_LIBXML */
4110 xml_is_well_formed_content(PG_FUNCTION_ARGS)
/*
 * SQL-callable: checks argument 0 (text) with wellformed_xml(), always
 * using XMLOPTION_CONTENT.
 */
4113 text *data = PG_GETARG_TEXT_P(0);
4115 PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4119 #endif /* not USE_LIBXML */
4123 * support functions for XMLTABLE
4129 * Returns private data from executor state. Ensure validity by check with
4132 static inline XmlTableBuilderData *
4133 GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
/*
 * state   executor node whose "opaque" field holds the XmlTableBuilderData.
 * fname   name of the calling function, used only in the error message.
 *
 * Two checks are made: that "state" really is a TableFuncScanState node,
 * and that the opaque struct carries XMLTABLE_CONTEXT_MAGIC.  Both failures
 * raise the same "%s called with invalid TableFuncScanState" error.
 */
4135 XmlTableBuilderData *result;
4137 if (!IsA(state, TableFuncScanState))
4138 elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4139 result = (XmlTableBuilderData *) state->opaque;
4140 if (result->magic != XMLTABLE_CONTEXT_MAGIC)
4141 elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4148 * XmlTableInitOpaque
4149 * Fill in TableFuncScanState->opaque for XmlTable processor; initialize
4152 * Note: Because we call pg_xml_init() here and pg_xml_done() in
4153 * XmlTableDestroyOpaque, it is critical for robustness that no other
4154 * executor nodes run until this node is processed to completion. Caller
4155 * must execute this to completion (probably filling a tuplestore to exhaust
4156 * this node in a single pass) instead of using row-per-call mode.
4159 XmlTableInitOpaque(TableFuncScanState *state, int natts)
/*
 * state   node whose "opaque" field is set to the new builder data.
 * natts   number of output columns; sizes the zeroed xpathscomp array of
 *         compiled column expressions.
 *
 * The builder struct is allocated zeroed and stamped with
 * XMLTABLE_CONTEXT_MAGIC.  A libxml2 error context is opened with
 * pg_xml_init(PG_XML_STRICTNESS_ALL) and a parser context is created; if
 * that fails, the parser context is freed and pg_xml_done(xmlerrcxt, true)
 * is called.  The error context is stored in the builder struct.
 */
4162 volatile xmlParserCtxtPtr ctxt = NULL;
4163 XmlTableBuilderData *xtCxt;
4164 PgXmlErrorContext *xmlerrcxt;
4166 xtCxt = palloc0(sizeof(XmlTableBuilderData));
4167 xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
4168 xtCxt->natts = natts;
4169 xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
4171 xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4177 ctxt = xmlNewParserCtxt();
4178 if (ctxt == NULL || xmlerrcxt->err_occurred)
4179 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4180 "could not allocate parser context");
4185 xmlFreeParserCtxt(ctxt);
4187 pg_xml_done(xmlerrcxt, true);
4193 xtCxt->xmlerrcxt = xmlerrcxt;
4196 state->opaque = xtCxt;
4199 #endif /* not USE_LIBXML */
4203 * XmlTableSetDocument
4204 * Install the input document
4207 XmlTableSetDocument(TableFuncScanState *state, Datum value)
/*
 * state   node carrying the XmlTableBuilderData (validated on entry).
 * value   the input document as an xml Datum.
 *
 * The document is converted to a C string with xml_out_internal(), copied
 * into an xmlChar buffer and parsed with the parser context stored in the
 * builder data.  An XPath context is then created with the document's root
 * element as its context node and stored in xtCxt->xpathcxt.  Parse
 * failure, allocation failure and a missing root element are reported
 * through the stored error context; an XPath context that was already
 * created is freed in the cleanup shown below.
 */
4210 XmlTableBuilderData *xtCxt;
4211 xmltype *xmlval = DatumGetXmlP(value);
4215 volatile xmlDocPtr doc = NULL;
4216 volatile xmlXPathContextPtr xpathcxt = NULL;
4218 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
4221 * Use out function for casting to string (remove encoding property). See
4222 * comment in xml_out.
4224 str = xml_out_internal(xmlval, 0);
4226 length = strlen(str);
4227 xstr = pg_xmlCharStrndup(str, length);
4231 doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4232 if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
4233 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4234 "could not parse XML document");
4235 xpathcxt = xmlXPathNewContext(doc);
4236 if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
4237 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4238 "could not allocate XPath context");
4239 xpathcxt->node = xmlDocGetRootElement(doc);
4240 if (xpathcxt->node == NULL || xtCxt->xmlerrcxt->err_occurred)
4241 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4242 "could not find root XML element");
4246 if (xpathcxt != NULL)
4247 xmlXPathFreeContext(xpathcxt);
4256 xtCxt->xpathcxt = xpathcxt;
4259 #endif /* not USE_LIBXML */
4263 * XmlTableSetNamespace
4264 * Add a namespace declaration
4267 XmlTableSetNamespace(TableFuncScanState *state, char *name, char *uri)
/*
 * state   node carrying the XmlTableBuilderData.
 * name    namespace prefix; a "DEFAULT namespace is not supported" error
 *         exists (presumably raised when no prefix is given -- confirm).
 * uri     namespace URI.
 *
 * Both strings are copied with pg_xmlCharStrndup() and registered on the
 * stored XPath context with xmlXPathRegisterNs(); a non-zero return is
 * reported as "could not set XML namespace".
 */
4270 XmlTableBuilderData *xtCxt;
4274 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4275 errmsg("DEFAULT namespace is not supported")));
4276 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4278 if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4279 pg_xmlCharStrndup(name, strlen(name)),
4280 pg_xmlCharStrndup(uri, strlen(uri))))
4281 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4282 "could not set XML namespace");
4285 #endif /* not USE_LIBXML */
4289 * XmlTableSetRowFilter
4290 * Install the row-filter Xpath expression.
4293 XmlTableSetRowFilter(TableFuncScanState *state, char *path)
/*
 * state   node carrying the XmlTableBuilderData.
 * path    XPath expression selecting the rows; an empty string is rejected
 *         with "row path filter must not be empty string".
 *
 * The expression is compiled with xmlXPathCompile() and stored in
 * xtCxt->xpathcomp.
 *
 * NOTE(review): a compile failure is reported here with
 * ERRCODE_SYNTAX_ERROR, while XmlTableSetColumnFilter() reports the same
 * "invalid XPath expression" message with ERRCODE_DATA_EXCEPTION --
 * confirm the difference is intended.
 */
4296 XmlTableBuilderData *xtCxt;
4299 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4303 (errcode(ERRCODE_DATA_EXCEPTION),
4304 errmsg("row path filter must not be empty string")));
4306 xstr = pg_xmlCharStrndup(path, strlen(path));
4308 xtCxt->xpathcomp = xmlXPathCompile(xstr);
4309 if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
4310 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_SYNTAX_ERROR,
4311 "invalid XPath expression");
4314 #endif /* not USE_LIBXML */
4318 * XmlTableSetColumnFilter
4319 * Install the column-filter Xpath expression, for the given column.
4322 XmlTableSetColumnFilter(TableFuncScanState *state, char *path, int colnum)
/*
 * state   node carrying the XmlTableBuilderData.
 * path    XPath expression for the column; must be a valid pointer, and an
 *         empty string is rejected with "column path filter must not be
 *         empty string".
 * colnum  index into xtCxt->xpathscomp where the compiled expression is
 *         stored.
 *
 * NOTE(review): a compile failure is reported here with
 * ERRCODE_DATA_EXCEPTION, while XmlTableSetRowFilter() reports the same
 * "invalid XPath expression" message with ERRCODE_SYNTAX_ERROR -- confirm
 * the difference is intended.
 */
4325 XmlTableBuilderData *xtCxt;
4328 AssertArg(PointerIsValid(path));
4330 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4334 (errcode(ERRCODE_DATA_EXCEPTION),
4335 errmsg("column path filter must not be empty string")));
4337 xstr = pg_xmlCharStrndup(path, strlen(path));
4339 xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr);
4340 if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
4341 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4342 "invalid XPath expression");
4345 #endif /* not USE_LIBXML */
4350 * Prepare the next "current" tuple for upcoming GetValue calls.
4351 * Returns FALSE if the row-filter expression returned no more rows.
4354 XmlTableFetchRow(TableFuncScanState *state)
4357 XmlTableBuilderData *xtCxt;
4359 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4362 * XmlTable returns a table, i.e. a set of composite values, produced over
4363 * several calls. Between two such calls another libxml2 error context may
4364 * have been created and used. The error handler is a libxml2 global
4365 * setting, so it must be re-installed before any libxml2 usage in each call
4366 * that ends by returning a value.
4368 xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4370 if (xtCxt->xpathobj == NULL)
4372 xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4373 if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4374 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4375 "could not create XPath object");
4377 xtCxt->row_count = 0;
4380 if (xtCxt->xpathobj->type == XPATH_NODESET)
4382 if (xtCxt->xpathobj->nodesetval != NULL)
4384 if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
4393 #endif /* not USE_LIBXML */
4398 * Return the value for column number 'colnum' for the current row. If
4399 * column -1 is requested, return representation of the whole row.
4401 * This leaks memory, so be sure to frequently reset the context in which it's called.
/*
 * Evaluate the compiled XPath expression of column 'colnum' with the current
 * row's node as the context node, and convert the outcome to a Datum through
 * the column's input function (state->in_functions[colnum]).
 *
 * NOTE(review): several declarations, branch bodies and the error-cleanup
 * scaffolding of this function are not visible in this excerpt; the comments
 * added below describe only the visible statements.
 */
4405 XmlTableGetValue(TableFuncScanState *state, int colnum,
4406 Oid typid, int32 typmod, bool *isnull)
4409 XmlTableBuilderData *xtCxt;
4410 Datum result = (Datum) 0;
/*
 * NOTE(review): declared volatile, presumably because it is also used in an
 * error-cleanup path (see the two xmlXPathFreeObject() calls near the end)
 * -- confirm.
 */
4413 volatile xmlXPathObjectPtr xpathobj = NULL;
/* Look up the builder data for 'state', passing this function's name */
4415 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
/* The row-filter result must already exist and be a non-NULL node set */
4417 Assert(xtCxt->xpathobj &&
4418 xtCxt->xpathobj->type == XPATH_NODESET &&
4419 xtCxt->xpathobj->nodesetval != NULL);
4421 /* Propagate context related error context to libxml2 */
4422 xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
/* row_count is 1-based for the current row, hence the -1 when indexing */
4426 cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
/* A compiled expression must have been installed for this column */
4428 Assert(xtCxt->xpathscomp[colnum] != NULL);
4432 /* Set current node as entry point for XPath evaluation */
4433 xtCxt->xpathcxt->node = cur;
4435 /* Evaluate column path */
4436 xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
4437 if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4438 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4439 "could not create XPath object");
4442 * There are four possible cases, depending on the number of nodes
4443 * returned by the XPath expression and the type of the target column:
4444 * a) XPath returns no nodes. b) One node is returned, and column is
4445 * of type XML. c) One node, column type other than XML. d) Multiple
4446 * nodes are returned.
4448 if (xpathobj->type == XPATH_NODESET)
/* count is only read from the node set when one exists */
4452 if (xpathobj->nodesetval != NULL)
4453 count = xpathobj->nodesetval->nodeNr;
/*
 * Case a): no nodes.  NOTE(review): the branch body is not visible here;
 * presumably it sets *isnull -- confirm.
 */
4455 if (xpathobj->nodesetval == NULL || count == 0)
/* Case b): exactly one node and the target column is of type XML */
4459 else if (count == 1 && typid == XMLOID)
4463 /* simple case, result is one value */
4464 textstr = xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[0],
4466 cstr = text_to_cstring(textstr);
/* Case c): exactly one node, target column is not XML */
4468 else if (count == 1)
/* Take the string built from the single node's children */
4472 str = xmlNodeListGetString(xtCxt->doc,
4473 xpathobj->nodesetval->nodeTab[0]->xmlChildrenNode,
/* Keep a palloc'd copy (pstrdup) of the libxml2 string as the result */
4480 cstr = pstrdup((char *) str);
4493 * This line ensure mapping of empty tags to PostgreSQL
4494 * value. Usually we would to map a empty tag to empty
4495 * string. But this mapping can create empty string when
4496 * user doesn't expect it - when empty tag is enforced
4497 * by libxml2 - when user uses a text() function for
/* Case d): more than one node */
4511 * When evaluating the XPath expression returns multiple
4512 * nodes, the result is the concatenation of them all. The
4513 * target type must be XML.
4515 if (typid != XMLOID)
4517 (errcode(ERRCODE_CARDINALITY_VIOLATION),
4518 errmsg("more than one value returned by column XPath expression")));
4520 /* Concatenate serialized values */
4521 initStringInfo(&str);
4522 for (i = 0; i < count; i++)
4524 appendStringInfoText(&str,
4525 xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
/* The expression produced a string instead of a node set: use it directly */
4531 else if (xpathobj->type == XPATH_STRING)
4533 cstr = (char *) xpathobj->stringval;
/* Any other XPath result type is reported as an error */
4536 elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
4539 * By here, either cstr contains the result value, or the isnull flag
4542 Assert(cstr || *isnull);
/* Convert the result through the column's input function */
4545 result = InputFunctionCall(&state->in_functions[colnum],
4547 state->typioparams[colnum],
/*
 * NOTE(review): xpathobj is freed at two visible places below; the control
 * flow that separates them (presumably an error-cleanup path versus the
 * normal exit) is not visible in this excerpt -- confirm.
 */
4552 if (xpathobj != NULL)
4553 xmlXPathFreeObject(xpathobj);
4558 xmlXPathFreeObject(xpathobj);
4564 #endif /* not USE_LIBXML */
4568 * XmlTableDestroyOpaque
4569 * Release all libxml2 resources
/*
 * Free the libxml2 objects referenced from the builder data (compiled column
 * and row expressions, the row-filter result, the XPath context, the document
 * and the parser context), pass the error context to pg_xml_done(), and
 * clear state->opaque.
 */
4572 XmlTableDestroyOpaque(TableFuncScanState *state)
4575 XmlTableBuilderData *xtCxt;
/* Look up the builder data for 'state', passing this function's name */
4577 xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
4579 /* Propagate context related error context to libxml2 */
4580 xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
/* Free each per-column compiled expression; unset (NULL) slots are skipped */
4582 if (xtCxt->xpathscomp != NULL)
4586 for (i = 0; i < xtCxt->natts; i++)
4587 if (xtCxt->xpathscomp[i] != NULL)
4588 xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
/* Free the remaining objects, each only if it is non-NULL */
4591 if (xtCxt->xpathobj != NULL)
4592 xmlXPathFreeObject(xtCxt->xpathobj);
4593 if (xtCxt->xpathcomp != NULL)
4594 xmlXPathFreeCompExpr(xtCxt->xpathcomp);
4595 if (xtCxt->xpathcxt != NULL)
4596 xmlXPathFreeContext(xtCxt->xpathcxt);
4597 if (xtCxt->doc != NULL)
4598 xmlFreeDoc(xtCxt->doc);
4599 if (xtCxt->ctxt != NULL)
4600 xmlFreeParserCtxt(xtCxt->ctxt);
/*
 * Hand the error context to pg_xml_done().
 * NOTE(review): the meaning of the 'true' argument is not shown in this
 * excerpt -- confirm against pg_xml_done()'s definition.
 */
4602 pg_xml_done(xtCxt->xmlerrcxt, true);
4604 /* not valid anymore */
4606 state->opaque = NULL;
4610 #endif /* not USE_LIBXML */