]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/xml.c
1908b13db5c7f4328c25316decf6d28b34137db6
[postgresql] / src / backend / utils / adt / xml.c
1 /*-------------------------------------------------------------------------
2  *
3  * xml.c
4  *        XML data type support.
5  *
6  *
7  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/backend/utils/adt/xml.c
11  *
12  *-------------------------------------------------------------------------
13  */
14
15 /*
16  * Generally, XML type support is only available when libxml use was
17  * configured during the build.  But even if that is not done, the
18  * type and all the functions are available, but most of them will
19  * fail.  For one thing, this avoids having to manage variant catalog
20  * installations.  But it also has nice effects such as that you can
21  * dump a database containing XML type data even if the server is not
22  * linked with libxml.  Thus, make sure xml_out() works even if nothing
23  * else does.
24  */
25
26 /*
27  * Notes on memory management:
28  *
29  * Sometimes libxml allocates global structures in the hope that it can reuse
30  * them later on.  This makes it impractical to change the xmlMemSetup
31  * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32  * allocated with malloc() or vice versa.  Since libxml might be used by
33  * loadable modules, eg libperl, our only safe choices are to change the
34  * functions at postmaster/backend launch or not at all.  Since we'd rather
35  * not activate libxml in sessions that might never use it, the latter choice
36  * is the preferred one.  However, for debugging purposes it can be awfully
37  * handy to constrain libxml's allocations to be done in a specific palloc
38  * context, where they're easy to track.  Therefore there is code here that
39  * can be enabled in debug builds to redirect libxml's allocations into a
40  * special context LibxmlContext.  It's not recommended to turn this on in
41  * a production build because of the possibility of bad interactions with
42  * external modules.
43  */
44 /* #define USE_LIBXMLCONTEXT */
45
46 #include "postgres.h"
47
48 #ifdef USE_LIBXML
49 #include <libxml/chvalid.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/tree.h>
53 #include <libxml/uri.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/xmlversion.h>
56 #include <libxml/xmlwriter.h>
57 #include <libxml/xpath.h>
58 #include <libxml/xpathInternals.h>
59
60 /*
61  * We used to check for xmlStructuredErrorContext via a configure test; but
62  * that doesn't work on Windows, so instead use this grottier method of
63  * testing the library version number.
64  */
65 #if LIBXML_VERSION >= 20704
66 #define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
67 #endif
68 #endif   /* USE_LIBXML */
69
70 #include "access/htup_details.h"
71 #include "catalog/namespace.h"
72 #include "catalog/pg_class.h"
73 #include "catalog/pg_type.h"
74 #include "commands/dbcommands.h"
75 #include "executor/executor.h"
76 #include "executor/spi.h"
77 #include "executor/tablefunc.h"
78 #include "fmgr.h"
79 #include "lib/stringinfo.h"
80 #include "libpq/pqformat.h"
81 #include "mb/pg_wchar.h"
82 #include "miscadmin.h"
83 #include "nodes/execnodes.h"
84 #include "nodes/nodeFuncs.h"
85 #include "utils/array.h"
86 #include "utils/builtins.h"
87 #include "utils/date.h"
88 #include "utils/datetime.h"
89 #include "utils/lsyscache.h"
90 #include "utils/memutils.h"
91 #include "utils/rel.h"
92 #include "utils/syscache.h"
93 #include "utils/xml.h"
94
95
/*
 * GUC variables
 *
 * xmloption is the XmlOptionType handed to xml_parse() by xml_in(),
 * xml_recv() and texttoxml() in this file.
 */
int			xmlbinary;
int			xmloption;
99
100 #ifdef USE_LIBXML
101
102 /* random number to identify PgXmlErrorContext */
103 #define ERRCXT_MAGIC    68275028
104
105 struct PgXmlErrorContext
106 {
107         int                     magic;
108         /* strictness argument passed to pg_xml_init */
109         PgXmlStrictness strictness;
110         /* current error status and accumulated message, if any */
111         bool            err_occurred;
112         StringInfoData err_buf;
113         /* previous libxml error handling state (saved by pg_xml_init) */
114         xmlStructuredErrorFunc saved_errfunc;
115         void       *saved_errcxt;
116         /* previous libxml entity handler (saved by pg_xml_init) */
117         xmlExternalEntityLoader saved_entityfunc;
118 };
119
120 static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
121                                   xmlParserCtxtPtr ctxt);
122 static void xml_errorHandler(void *data, xmlErrorPtr error);
123 static void xml_ereport_by_code(int level, int sqlcode,
124                                         const char *msg, int errcode);
125 static void chopStringInfoNewlines(StringInfo str);
126 static void appendStringInfoLineSeparator(StringInfo str);
127
128 #ifdef USE_LIBXMLCONTEXT
129
130 static MemoryContext LibxmlContext = NULL;
131
132 static void xml_memory_init(void);
133 static void *xml_palloc(size_t size);
134 static void *xml_repalloc(void *ptr, size_t size);
135 static void xml_pfree(void *ptr);
136 static char *xml_pstrdup(const char *string);
137 #endif   /* USE_LIBXMLCONTEXT */
138
139 static xmlChar *xml_text2xmlChar(text *in);
140 static int parse_xml_decl(const xmlChar *str, size_t *lenp,
141                            xmlChar **version, xmlChar **encoding, int *standalone);
142 static bool print_xml_decl(StringInfo buf, const xmlChar *version,
143                            pg_enc encoding, int standalone);
144 static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
145                   bool preserve_whitespace, int encoding);
146 static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
147 static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
148                                            ArrayBuildState *astate,
149                                            PgXmlErrorContext *xmlerrcxt);
150 static xmlChar *pg_xmlCharStrndup(char *str, size_t len);
151 #endif   /* USE_LIBXML */
152
153 static StringInfo query_to_xml_internal(const char *query, char *tablename,
154                                           const char *xmlschema, bool nulls, bool tableforest,
155                                           const char *targetns, bool top_level);
156 static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
157                                                  bool nulls, bool tableforest, const char *targetns);
158 static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
159                                                                   List *relid_list, bool nulls,
160                                                                   bool tableforest, const char *targetns);
161 static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
162                                                                    bool nulls, bool tableforest,
163                                                                    const char *targetns);
164 static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
165 static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
166 static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
167 static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
168                                                   char *tablename, bool nulls, bool tableforest,
169                                                   const char *targetns, bool top_level);
170
171 /* XMLTABLE support */
172 #ifdef USE_LIBXML
173 /* random number to identify XmlTableContext */
174 #define XMLTABLE_CONTEXT_MAGIC  46922182
175 typedef struct XmlTableBuilderData
176 {
177         int                     magic;
178         int                     natts;
179         long int        row_count;
180         PgXmlErrorContext *xmlerrcxt;
181         xmlParserCtxtPtr ctxt;
182         xmlDocPtr       doc;
183         xmlXPathContextPtr xpathcxt;
184         xmlXPathCompExprPtr xpathcomp;
185         xmlXPathObjectPtr xpathobj;
186         xmlXPathCompExprPtr *xpathscomp;
187 } XmlTableBuilderData;
188 #endif
189
190 static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
191 static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
192 static void XmlTableSetNamespace(struct TableFuncScanState *state, char *name,
193                                          char *uri);
194 static void XmlTableSetRowFilter(struct TableFuncScanState *state, char *path);
195 static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
196                                                 char *path, int colnum);
197 static bool XmlTableFetchRow(struct TableFuncScanState *state);
198 static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
199                                  Oid typid, int32 typmod, bool *isnull);
200 static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
201
202 const TableFuncRoutine XmlTableRoutine =
203 {
204         XmlTableInitOpaque,
205         XmlTableSetDocument,
206         XmlTableSetNamespace,
207         XmlTableSetRowFilter,
208         XmlTableSetColumnFilter,
209         XmlTableFetchRow,
210         XmlTableGetValue,
211         XmlTableDestroyOpaque
212 };
213
/*
 * Raise the standard "feature not available" error; used by the bodies that
 * are compiled when USE_LIBXML is not defined.
 */
#define NO_XML_SUPPORT() \
	ereport(ERROR, \
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
			 errmsg("unsupported XML feature"), \
			 errdetail("This functionality requires the server to be built with libxml support."), \
			 errhint("You need to rebuild PostgreSQL using --with-libxml.")))
220
221
222 /* from SQL/XML:2008 section 4.9 */
223 #define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
224 #define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
225 #define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
226
227
228 #ifdef USE_LIBXML
229
230 static int
231 xmlChar_to_encoding(const xmlChar *encoding_name)
232 {
233         int                     encoding = pg_char_to_encoding((const char *) encoding_name);
234
235         if (encoding < 0)
236                 ereport(ERROR,
237                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
238                                  errmsg("invalid encoding name \"%s\"",
239                                                 (const char *) encoding_name)));
240         return encoding;
241 }
242 #endif
243
244
245 /*
246  * xml_in uses a plain C string to VARDATA conversion, so for the time being
247  * we use the conversion function for the text datatype.
248  *
249  * This is only acceptable so long as xmltype and text use the same
250  * representation.
251  */
252 Datum
253 xml_in(PG_FUNCTION_ARGS)
254 {
255 #ifdef USE_LIBXML
256         char       *s = PG_GETARG_CSTRING(0);
257         xmltype    *vardata;
258         xmlDocPtr       doc;
259
260         vardata = (xmltype *) cstring_to_text(s);
261
262         /*
263          * Parse the data to check if it is well-formed XML data.  Assume that
264          * ERROR occurred if parsing failed.
265          */
266         doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding());
267         xmlFreeDoc(doc);
268
269         PG_RETURN_XML_P(vardata);
270 #else
271         NO_XML_SUPPORT();
272         return 0;
273 #endif
274 }
275
276
277 #define PG_XML_DEFAULT_VERSION "1.0"
278
279
280 /*
281  * xml_out_internal uses a plain VARDATA to C string conversion, so for the
282  * time being we use the conversion function for the text datatype.
283  *
284  * This is only acceptable so long as xmltype and text use the same
285  * representation.
286  */
287 static char *
288 xml_out_internal(xmltype *x, pg_enc target_encoding)
289 {
290         char       *str = text_to_cstring((text *) x);
291
292 #ifdef USE_LIBXML
293         size_t          len = strlen(str);
294         xmlChar    *version;
295         int                     standalone;
296         int                     res_code;
297
298         if ((res_code = parse_xml_decl((xmlChar *) str,
299                                                                    &len, &version, NULL, &standalone)) == 0)
300         {
301                 StringInfoData buf;
302
303                 initStringInfo(&buf);
304
305                 if (!print_xml_decl(&buf, version, target_encoding, standalone))
306                 {
307                         /*
308                          * If we are not going to produce an XML declaration, eat a single
309                          * newline in the original string to prevent empty first lines in
310                          * the output.
311                          */
312                         if (*(str + len) == '\n')
313                                 len += 1;
314                 }
315                 appendStringInfoString(&buf, str + len);
316
317                 pfree(str);
318
319                 return buf.data;
320         }
321
322         xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
323                                                 "could not parse XML declaration in stored value",
324                                                 res_code);
325 #endif
326         return str;
327 }
328
329
330 Datum
331 xml_out(PG_FUNCTION_ARGS)
332 {
333         xmltype    *x = PG_GETARG_XML_P(0);
334
335         /*
336          * xml_out removes the encoding property in all cases.  This is because we
337          * cannot control from here whether the datum will be converted to a
338          * different client encoding, so we'd do more harm than good by including
339          * it.
340          */
341         PG_RETURN_CSTRING(xml_out_internal(x, 0));
342 }
343
344
345 Datum
346 xml_recv(PG_FUNCTION_ARGS)
347 {
348 #ifdef USE_LIBXML
349         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
350         xmltype    *result;
351         char       *str;
352         char       *newstr;
353         int                     nbytes;
354         xmlDocPtr       doc;
355         xmlChar    *encodingStr = NULL;
356         int                     encoding;
357
358         /*
359          * Read the data in raw format. We don't know yet what the encoding is, as
360          * that information is embedded in the xml declaration; so we have to
361          * parse that before converting to server encoding.
362          */
363         nbytes = buf->len - buf->cursor;
364         str = (char *) pq_getmsgbytes(buf, nbytes);
365
366         /*
367          * We need a null-terminated string to pass to parse_xml_decl().  Rather
368          * than make a separate copy, make the temporary result one byte bigger
369          * than it needs to be.
370          */
371         result = palloc(nbytes + 1 + VARHDRSZ);
372         SET_VARSIZE(result, nbytes + VARHDRSZ);
373         memcpy(VARDATA(result), str, nbytes);
374         str = VARDATA(result);
375         str[nbytes] = '\0';
376
377         parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
378
379         /*
380          * If encoding wasn't explicitly specified in the XML header, treat it as
381          * UTF-8, as that's the default in XML. This is different from xml_in(),
382          * where the input has to go through the normal client to server encoding
383          * conversion.
384          */
385         encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
386
387         /*
388          * Parse the data to check if it is well-formed XML data.  Assume that
389          * xml_parse will throw ERROR if not.
390          */
391         doc = xml_parse(result, xmloption, true, encoding);
392         xmlFreeDoc(doc);
393
394         /* Now that we know what we're dealing with, convert to server encoding */
395         newstr = pg_any_to_server(str, nbytes, encoding);
396
397         if (newstr != str)
398         {
399                 pfree(result);
400                 result = (xmltype *) cstring_to_text(newstr);
401                 pfree(newstr);
402         }
403
404         PG_RETURN_XML_P(result);
405 #else
406         NO_XML_SUPPORT();
407         return 0;
408 #endif
409 }
410
411
412 Datum
413 xml_send(PG_FUNCTION_ARGS)
414 {
415         xmltype    *x = PG_GETARG_XML_P(0);
416         char       *outval;
417         StringInfoData buf;
418
419         /*
420          * xml_out_internal doesn't convert the encoding, it just prints the right
421          * declaration. pq_sendtext will do the conversion.
422          */
423         outval = xml_out_internal(x, pg_get_client_encoding());
424
425         pq_begintypsend(&buf);
426         pq_sendtext(&buf, outval, strlen(outval));
427         pfree(outval);
428         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
429 }
430
431
432 #ifdef USE_LIBXML
433 static void
434 appendStringInfoText(StringInfo str, const text *t)
435 {
436         appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
437 }
438 #endif
439
440
441 static xmltype *
442 stringinfo_to_xmltype(StringInfo buf)
443 {
444         return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
445 }
446
447
448 static xmltype *
449 cstring_to_xmltype(const char *string)
450 {
451         return (xmltype *) cstring_to_text(string);
452 }
453
454
455 #ifdef USE_LIBXML
456 static xmltype *
457 xmlBuffer_to_xmltype(xmlBufferPtr buf)
458 {
459         return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
460                                                                                                 xmlBufferLength(buf));
461 }
462 #endif
463
464
465 Datum
466 xmlcomment(PG_FUNCTION_ARGS)
467 {
468 #ifdef USE_LIBXML
469         text       *arg = PG_GETARG_TEXT_PP(0);
470         char       *argdata = VARDATA_ANY(arg);
471         int                     len = VARSIZE_ANY_EXHDR(arg);
472         StringInfoData buf;
473         int                     i;
474
475         /* check for "--" in string or "-" at the end */
476         for (i = 1; i < len; i++)
477         {
478                 if (argdata[i] == '-' && argdata[i - 1] == '-')
479                         ereport(ERROR,
480                                         (errcode(ERRCODE_INVALID_XML_COMMENT),
481                                          errmsg("invalid XML comment")));
482         }
483         if (len > 0 && argdata[len - 1] == '-')
484                 ereport(ERROR,
485                                 (errcode(ERRCODE_INVALID_XML_COMMENT),
486                                  errmsg("invalid XML comment")));
487
488         initStringInfo(&buf);
489         appendStringInfoString(&buf, "<!--");
490         appendStringInfoText(&buf, arg);
491         appendStringInfoString(&buf, "-->");
492
493         PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
494 #else
495         NO_XML_SUPPORT();
496         return 0;
497 #endif
498 }
499
500
501
502 /*
503  * TODO: xmlconcat needs to merge the notations and unparsed entities
504  * of the argument values.  Not very important in practice, though.
505  */
506 xmltype *
507 xmlconcat(List *args)
508 {
509 #ifdef USE_LIBXML
510         int                     global_standalone = 1;
511         xmlChar    *global_version = NULL;
512         bool            global_version_no_value = false;
513         StringInfoData buf;
514         ListCell   *v;
515
516         initStringInfo(&buf);
517         foreach(v, args)
518         {
519                 xmltype    *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
520                 size_t          len;
521                 xmlChar    *version;
522                 int                     standalone;
523                 char       *str;
524
525                 len = VARSIZE(x) - VARHDRSZ;
526                 str = text_to_cstring((text *) x);
527
528                 parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
529
530                 if (standalone == 0 && global_standalone == 1)
531                         global_standalone = 0;
532                 if (standalone < 0)
533                         global_standalone = -1;
534
535                 if (!version)
536                         global_version_no_value = true;
537                 else if (!global_version)
538                         global_version = version;
539                 else if (xmlStrcmp(version, global_version) != 0)
540                         global_version_no_value = true;
541
542                 appendStringInfoString(&buf, str + len);
543                 pfree(str);
544         }
545
546         if (!global_version_no_value || global_standalone >= 0)
547         {
548                 StringInfoData buf2;
549
550                 initStringInfo(&buf2);
551
552                 print_xml_decl(&buf2,
553                                            (!global_version_no_value) ? global_version : NULL,
554                                            0,
555                                            global_standalone);
556
557                 appendStringInfoString(&buf2, buf.data);
558                 buf = buf2;
559         }
560
561         return stringinfo_to_xmltype(&buf);
562 #else
563         NO_XML_SUPPORT();
564         return NULL;
565 #endif
566 }
567
568
569 /*
570  * XMLAGG support
571  */
572 Datum
573 xmlconcat2(PG_FUNCTION_ARGS)
574 {
575         if (PG_ARGISNULL(0))
576         {
577                 if (PG_ARGISNULL(1))
578                         PG_RETURN_NULL();
579                 else
580                         PG_RETURN_XML_P(PG_GETARG_XML_P(1));
581         }
582         else if (PG_ARGISNULL(1))
583                 PG_RETURN_XML_P(PG_GETARG_XML_P(0));
584         else
585                 PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
586                                                                                          PG_GETARG_XML_P(1))));
587 }
588
589
590 Datum
591 texttoxml(PG_FUNCTION_ARGS)
592 {
593         text       *data = PG_GETARG_TEXT_PP(0);
594
595         PG_RETURN_XML_P(xmlparse(data, xmloption, true));
596 }
597
598
599 Datum
600 xmltotext(PG_FUNCTION_ARGS)
601 {
602         xmltype    *data = PG_GETARG_XML_P(0);
603
604         /* It's actually binary compatible. */
605         PG_RETURN_TEXT_P((text *) data);
606 }
607
608
609 text *
610 xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg)
611 {
612         if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data))
613                 ereport(ERROR,
614                                 (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
615                                  errmsg("not an XML document")));
616
617         /* It's actually binary compatible, save for the above check. */
618         return (text *) data;
619 }
620
621
622 xmltype *
623 xmlelement(XmlExprState *xmlExpr, ExprContext *econtext)
624 {
625 #ifdef USE_LIBXML
626         XmlExpr    *xexpr = (XmlExpr *) xmlExpr->xprstate.expr;
627         xmltype    *result;
628         List       *named_arg_strings;
629         List       *arg_strings;
630         int                     i;
631         ListCell   *arg;
632         ListCell   *narg;
633         PgXmlErrorContext *xmlerrcxt;
634         volatile xmlBufferPtr buf = NULL;
635         volatile xmlTextWriterPtr writer = NULL;
636
637         /*
638          * We first evaluate all the arguments, then start up libxml and create
639          * the result.  This avoids issues if one of the arguments involves a call
640          * to some other function or subsystem that wants to use libxml on its own
641          * terms.
642          */
643         named_arg_strings = NIL;
644         i = 0;
645         foreach(arg, xmlExpr->named_args)
646         {
647                 ExprState  *e = (ExprState *) lfirst(arg);
648                 Datum           value;
649                 bool            isnull;
650                 char       *str;
651
652                 value = ExecEvalExpr(e, econtext, &isnull);
653                 if (isnull)
654                         str = NULL;
655                 else
656                         str = map_sql_value_to_xml_value(value, exprType((Node *) e->expr), false);
657                 named_arg_strings = lappend(named_arg_strings, str);
658                 i++;
659         }
660
661         arg_strings = NIL;
662         foreach(arg, xmlExpr->args)
663         {
664                 ExprState  *e = (ExprState *) lfirst(arg);
665                 Datum           value;
666                 bool            isnull;
667                 char       *str;
668
669                 value = ExecEvalExpr(e, econtext, &isnull);
670                 /* here we can just forget NULL elements immediately */
671                 if (!isnull)
672                 {
673                         str = map_sql_value_to_xml_value(value,
674                                                                                    exprType((Node *) e->expr), true);
675                         arg_strings = lappend(arg_strings, str);
676                 }
677         }
678
679         /* now safe to run libxml */
680         xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
681
682         PG_TRY();
683         {
684                 buf = xmlBufferCreate();
685                 if (buf == NULL || xmlerrcxt->err_occurred)
686                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
687                                                 "could not allocate xmlBuffer");
688                 writer = xmlNewTextWriterMemory(buf, 0);
689                 if (writer == NULL || xmlerrcxt->err_occurred)
690                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
691                                                 "could not allocate xmlTextWriter");
692
693                 xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
694
695                 forboth(arg, named_arg_strings, narg, xexpr->arg_names)
696                 {
697                         char       *str = (char *) lfirst(arg);
698                         char       *argname = strVal(lfirst(narg));
699
700                         if (str)
701                                 xmlTextWriterWriteAttribute(writer,
702                                                                                         (xmlChar *) argname,
703                                                                                         (xmlChar *) str);
704                 }
705
706                 foreach(arg, arg_strings)
707                 {
708                         char       *str = (char *) lfirst(arg);
709
710                         xmlTextWriterWriteRaw(writer, (xmlChar *) str);
711                 }
712
713                 xmlTextWriterEndElement(writer);
714
715                 /* we MUST do this now to flush data out to the buffer ... */
716                 xmlFreeTextWriter(writer);
717                 writer = NULL;
718
719                 result = xmlBuffer_to_xmltype(buf);
720         }
721         PG_CATCH();
722         {
723                 if (writer)
724                         xmlFreeTextWriter(writer);
725                 if (buf)
726                         xmlBufferFree(buf);
727
728                 pg_xml_done(xmlerrcxt, true);
729
730                 PG_RE_THROW();
731         }
732         PG_END_TRY();
733
734         xmlBufferFree(buf);
735
736         pg_xml_done(xmlerrcxt, false);
737
738         return result;
739 #else
740         NO_XML_SUPPORT();
741         return NULL;
742 #endif
743 }
744
745
746 xmltype *
747 xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
748 {
749 #ifdef USE_LIBXML
750         xmlDocPtr       doc;
751
752         doc = xml_parse(data, xmloption_arg, preserve_whitespace,
753                                         GetDatabaseEncoding());
754         xmlFreeDoc(doc);
755
756         return (xmltype *) data;
757 #else
758         NO_XML_SUPPORT();
759         return NULL;
760 #endif
761 }
762
763
764 xmltype *
765 xmlpi(char *target, text *arg, bool arg_is_null, bool *result_is_null)
766 {
767 #ifdef USE_LIBXML
768         xmltype    *result;
769         StringInfoData buf;
770
771         if (pg_strcasecmp(target, "xml") == 0)
772                 ereport(ERROR,
773                                 (errcode(ERRCODE_SYNTAX_ERROR), /* really */
774                                  errmsg("invalid XML processing instruction"),
775                                  errdetail("XML processing instruction target name cannot be \"%s\".", target)));
776
777         /*
778          * Following the SQL standard, the null check comes after the syntax check
779          * above.
780          */
781         *result_is_null = arg_is_null;
782         if (*result_is_null)
783                 return NULL;
784
785         initStringInfo(&buf);
786
787         appendStringInfo(&buf, "<?%s", target);
788
789         if (arg != NULL)
790         {
791                 char       *string;
792
793                 string = text_to_cstring(arg);
794                 if (strstr(string, "?>") != NULL)
795                         ereport(ERROR,
796                                         (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
797                                          errmsg("invalid XML processing instruction"),
798                         errdetail("XML processing instruction cannot contain \"?>\".")));
799
800                 appendStringInfoChar(&buf, ' ');
801                 appendStringInfoString(&buf, string + strspn(string, " "));
802                 pfree(string);
803         }
804         appendStringInfoString(&buf, "?>");
805
806         result = stringinfo_to_xmltype(&buf);
807         pfree(buf.data);
808         return result;
809 #else
810         NO_XML_SUPPORT();
811         return NULL;
812 #endif
813 }
814
815
816 xmltype *
817 xmlroot(xmltype *data, text *version, int standalone)
818 {
819 #ifdef USE_LIBXML
820         char       *str;
821         size_t          len;
822         xmlChar    *orig_version;
823         int                     orig_standalone;
824         StringInfoData buf;
825
826         len = VARSIZE(data) - VARHDRSZ;
827         str = text_to_cstring((text *) data);
828
829         parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
830
831         if (version)
832                 orig_version = xml_text2xmlChar(version);
833         else
834                 orig_version = NULL;
835
836         switch (standalone)
837         {
838                 case XML_STANDALONE_YES:
839                         orig_standalone = 1;
840                         break;
841                 case XML_STANDALONE_NO:
842                         orig_standalone = 0;
843                         break;
844                 case XML_STANDALONE_NO_VALUE:
845                         orig_standalone = -1;
846                         break;
847                 case XML_STANDALONE_OMITTED:
848                         /* leave original value */
849                         break;
850         }
851
852         initStringInfo(&buf);
853         print_xml_decl(&buf, orig_version, 0, orig_standalone);
854         appendStringInfoString(&buf, str + len);
855
856         return stringinfo_to_xmltype(&buf);
857 #else
858         NO_XML_SUPPORT();
859         return NULL;
860 #endif
861 }
862
863
864 /*
865  * Validate document (given as string) against DTD (given as external link)
866  *
867  * This has been removed because it is a security hole: unprivileged users
868  * should not be able to use Postgres to fetch arbitrary external files,
869  * which unfortunately is exactly what libxml is willing to do with the DTD
870  * parameter.
871  */
872 Datum
873 xmlvalidate(PG_FUNCTION_ARGS)
874 {
875         ereport(ERROR,
876                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
877                          errmsg("xmlvalidate is not implemented")));
878         return 0;
879 }
880
881
882 bool
883 xml_is_document(xmltype *arg)
884 {
885 #ifdef USE_LIBXML
886         bool            result;
887         volatile xmlDocPtr doc = NULL;
888         MemoryContext ccxt = CurrentMemoryContext;
889
890         /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
891         PG_TRY();
892         {
893                 doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
894                                                 GetDatabaseEncoding());
895                 result = true;
896         }
897         PG_CATCH();
898         {
899                 ErrorData  *errdata;
900                 MemoryContext ecxt;
901
902                 ecxt = MemoryContextSwitchTo(ccxt);
903                 errdata = CopyErrorData();
904                 if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT)
905                 {
906                         FlushErrorState();
907                         result = false;
908                 }
909                 else
910                 {
911                         MemoryContextSwitchTo(ecxt);
912                         PG_RE_THROW();
913                 }
914         }
915         PG_END_TRY();
916
917         if (doc)
918                 xmlFreeDoc(doc);
919
920         return result;
921 #else                                                   /* not USE_LIBXML */
922         NO_XML_SUPPORT();
923         return false;
924 #endif   /* not USE_LIBXML */
925 }
926
927
928 #ifdef USE_LIBXML
929
930 /*
931  * pg_xml_init_library --- set up for use of libxml
932  *
933  * This should be called by each function that is about to use libxml
934  * facilities but doesn't require error handling.  It initializes libxml
935  * and verifies compatibility with the loaded libxml version.  These are
936  * once-per-session activities.
937  *
938  * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
939  * check)
940  */
941 void
942 pg_xml_init_library(void)
943 {
944         static bool first_time = true;
945
946         if (first_time)
947         {
948                 /* Stuff we need do only once per session */
949
950                 /*
951                  * Currently, we have no pure UTF-8 support for internals -- check if
952                  * we can work.
953                  */
954                 if (sizeof(char) != sizeof(xmlChar))
955                         ereport(ERROR,
956                                         (errmsg("could not initialize XML library"),
957                                          errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
958                                                            (int) sizeof(char), (int) sizeof(xmlChar))));
959
960 #ifdef USE_LIBXMLCONTEXT
961                 /* Set up libxml's memory allocation our way */
962                 xml_memory_init();
963 #endif
964
965                 /* Check library compatibility */
966                 LIBXML_TEST_VERSION;
967
968                 first_time = false;
969         }
970 }
971
972 /*
973  * pg_xml_init --- set up for use of libxml and register an error handler
974  *
975  * This should be called by each function that is about to use libxml
976  * facilities and requires error handling.  It initializes libxml with
977  * pg_xml_init_library() and establishes our libxml error handler.
978  *
979  * strictness determines which errors are reported and which are ignored.
980  *
981  * Calls to this function MUST be followed by a PG_TRY block that guarantees
982  * that pg_xml_done() is called during either normal or error exit.
983  *
984  * This is exported for use by contrib/xml2, as well as other code that might
985  * wish to share use of this module's libxml error handler.
986  */
987 PgXmlErrorContext *
988 pg_xml_init(PgXmlStrictness strictness)
989 {
990         PgXmlErrorContext *errcxt;
991         void       *new_errcxt;
992
993         /* Do one-time setup if needed */
994         pg_xml_init_library();
995
996         /* Create error handling context structure */
997         errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
998         errcxt->magic = ERRCXT_MAGIC;
999         errcxt->strictness = strictness;
1000         errcxt->err_occurred = false;
1001         initStringInfo(&errcxt->err_buf);
1002
1003         /*
1004          * Save original error handler and install ours. libxml originally didn't
1005          * distinguish between the contexts for generic and for structured error
1006          * handlers.  If we're using an old libxml version, we must thus save the
1007          * generic error context, even though we're using a structured error
1008          * handler.
1009          */
1010         errcxt->saved_errfunc = xmlStructuredError;
1011
1012 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1013         errcxt->saved_errcxt = xmlStructuredErrorContext;
1014 #else
1015         errcxt->saved_errcxt = xmlGenericErrorContext;
1016 #endif
1017
1018         xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler);
1019
1020         /*
1021          * Verify that xmlSetStructuredErrorFunc set the context variable we
1022          * expected it to.  If not, the error context pointer we just saved is not
1023          * the correct thing to restore, and since that leaves us without a way to
1024          * restore the context in pg_xml_done, we must fail.
1025          *
1026          * The only known situation in which this test fails is if we compile with
1027          * headers from a libxml2 that doesn't track the structured error context
1028          * separately (< 2.7.4), but at runtime use a version that does, or vice
1029          * versa.  The libxml2 authors did not treat that change as constituting
1030          * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1031          * fails to protect us from this.
1032          */
1033
1034 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1035         new_errcxt = xmlStructuredErrorContext;
1036 #else
1037         new_errcxt = xmlGenericErrorContext;
1038 #endif
1039
1040         if (new_errcxt != (void *) errcxt)
1041                 ereport(ERROR,
1042                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1043                                  errmsg("could not set up XML error handler"),
1044                                  errhint("This probably indicates that the version of libxml2"
1045                                                  " being used is not compatible with the libxml2"
1046                                                  " header files that PostgreSQL was built with.")));
1047
1048         /*
1049          * Also, install an entity loader to prevent unwanted fetches of external
1050          * files and URLs.
1051          */
1052         errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1053         xmlSetExternalEntityLoader(xmlPgEntityLoader);
1054
1055         return errcxt;
1056 }
1057
1058
1059 /*
1060  * pg_xml_done --- restore previous libxml error handling
1061  *
1062  * Resets libxml's global error-handling state to what it was before
1063  * pg_xml_init() was called.
1064  *
1065  * This routine verifies that all pending errors have been dealt with
1066  * (in assert-enabled builds, anyway).
1067  */
1068 void
1069 pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1070 {
1071         void       *cur_errcxt;
1072
1073         /* An assert seems like enough protection here */
1074         Assert(errcxt->magic == ERRCXT_MAGIC);
1075
1076         /*
1077          * In a normal exit, there should be no un-handled libxml errors.  But we
1078          * shouldn't try to enforce this during error recovery, since the longjmp
1079          * could have been thrown before xml_ereport had a chance to run.
1080          */
1081         Assert(!errcxt->err_occurred || isError);
1082
1083         /*
1084          * Check that libxml's global state is correct, warn if not.  This is a
1085          * real test and not an Assert because it has a higher probability of
1086          * happening.
1087          */
1088 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1089         cur_errcxt = xmlStructuredErrorContext;
1090 #else
1091         cur_errcxt = xmlGenericErrorContext;
1092 #endif
1093
1094         if (cur_errcxt != (void *) errcxt)
1095                 elog(WARNING, "libxml error handling state is out of sync with xml.c");
1096
1097         /* Restore the saved handlers */
1098         xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1099         xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1100
1101         /*
1102          * Mark the struct as invalid, just in case somebody somehow manages to
1103          * call xml_errorHandler or xml_ereport with it.
1104          */
1105         errcxt->magic = 0;
1106
1107         /* Release memory */
1108         pfree(errcxt->err_buf.data);
1109         pfree(errcxt);
1110 }
1111
1112
1113 /*
1114  * pg_xml_error_occurred() --- test the error flag
1115  */
1116 bool
1117 pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1118 {
1119         return errcxt->err_occurred;
1120 }
1121
1122
1123 /*
1124  * SQL/XML allows storing "XML documents" or "XML content".  "XML
1125  * documents" are specified by the XML specification and are parsed
1126  * easily by libxml.  "XML content" is specified by SQL/XML as the
1127  * production "XMLDecl? content".  But libxml can only parse the
1128  * "content" part, so we have to parse the XML declaration ourselves
1129  * to complete this.
1130  */
1131
/*
 * Make the enclosing function return XML_ERR_SPACE_REQUIRED unless the
 * character at p is XML whitespace.  p itself is not advanced.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

/*
 * Advance p past any run of XML whitespace.  Beware: p is evaluated more
 * than once.  Wrapped in do/while(0) so that the expansion is a single
 * statement and remains safe inside an unbraced if/else.
 */
#define SKIP_XML_SPACE(p) \
	do { \
		while (xmlIsBlank_ch(*(p))) \
			(p)++; \
	} while (0)
1140
/*
 * Test whether code point c is an XML name character:
 * Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender
 *
 * Beware of multiple evaluations of argument!  The argument is fully
 * parenthesized in the expansion so that expression arguments group as
 * intended.
 */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
			|| xmlIsDigit_ch(c) \
			|| (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
			|| xmlIsCombiningQ(c) \
			|| xmlIsExtender_ch(c))
1149
1150 /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1151 static xmlChar *
1152 xml_pnstrdup(const xmlChar *str, size_t len)
1153 {
1154         xmlChar    *result;
1155
1156         result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1157         memcpy(result, str, len * sizeof(xmlChar));
1158         result[len] = 0;
1159         return result;
1160 }
1161
1162 /* Ditto, except input is char* */
1163 static xmlChar *
1164 pg_xmlCharStrndup(char *str, size_t len)
1165 {
1166         xmlChar    *result;
1167
1168         result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1169         memcpy(result, str, len);
1170         result[len] = '\0';
1171
1172         return result;
1173 }
1174
1175 /*
1176  * str is the null-terminated input string.  Remaining arguments are
1177  * output arguments; each can be NULL if value is not wanted.
1178  * version and encoding are returned as locally-palloc'd strings.
1179  * Result is 0 if OK, an error code if not.
1180  */
1181 static int
1182 parse_xml_decl(const xmlChar *str, size_t *lenp,
1183                            xmlChar **version, xmlChar **encoding, int *standalone)
1184 {
1185         const xmlChar *p;
1186         const xmlChar *save_p;
1187         size_t          len;
1188         int                     utf8char;
1189         int                     utf8len;
1190
1191         /*
1192          * Only initialize libxml.  We don't need error handling here, but we do
1193          * need to make sure libxml is initialized before calling any of its
1194          * functions.  Note that this is safe (and a no-op) if caller has already
1195          * done pg_xml_init().
1196          */
1197         pg_xml_init_library();
1198
1199         /* Initialize output arguments to "not present" */
1200         if (version)
1201                 *version = NULL;
1202         if (encoding)
1203                 *encoding = NULL;
1204         if (standalone)
1205                 *standalone = -1;
1206
1207         p = str;
1208
1209         if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1210                 goto finished;
1211
1212         /* if next char is name char, it's a PI like <?xml-stylesheet ...?> */
1213         utf8len = strlen((const char *) (p + 5));
1214         utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1215         if (PG_XMLISNAMECHAR(utf8char))
1216                 goto finished;
1217
1218         p += 5;
1219
1220         /* version */
1221         CHECK_XML_SPACE(p);
1222         SKIP_XML_SPACE(p);
1223         if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1224                 return XML_ERR_VERSION_MISSING;
1225         p += 7;
1226         SKIP_XML_SPACE(p);
1227         if (*p != '=')
1228                 return XML_ERR_VERSION_MISSING;
1229         p += 1;
1230         SKIP_XML_SPACE(p);
1231
1232         if (*p == '\'' || *p == '"')
1233         {
1234                 const xmlChar *q;
1235
1236                 q = xmlStrchr(p + 1, *p);
1237                 if (!q)
1238                         return XML_ERR_VERSION_MISSING;
1239
1240                 if (version)
1241                         *version = xml_pnstrdup(p + 1, q - p - 1);
1242                 p = q + 1;
1243         }
1244         else
1245                 return XML_ERR_VERSION_MISSING;
1246
1247         /* encoding */
1248         save_p = p;
1249         SKIP_XML_SPACE(p);
1250         if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1251         {
1252                 CHECK_XML_SPACE(save_p);
1253                 p += 8;
1254                 SKIP_XML_SPACE(p);
1255                 if (*p != '=')
1256                         return XML_ERR_MISSING_ENCODING;
1257                 p += 1;
1258                 SKIP_XML_SPACE(p);
1259
1260                 if (*p == '\'' || *p == '"')
1261                 {
1262                         const xmlChar *q;
1263
1264                         q = xmlStrchr(p + 1, *p);
1265                         if (!q)
1266                                 return XML_ERR_MISSING_ENCODING;
1267
1268                         if (encoding)
1269                                 *encoding = xml_pnstrdup(p + 1, q - p - 1);
1270                         p = q + 1;
1271                 }
1272                 else
1273                         return XML_ERR_MISSING_ENCODING;
1274         }
1275         else
1276         {
1277                 p = save_p;
1278         }
1279
1280         /* standalone */
1281         save_p = p;
1282         SKIP_XML_SPACE(p);
1283         if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1284         {
1285                 CHECK_XML_SPACE(save_p);
1286                 p += 10;
1287                 SKIP_XML_SPACE(p);
1288                 if (*p != '=')
1289                         return XML_ERR_STANDALONE_VALUE;
1290                 p += 1;
1291                 SKIP_XML_SPACE(p);
1292                 if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1293                         xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1294                 {
1295                         if (standalone)
1296                                 *standalone = 1;
1297                         p += 5;
1298                 }
1299                 else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1300                                  xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1301                 {
1302                         if (standalone)
1303                                 *standalone = 0;
1304                         p += 4;
1305                 }
1306                 else
1307                         return XML_ERR_STANDALONE_VALUE;
1308         }
1309         else
1310         {
1311                 p = save_p;
1312         }
1313
1314         SKIP_XML_SPACE(p);
1315         if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1316                 return XML_ERR_XMLDECL_NOT_FINISHED;
1317         p += 2;
1318
1319 finished:
1320         len = p - str;
1321
1322         for (p = str; p < str + len; p++)
1323                 if (*p > 127)
1324                         return XML_ERR_INVALID_CHAR;
1325
1326         if (lenp)
1327                 *lenp = len;
1328
1329         return XML_ERR_OK;
1330 }
1331
1332
1333 /*
1334  * Write an XML declaration.  On output, we adjust the XML declaration
1335  * as follows.  (These rules are the moral equivalent of the clause
1336  * "Serialization of an XML value" in the SQL standard.)
1337  *
1338  * We try to avoid generating an XML declaration if possible.  This is
1339  * so that you don't get trivial things like xml '<foo/>' resulting in
1340  * '<?xml version="1.0"?><foo/>', which would surely be annoying.  We
1341  * must provide a declaration if the standalone property is specified
1342  * or if we include an encoding declaration.  If we have a
1343  * declaration, we must specify a version (XML requires this).
1344  * Otherwise we only make a declaration if the version is not "1.0",
1345  * which is the default version specified in SQL:2003.
1346  */
1347 static bool
1348 print_xml_decl(StringInfo buf, const xmlChar *version,
1349                            pg_enc encoding, int standalone)
1350 {
1351         if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1352                 || (encoding && encoding != PG_UTF8)
1353                 || standalone != -1)
1354         {
1355                 appendStringInfoString(buf, "<?xml");
1356
1357                 if (version)
1358                         appendStringInfo(buf, " version=\"%s\"", version);
1359                 else
1360                         appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1361
1362                 if (encoding && encoding != PG_UTF8)
1363                 {
1364                         /*
1365                          * XXX might be useful to convert this to IANA names (ISO-8859-1
1366                          * instead of LATIN1 etc.); needs field experience
1367                          */
1368                         appendStringInfo(buf, " encoding=\"%s\"",
1369                                                          pg_encoding_to_char(encoding));
1370                 }
1371
1372                 if (standalone == 1)
1373                         appendStringInfoString(buf, " standalone=\"yes\"");
1374                 else if (standalone == 0)
1375                         appendStringInfoString(buf, " standalone=\"no\"");
1376                 appendStringInfoString(buf, "?>");
1377
1378                 return true;
1379         }
1380         else
1381                 return false;
1382 }
1383
1384
1385 /*
1386  * Convert a C string to XML internal representation
1387  *
1388  * Note: it is caller's responsibility to xmlFreeDoc() the result,
1389  * else a permanent memory leak will ensue!
1390  *
1391  * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1392  * yet do not use SAX - see xmlreader.c)
1393  */
1394 static xmlDocPtr
1395 xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1396                   int encoding)
1397 {
1398         int32           len;
1399         xmlChar    *string;
1400         xmlChar    *utf8string;
1401         PgXmlErrorContext *xmlerrcxt;
1402         volatile xmlParserCtxtPtr ctxt = NULL;
1403         volatile xmlDocPtr doc = NULL;
1404
1405         len = VARSIZE_ANY_EXHDR(data);          /* will be useful later */
1406         string = xml_text2xmlChar(data);
1407
1408         utf8string = pg_do_encoding_conversion(string,
1409                                                                                    len,
1410                                                                                    encoding,
1411                                                                                    PG_UTF8);
1412
1413         /* Start up libxml and its parser */
1414         xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1415
1416         /* Use a TRY block to ensure we clean up correctly */
1417         PG_TRY();
1418         {
1419                 xmlInitParser();
1420
1421                 ctxt = xmlNewParserCtxt();
1422                 if (ctxt == NULL || xmlerrcxt->err_occurred)
1423                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1424                                                 "could not allocate parser context");
1425
1426                 if (xmloption_arg == XMLOPTION_DOCUMENT)
1427                 {
1428                         /*
1429                          * Note, that here we try to apply DTD defaults
1430                          * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
1431                          * 'Default values defined by internal DTD are applied'. As for
1432                          * external DTDs, we try to support them too, (see SQL/XML:2008 GR
1433                          * 10.16.7.e)
1434                          */
1435                         doc = xmlCtxtReadDoc(ctxt, utf8string,
1436                                                                  NULL,
1437                                                                  "UTF-8",
1438                                                                  XML_PARSE_NOENT | XML_PARSE_DTDATTR
1439                                                    | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
1440                         if (doc == NULL || xmlerrcxt->err_occurred)
1441                                 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
1442                                                         "invalid XML document");
1443                 }
1444                 else
1445                 {
1446                         int                     res_code;
1447                         size_t          count;
1448                         xmlChar    *version;
1449                         int                     standalone;
1450
1451                         res_code = parse_xml_decl(utf8string,
1452                                                                           &count, &version, NULL, &standalone);
1453                         if (res_code != 0)
1454                                 xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
1455                                                           "invalid XML content: invalid XML declaration",
1456                                                                         res_code);
1457
1458                         doc = xmlNewDoc(version);
1459                         Assert(doc->encoding == NULL);
1460                         doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1461                         doc->standalone = standalone;
1462
1463                         /* allow empty content */
1464                         if (*(utf8string + count))
1465                         {
1466                                 res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1467                                                                                                    utf8string + count, NULL);
1468                                 if (res_code != 0 || xmlerrcxt->err_occurred)
1469                                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1470                                                                 "invalid XML content");
1471                         }
1472                 }
1473         }
1474         PG_CATCH();
1475         {
1476                 if (doc != NULL)
1477                         xmlFreeDoc(doc);
1478                 if (ctxt != NULL)
1479                         xmlFreeParserCtxt(ctxt);
1480
1481                 pg_xml_done(xmlerrcxt, true);
1482
1483                 PG_RE_THROW();
1484         }
1485         PG_END_TRY();
1486
1487         xmlFreeParserCtxt(ctxt);
1488
1489         pg_xml_done(xmlerrcxt, false);
1490
1491         return doc;
1492 }
1493
1494
1495 /*
1496  * xmlChar<->text conversions
1497  */
1498 static xmlChar *
1499 xml_text2xmlChar(text *in)
1500 {
1501         return (xmlChar *) text_to_cstring(in);
1502 }
1503
1504
1505 #ifdef USE_LIBXMLCONTEXT
1506
1507 /*
1508  * Manage the special context used for all libxml allocations (but only
1509  * in special debug builds; see notes at top of file)
1510  */
1511 static void
1512 xml_memory_init(void)
1513 {
1514         /* Create memory context if not there already */
1515         if (LibxmlContext == NULL)
1516                 LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1517                                                                                           "Libxml context",
1518                                                                                           ALLOCSET_DEFAULT_SIZES);
1519
1520         /* Re-establish the callbacks even if already set */
1521         xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1522 }
1523
1524 /*
1525  * Wrappers for memory management functions
1526  */
1527 static void *
1528 xml_palloc(size_t size)
1529 {
1530         return MemoryContextAlloc(LibxmlContext, size);
1531 }
1532
1533
static void *
xml_repalloc(void *ptr, size_t size)
{
	/* reallocation callback for libxml: simply hand off to repalloc */
	void	   *resized = repalloc(ptr, size);

	return resized;
}
1539
1540
static void
xml_pfree(void *ptr)
{
	/* At least some parts of libxml assume xmlFree(NULL) is allowed */
	if (ptr == NULL)
		return;

	pfree(ptr);
}
1548
1549
1550 static char *
1551 xml_pstrdup(const char *string)
1552 {
1553         return MemoryContextStrdup(LibxmlContext, string);
1554 }
1555 #endif   /* USE_LIBXMLCONTEXT */
1556
1557
1558 /*
1559  * xmlPgEntityLoader --- entity loader callback function
1560  *
1561  * Silently prevent any external entity URL from being loaded.  We don't want
1562  * to throw an error, so instead make the entity appear to expand to an empty
1563  * string.
1564  *
1565  * We would prefer to allow loading entities that exist in the system's
1566  * global XML catalog; but the available libxml2 APIs make that a complex
1567  * and fragile task.  For now, just shut down all external access.
1568  */
1569 static xmlParserInputPtr
1570 xmlPgEntityLoader(const char *URL, const char *ID,
1571                                   xmlParserCtxtPtr ctxt)
1572 {
1573         return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
1574 }
1575
1576
1577 /*
1578  * xml_ereport --- report an XML-related error
1579  *
1580  * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
1581  * standard.  This function adds libxml's native error message, if any, as
1582  * detail.
1583  *
1584  * This is exported for modules that want to share the core libxml error
1585  * handler.  Note that pg_xml_init() *must* have been called previously.
1586  */
1587 void
1588 xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
1589 {
1590         char       *detail;
1591
1592         /* Defend against someone passing us a bogus context struct */
1593         if (errcxt->magic != ERRCXT_MAGIC)
1594                 elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
1595
1596         /* Flag that the current libxml error has been reported */
1597         errcxt->err_occurred = false;
1598
1599         /* Include detail only if we have some text from libxml */
1600         if (errcxt->err_buf.len > 0)
1601                 detail = errcxt->err_buf.data;
1602         else
1603                 detail = NULL;
1604
1605         ereport(level,
1606                         (errcode(sqlcode),
1607                          errmsg_internal("%s", msg),
1608                          detail ? errdetail_internal("%s", detail) : 0));
1609 }
1610
1611
1612 /*
1613  * Error handler for libxml errors and warnings
1614  */
1615 static void
1616 xml_errorHandler(void *data, xmlErrorPtr error)
1617 {
1618         PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
1619         xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
1620         xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
1621         xmlNodePtr      node = error->node;
1622         const xmlChar *name = (node != NULL &&
1623                                                  node->type == XML_ELEMENT_NODE) ? node->name : NULL;
1624         int                     domain = error->domain;
1625         int                     level = error->level;
1626         StringInfo      errorBuf;
1627
1628         /*
1629          * Defend against someone passing us a bogus context struct.
1630          *
1631          * We force a backend exit if this check fails because longjmp'ing out of
1632          * libxml would likely render it unsafe to use further.
1633          */
1634         if (xmlerrcxt->magic != ERRCXT_MAGIC)
1635                 elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
1636
1637         /*----------
1638          * Older libxml versions report some errors differently.
1639          * First, some errors were previously reported as coming from the parser
1640          * domain but are now reported as coming from the namespace domain.
1641          * Second, some warnings were upgraded to errors.
1642          * We attempt to compensate for that here.
1643          *----------
1644          */
1645         switch (error->code)
1646         {
1647                 case XML_WAR_NS_URI:
1648                         level = XML_ERR_ERROR;
1649                         domain = XML_FROM_NAMESPACE;
1650                         break;
1651
1652                 case XML_ERR_NS_DECL_ERROR:
1653                 case XML_WAR_NS_URI_RELATIVE:
1654                 case XML_WAR_NS_COLUMN:
1655                 case XML_NS_ERR_XML_NAMESPACE:
1656                 case XML_NS_ERR_UNDEFINED_NAMESPACE:
1657                 case XML_NS_ERR_QNAME:
1658                 case XML_NS_ERR_ATTRIBUTE_REDEFINED:
1659                 case XML_NS_ERR_EMPTY:
1660                         domain = XML_FROM_NAMESPACE;
1661                         break;
1662         }
1663
1664         /* Decide whether to act on the error or not */
1665         switch (domain)
1666         {
1667                 case XML_FROM_PARSER:
1668                 case XML_FROM_NONE:
1669                 case XML_FROM_MEMORY:
1670                 case XML_FROM_IO:
1671
1672                         /*
1673                          * Suppress warnings about undeclared entities.  We need to do
1674                          * this to avoid problems due to not loading DTD definitions.
1675                          */
1676                         if (error->code == XML_WAR_UNDECLARED_ENTITY)
1677                                 return;
1678
1679                         /* Otherwise, accept error regardless of the parsing purpose */
1680                         break;
1681
1682                 default:
1683                         /* Ignore error if only doing well-formedness check */
1684                         if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
1685                                 return;
1686                         break;
1687         }
1688
1689         /* Prepare error message in errorBuf */
1690         errorBuf = makeStringInfo();
1691
1692         if (error->line > 0)
1693                 appendStringInfo(errorBuf, "line %d: ", error->line);
1694         if (name != NULL)
1695                 appendStringInfo(errorBuf, "element %s: ", name);
1696         appendStringInfoString(errorBuf, error->message);
1697
1698         /*
1699          * Append context information to errorBuf.
1700          *
1701          * xmlParserPrintFileContext() uses libxml's "generic" error handler to
1702          * write the context.  Since we don't want to duplicate libxml
1703          * functionality here, we set up a generic error handler temporarily.
1704          *
1705          * We use appendStringInfo() directly as libxml's generic error handler.
1706          * This should work because it has essentially the same signature as
1707          * libxml expects, namely (void *ptr, const char *msg, ...).
1708          */
1709         if (input != NULL)
1710         {
1711                 xmlGenericErrorFunc errFuncSaved = xmlGenericError;
1712                 void       *errCtxSaved = xmlGenericErrorContext;
1713
1714                 xmlSetGenericErrorFunc((void *) errorBuf,
1715                                                            (xmlGenericErrorFunc) appendStringInfo);
1716
1717                 /* Add context information to errorBuf */
1718                 appendStringInfoLineSeparator(errorBuf);
1719
1720                 xmlParserPrintFileContext(input);
1721
1722                 /* Restore generic error func */
1723                 xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
1724         }
1725
1726         /* Get rid of any trailing newlines in errorBuf */
1727         chopStringInfoNewlines(errorBuf);
1728
1729         /*
1730          * Legacy error handling mode.  err_occurred is never set, we just add the
1731          * message to err_buf.  This mode exists because the xml2 contrib module
1732          * uses our error-handling infrastructure, but we don't want to change its
1733          * behaviour since it's deprecated anyway.  This is also why we don't
1734          * distinguish between notices, warnings and errors here --- the old-style
1735          * generic error handler wouldn't have done that either.
1736          */
1737         if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
1738         {
1739                 appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1740                 appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
1741
1742                 pfree(errorBuf->data);
1743                 pfree(errorBuf);
1744                 return;
1745         }
1746
1747         /*
1748          * We don't want to ereport() here because that'd probably leave libxml in
1749          * an inconsistent state.  Instead, we remember the error and ereport()
1750          * from xml_ereport().
1751          *
1752          * Warnings and notices can be reported immediately since they won't cause
1753          * a longjmp() out of libxml.
1754          */
1755         if (level >= XML_ERR_ERROR)
1756         {
1757                 appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1758                 appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
1759
1760                 xmlerrcxt->err_occurred = true;
1761         }
1762         else if (level >= XML_ERR_WARNING)
1763         {
1764                 ereport(WARNING,
1765                                 (errmsg_internal("%s", errorBuf->data)));
1766         }
1767         else
1768         {
1769                 ereport(NOTICE,
1770                                 (errmsg_internal("%s", errorBuf->data)));
1771         }
1772
1773         pfree(errorBuf->data);
1774         pfree(errorBuf);
1775 }
1776
1777
1778 /*
1779  * Wrapper for "ereport" function for XML-related errors.  The "msg"
1780  * is the SQL-level message; some can be adopted from the SQL/XML
1781  * standard.  This function uses "code" to create a textual detail
1782  * message.  At the moment, we only need to cover those codes that we
1783  * may raise in this file.
1784  */
1785 static void
1786 xml_ereport_by_code(int level, int sqlcode,
1787                                         const char *msg, int code)
1788 {
1789         const char *det;
1790
1791         switch (code)
1792         {
1793                 case XML_ERR_INVALID_CHAR:
1794                         det = gettext_noop("Invalid character value.");
1795                         break;
1796                 case XML_ERR_SPACE_REQUIRED:
1797                         det = gettext_noop("Space required.");
1798                         break;
1799                 case XML_ERR_STANDALONE_VALUE:
1800                         det = gettext_noop("standalone accepts only 'yes' or 'no'.");
1801                         break;
1802                 case XML_ERR_VERSION_MISSING:
1803                         det = gettext_noop("Malformed declaration: missing version.");
1804                         break;
1805                 case XML_ERR_MISSING_ENCODING:
1806                         det = gettext_noop("Missing encoding in text declaration.");
1807                         break;
1808                 case XML_ERR_XMLDECL_NOT_FINISHED:
1809                         det = gettext_noop("Parsing XML declaration: '?>' expected.");
1810                         break;
1811                 default:
1812                         det = gettext_noop("Unrecognized libxml error code: %d.");
1813                         break;
1814         }
1815
1816         ereport(level,
1817                         (errcode(sqlcode),
1818                          errmsg_internal("%s", msg),
1819                          errdetail(det, code)));
1820 }
1821
1822
1823 /*
1824  * Remove all trailing newlines from a StringInfo string
1825  */
1826 static void
1827 chopStringInfoNewlines(StringInfo str)
1828 {
1829         while (str->len > 0 && str->data[str->len - 1] == '\n')
1830                 str->data[--str->len] = '\0';
1831 }
1832
1833
1834 /*
1835  * Append a newline after removing any existing trailing newlines
1836  */
1837 static void
1838 appendStringInfoLineSeparator(StringInfo str)
1839 {
1840         chopStringInfoNewlines(str);
1841         if (str->len > 0)
1842                 appendStringInfoChar(str, '\n');
1843 }
1844
1845
1846 /*
1847  * Convert one char in the current server encoding to a Unicode codepoint.
1848  */
1849 static pg_wchar
1850 sqlchar_to_unicode(char *s)
1851 {
1852         char       *utf8string;
1853         pg_wchar        ret[2];                 /* need space for trailing zero */
1854
1855         /* note we're not assuming s is null-terminated */
1856         utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
1857
1858         pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
1859                                                                   pg_encoding_mblen(PG_UTF8, utf8string));
1860
1861         if (utf8string != s)
1862                 pfree(utf8string);
1863
1864         return ret[0];
1865 }
1866
1867
1868 static bool
1869 is_valid_xml_namefirst(pg_wchar c)
1870 {
1871         /* (Letter | '_' | ':') */
1872         return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1873                         || c == '_' || c == ':');
1874 }
1875
1876
1877 static bool
1878 is_valid_xml_namechar(pg_wchar c)
1879 {
1880         /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
1881         return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1882                         || xmlIsDigitQ(c)
1883                         || c == '.' || c == '-' || c == '_' || c == ':'
1884                         || xmlIsCombiningQ(c)
1885                         || xmlIsExtenderQ(c));
1886 }
1887 #endif   /* USE_LIBXML */
1888
1889
1890 /*
1891  * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
1892  */
1893 char *
1894 map_sql_identifier_to_xml_name(char *ident, bool fully_escaped,
1895                                                            bool escape_period)
1896 {
1897 #ifdef USE_LIBXML
1898         StringInfoData buf;
1899         char       *p;
1900
1901         /*
1902          * SQL/XML doesn't make use of this case anywhere, so it's probably a
1903          * mistake.
1904          */
1905         Assert(fully_escaped || !escape_period);
1906
1907         initStringInfo(&buf);
1908
1909         for (p = ident; *p; p += pg_mblen(p))
1910         {
1911                 if (*p == ':' && (p == ident || fully_escaped))
1912                         appendStringInfoString(&buf, "_x003A_");
1913                 else if (*p == '_' && *(p + 1) == 'x')
1914                         appendStringInfoString(&buf, "_x005F_");
1915                 else if (fully_escaped && p == ident &&
1916                                  pg_strncasecmp(p, "xml", 3) == 0)
1917                 {
1918                         if (*p == 'x')
1919                                 appendStringInfoString(&buf, "_x0078_");
1920                         else
1921                                 appendStringInfoString(&buf, "_x0058_");
1922                 }
1923                 else if (escape_period && *p == '.')
1924                         appendStringInfoString(&buf, "_x002E_");
1925                 else
1926                 {
1927                         pg_wchar        u = sqlchar_to_unicode(p);
1928
1929                         if ((p == ident)
1930                                 ? !is_valid_xml_namefirst(u)
1931                                 : !is_valid_xml_namechar(u))
1932                                 appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
1933                         else
1934                                 appendBinaryStringInfo(&buf, p, pg_mblen(p));
1935                 }
1936         }
1937
1938         return buf.data;
1939 #else                                                   /* not USE_LIBXML */
1940         NO_XML_SUPPORT();
1941         return NULL;
1942 #endif   /* not USE_LIBXML */
1943 }
1944
1945
1946 /*
1947  * Map a Unicode codepoint into the current server encoding.
1948  */
1949 static char *
1950 unicode_to_sqlchar(pg_wchar c)
1951 {
1952         char            utf8string[8];  /* need room for trailing zero */
1953         char       *result;
1954
1955         memset(utf8string, 0, sizeof(utf8string));
1956         unicode_to_utf8(c, (unsigned char *) utf8string);
1957
1958         result = pg_any_to_server(utf8string, strlen(utf8string), PG_UTF8);
1959         /* if pg_any_to_server didn't strdup, we must */
1960         if (result == utf8string)
1961                 result = pstrdup(result);
1962         return result;
1963 }
1964
1965
1966 /*
1967  * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
1968  */
1969 char *
1970 map_xml_name_to_sql_identifier(char *name)
1971 {
1972         StringInfoData buf;
1973         char       *p;
1974
1975         initStringInfo(&buf);
1976
1977         for (p = name; *p; p += pg_mblen(p))
1978         {
1979                 if (*p == '_' && *(p + 1) == 'x'
1980                         && isxdigit((unsigned char) *(p + 2))
1981                         && isxdigit((unsigned char) *(p + 3))
1982                         && isxdigit((unsigned char) *(p + 4))
1983                         && isxdigit((unsigned char) *(p + 5))
1984                         && *(p + 6) == '_')
1985                 {
1986                         unsigned int u;
1987
1988                         sscanf(p + 2, "%X", &u);
1989                         appendStringInfoString(&buf, unicode_to_sqlchar(u));
1990                         p += 6;
1991                 }
1992                 else
1993                         appendBinaryStringInfo(&buf, p, pg_mblen(p));
1994         }
1995
1996         return buf.data;
1997 }
1998
1999 /*
2000  * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
2001  *
2002  * When xml_escape_strings is true, then certain characters in string
2003  * values are replaced by entity references (&lt; etc.), as specified
2004  * in SQL/XML:2008 section 9.8 GR 9) a) iii).   This is normally what is
2005  * wanted.  The false case is mainly useful when the resulting value
2006  * is used with xmlTextWriterWriteAttribute() to write out an
2007  * attribute, because that function does the escaping itself.
2008  */
2009 char *
2010 map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
2011 {
2012         if (type_is_array_domain(type))
2013         {
2014                 ArrayType  *array;
2015                 Oid                     elmtype;
2016                 int16           elmlen;
2017                 bool            elmbyval;
2018                 char            elmalign;
2019                 int                     num_elems;
2020                 Datum      *elem_values;
2021                 bool       *elem_nulls;
2022                 StringInfoData buf;
2023                 int                     i;
2024
2025                 array = DatumGetArrayTypeP(value);
2026                 elmtype = ARR_ELEMTYPE(array);
2027                 get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
2028
2029                 deconstruct_array(array, elmtype,
2030                                                   elmlen, elmbyval, elmalign,
2031                                                   &elem_values, &elem_nulls,
2032                                                   &num_elems);
2033
2034                 initStringInfo(&buf);
2035
2036                 for (i = 0; i < num_elems; i++)
2037                 {
2038                         if (elem_nulls[i])
2039                                 continue;
2040                         appendStringInfoString(&buf, "<element>");
2041                         appendStringInfoString(&buf,
2042                                                                    map_sql_value_to_xml_value(elem_values[i],
2043                                                                                                                           elmtype, true));
2044                         appendStringInfoString(&buf, "</element>");
2045                 }
2046
2047                 pfree(elem_values);
2048                 pfree(elem_nulls);
2049
2050                 return buf.data;
2051         }
2052         else
2053         {
2054                 Oid                     typeOut;
2055                 bool            isvarlena;
2056                 char       *str;
2057
2058                 /*
2059                  * Flatten domains; the special-case treatments below should apply to,
2060                  * eg, domains over boolean not just boolean.
2061                  */
2062                 type = getBaseType(type);
2063
2064                 /*
2065                  * Special XSD formatting for some data types
2066                  */
2067                 switch (type)
2068                 {
2069                         case BOOLOID:
2070                                 if (DatumGetBool(value))
2071                                         return "true";
2072                                 else
2073                                         return "false";
2074
2075                         case DATEOID:
2076                                 {
2077                                         DateADT         date;
2078                                         struct pg_tm tm;
2079                                         char            buf[MAXDATELEN + 1];
2080
2081                                         date = DatumGetDateADT(value);
2082                                         /* XSD doesn't support infinite values */
2083                                         if (DATE_NOT_FINITE(date))
2084                                                 ereport(ERROR,
2085                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2086                                                                  errmsg("date out of range"),
2087                                                                  errdetail("XML does not support infinite date values.")));
2088                                         j2date(date + POSTGRES_EPOCH_JDATE,
2089                                                    &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
2090                                         EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2091
2092                                         return pstrdup(buf);
2093                                 }
2094
2095                         case TIMESTAMPOID:
2096                                 {
2097                                         Timestamp       timestamp;
2098                                         struct pg_tm tm;
2099                                         fsec_t          fsec;
2100                                         char            buf[MAXDATELEN + 1];
2101
2102                                         timestamp = DatumGetTimestamp(value);
2103
2104                                         /* XSD doesn't support infinite values */
2105                                         if (TIMESTAMP_NOT_FINITE(timestamp))
2106                                                 ereport(ERROR,
2107                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2108                                                                  errmsg("timestamp out of range"),
2109                                                                  errdetail("XML does not support infinite timestamp values.")));
2110                                         else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2111                                                 EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
2112                                         else
2113                                                 ereport(ERROR,
2114                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2115                                                                  errmsg("timestamp out of range")));
2116
2117                                         return pstrdup(buf);
2118                                 }
2119
2120                         case TIMESTAMPTZOID:
2121                                 {
2122                                         TimestampTz timestamp;
2123                                         struct pg_tm tm;
2124                                         int                     tz;
2125                                         fsec_t          fsec;
2126                                         const char *tzn = NULL;
2127                                         char            buf[MAXDATELEN + 1];
2128
2129                                         timestamp = DatumGetTimestamp(value);
2130
2131                                         /* XSD doesn't support infinite values */
2132                                         if (TIMESTAMP_NOT_FINITE(timestamp))
2133                                                 ereport(ERROR,
2134                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2135                                                                  errmsg("timestamp out of range"),
2136                                                                  errdetail("XML does not support infinite timestamp values.")));
2137                                         else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2138                                                 EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2139                                         else
2140                                                 ereport(ERROR,
2141                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2142                                                                  errmsg("timestamp out of range")));
2143
2144                                         return pstrdup(buf);
2145                                 }
2146
2147 #ifdef USE_LIBXML
2148                         case BYTEAOID:
2149                                 {
2150                                         bytea      *bstr = DatumGetByteaPP(value);
2151                                         PgXmlErrorContext *xmlerrcxt;
2152                                         volatile xmlBufferPtr buf = NULL;
2153                                         volatile xmlTextWriterPtr writer = NULL;
2154                                         char       *result;
2155
2156                                         xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2157
2158                                         PG_TRY();
2159                                         {
2160                                                 buf = xmlBufferCreate();
2161                                                 if (buf == NULL || xmlerrcxt->err_occurred)
2162                                                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2163                                                                                 "could not allocate xmlBuffer");
2164                                                 writer = xmlNewTextWriterMemory(buf, 0);
2165                                                 if (writer == NULL || xmlerrcxt->err_occurred)
2166                                                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2167                                                                                 "could not allocate xmlTextWriter");
2168
2169                                                 if (xmlbinary == XMLBINARY_BASE64)
2170                                                         xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2171                                                                                                  0, VARSIZE_ANY_EXHDR(bstr));
2172                                                 else
2173                                                         xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2174                                                                                                  0, VARSIZE_ANY_EXHDR(bstr));
2175
2176                                                 /* we MUST do this now to flush data out to the buffer */
2177                                                 xmlFreeTextWriter(writer);
2178                                                 writer = NULL;
2179
2180                                                 result = pstrdup((const char *) xmlBufferContent(buf));
2181                                         }
2182                                         PG_CATCH();
2183                                         {
2184                                                 if (writer)
2185                                                         xmlFreeTextWriter(writer);
2186                                                 if (buf)
2187                                                         xmlBufferFree(buf);
2188
2189                                                 pg_xml_done(xmlerrcxt, true);
2190
2191                                                 PG_RE_THROW();
2192                                         }
2193                                         PG_END_TRY();
2194
2195                                         xmlBufferFree(buf);
2196
2197                                         pg_xml_done(xmlerrcxt, false);
2198
2199                                         return result;
2200                                 }
2201 #endif   /* USE_LIBXML */
2202
2203                 }
2204
2205                 /*
2206                  * otherwise, just use the type's native text representation
2207                  */
2208                 getTypeOutputInfo(type, &typeOut, &isvarlena);
2209                 str = OidOutputFunctionCall(typeOut, value);
2210
2211                 /* ... exactly as-is for XML, and when escaping is not wanted */
2212                 if (type == XMLOID || !xml_escape_strings)
2213                         return str;
2214
2215                 /* otherwise, translate special characters as needed */
2216                 return escape_xml(str);
2217         }
2218 }
2219
2220
2221 /*
2222  * Escape characters in text that have special meanings in XML.
2223  *
2224  * Returns a palloc'd string.
2225  *
2226  * NB: this is intentionally not dependent on libxml.
2227  */
2228 char *
2229 escape_xml(const char *str)
2230 {
2231         StringInfoData buf;
2232         const char *p;
2233
2234         initStringInfo(&buf);
2235         for (p = str; *p; p++)
2236         {
2237                 switch (*p)
2238                 {
2239                         case '&':
2240                                 appendStringInfoString(&buf, "&amp;");
2241                                 break;
2242                         case '<':
2243                                 appendStringInfoString(&buf, "&lt;");
2244                                 break;
2245                         case '>':
2246                                 appendStringInfoString(&buf, "&gt;");
2247                                 break;
2248                         case '\r':
2249                                 appendStringInfoString(&buf, "&#x0d;");
2250                                 break;
2251                         default:
2252                                 appendStringInfoCharMacro(&buf, *p);
2253                                 break;
2254                 }
2255         }
2256         return buf.data;
2257 }
2258
2259
/*
 * Duplicate a null-terminated string into memory obtained from
 * SPI_palloc(), including the terminating zero.
 */
static char *
_SPI_strdup(const char *s)
{
	size_t		nbytes;
	char	   *copy;

	nbytes = strlen(s) + 1;		/* +1 for the terminator */
	copy = SPI_palloc(nbytes);
	memcpy(copy, s, nbytes);

	return copy;
}
2269
2270
2271 /*
2272  * SQL to XML mapping functions
2273  *
2274  * What follows below was at one point intentionally organized so that
2275  * you can read along in the SQL/XML standard. The functions are
2276  * mostly split up the way the clauses lay out in the standards
2277  * document, and the identifiers are also aligned with the standard
2278  * text.  Unfortunately, SQL/XML:2006 reordered the clauses
2279  * differently than SQL/XML:2003, so the order below doesn't make much
2280  * sense anymore.
2281  *
2282  * There are many things going on there:
2283  *
2284  * There are two kinds of mappings: Mapping SQL data (table contents)
2285  * to XML documents, and mapping SQL structure (the "schema") to XML
2286  * Schema.  And there are functions that do both at the same time.
2287  *
2288  * Then you can map a database, a schema, or a table, each in both
2289  * ways.  This breaks down recursively: Mapping a database invokes
2290  * mapping schemas, which invokes mapping tables, which invokes
2291  * mapping rows, which invokes mapping columns, although you can't
2292  * call the last two from the outside.  Because of this, there are a
2293  * number of xyz_internal() functions which are to be called both from
2294  * the function manager wrapper and from some upper layer in a
2295  * recursive call.
2296  *
2297  * See the documentation about what the common function arguments
2298  * nulls, tableforest, and targetns mean.
2299  *
2300  * Some style guidelines for XML output: Use double quotes for quoting
2301  * XML attributes.  Indent XML elements by two spaces, but remember
2302  * that a lot of code is called recursively at different levels, so
2303  * it's better not to indent rather than create output that indents
2304  * and outdents weirdly.  Add newlines to make the output look nice.
2305  */
2306
2307
2308 /*
2309  * Visibility of objects for XML mappings; see SQL/XML:2008 section
2310  * 4.10.8.
2311  */
2312
2313 /*
2314  * Given a query, which must return type oid as first column, produce
2315  * a list of Oids with the query results.
2316  */
2317 static List *
2318 query_to_oid_list(const char *query)
2319 {
2320         uint64          i;
2321         List       *list = NIL;
2322
2323         SPI_execute(query, true, 0);
2324
2325         for (i = 0; i < SPI_processed; i++)
2326         {
2327                 Datum           oid;
2328                 bool            isnull;
2329
2330                 oid = SPI_getbinval(SPI_tuptable->vals[i],
2331                                                         SPI_tuptable->tupdesc,
2332                                                         1,
2333                                                         &isnull);
2334                 if (!isnull)
2335                         list = lappend_oid(list, DatumGetObjectId(oid));
2336         }
2337
2338         return list;
2339 }
2340
2341
2342 static List *
2343 schema_get_xml_visible_tables(Oid nspid)
2344 {
2345         StringInfoData query;
2346
2347         initStringInfo(&query);
2348         appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class"
2349                                          " WHERE relnamespace = %u AND relkind IN ("
2350                                          CppAsString2(RELKIND_RELATION) ","
2351                                          CppAsString2(RELKIND_MATVIEW) ","
2352                                          CppAsString2(RELKIND_VIEW) ")"
2353                                          " AND pg_catalog.has_table_privilege (oid, 'SELECT')"
2354                                          " ORDER BY relname;", nspid);
2355
2356         return query_to_oid_list(query.data);
2357 }
2358
2359
2360 /*
2361  * Including the system schemas is probably not useful for a database
2362  * mapping.
2363  */
2364 #define XML_VISIBLE_SCHEMAS_EXCLUDE "(nspname ~ '^pg_' OR nspname = 'information_schema')"
2365
2366 #define XML_VISIBLE_SCHEMAS "SELECT oid FROM pg_catalog.pg_namespace WHERE pg_catalog.has_schema_privilege (oid, 'USAGE') AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2367
2368
2369 static List *
2370 database_get_xml_visible_schemas(void)
2371 {
2372         return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2373 }
2374
2375
2376 static List *
2377 database_get_xml_visible_tables(void)
2378 {
2379         /* At the moment there is no order required here. */
2380         return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class"
2381                                                          " WHERE relkind IN ("
2382                                                          CppAsString2(RELKIND_RELATION) ","
2383                                                          CppAsString2(RELKIND_MATVIEW) ","
2384                                                          CppAsString2(RELKIND_VIEW) ")"
2385                                                          " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')"
2386                                                          " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2387 }
2388
2389
2390 /*
2391  * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2392  * section 9.11.
2393  */
2394
2395 static StringInfo
2396 table_to_xml_internal(Oid relid,
2397                                           const char *xmlschema, bool nulls, bool tableforest,
2398                                           const char *targetns, bool top_level)
2399 {
2400         StringInfoData query;
2401
2402         initStringInfo(&query);
2403         appendStringInfo(&query, "SELECT * FROM %s",
2404                                          DatumGetCString(DirectFunctionCall1(regclassout,
2405                                                                                                   ObjectIdGetDatum(relid))));
2406         return query_to_xml_internal(query.data, get_rel_name(relid),
2407                                                                  xmlschema, nulls, tableforest,
2408                                                                  targetns, top_level);
2409 }
2410
2411
2412 Datum
2413 table_to_xml(PG_FUNCTION_ARGS)
2414 {
2415         Oid                     relid = PG_GETARG_OID(0);
2416         bool            nulls = PG_GETARG_BOOL(1);
2417         bool            tableforest = PG_GETARG_BOOL(2);
2418         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2419
2420         PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2421                                                                                                                   nulls, tableforest,
2422                                                                                                                    targetns, true)));
2423 }
2424
2425
2426 Datum
2427 query_to_xml(PG_FUNCTION_ARGS)
2428 {
2429         char       *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2430         bool            nulls = PG_GETARG_BOOL(1);
2431         bool            tableforest = PG_GETARG_BOOL(2);
2432         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2433
2434         PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2435                                                                                                         NULL, nulls, tableforest,
2436                                                                                                                    targetns, true)));
2437 }
2438
2439
2440 Datum
2441 cursor_to_xml(PG_FUNCTION_ARGS)
2442 {
2443         char       *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2444         int32           count = PG_GETARG_INT32(1);
2445         bool            nulls = PG_GETARG_BOOL(2);
2446         bool            tableforest = PG_GETARG_BOOL(3);
2447         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2448
2449         StringInfoData result;
2450         Portal          portal;
2451         uint64          i;
2452
2453         initStringInfo(&result);
2454
2455         SPI_connect();
2456         portal = SPI_cursor_find(name);
2457         if (portal == NULL)
2458                 ereport(ERROR,
2459                                 (errcode(ERRCODE_UNDEFINED_CURSOR),
2460                                  errmsg("cursor \"%s\" does not exist", name)));
2461
2462         SPI_cursor_fetch(portal, true, count);
2463         for (i = 0; i < SPI_processed; i++)
2464                 SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2465                                                                   tableforest, targetns, true);
2466
2467         SPI_finish();
2468
2469         PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2470 }
2471
2472
2473 /*
2474  * Write the start tag of the root element of a data mapping.
2475  *
2476  * top_level means that this is the very top level of the eventual
2477  * output.  For example, when the user calls table_to_xml, then a call
2478  * with a table name to this function is the top level.  When the user
2479  * calls database_to_xml, then a call with a schema name to this
2480  * function is not the top level.  If top_level is false, then the XML
2481  * namespace declarations are omitted, because they supposedly already
2482  * appeared earlier in the output.  Repeating them is not wrong, but
2483  * it looks ugly.
2484  */
2485 static void
2486 xmldata_root_element_start(StringInfo result, const char *eltname,
2487                                                    const char *xmlschema, const char *targetns,
2488                                                    bool top_level)
2489 {
2490         /* This isn't really wrong but currently makes no sense. */
2491         Assert(top_level || !xmlschema);
2492
2493         appendStringInfo(result, "<%s", eltname);
2494         if (top_level)
2495         {
2496                 appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
2497                 if (strlen(targetns) > 0)
2498                         appendStringInfo(result, " xmlns=\"%s\"", targetns);
2499         }
2500         if (xmlschema)
2501         {
2502                 /* FIXME: better targets */
2503                 if (strlen(targetns) > 0)
2504                         appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
2505                 else
2506                         appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
2507         }
2508         appendStringInfoString(result, ">\n");
2509 }
2510
2511
2512 static void
2513 xmldata_root_element_end(StringInfo result, const char *eltname)
2514 {
2515         appendStringInfo(result, "</%s>\n", eltname);
2516 }
2517
2518
2519 static StringInfo
2520 query_to_xml_internal(const char *query, char *tablename,
2521                                           const char *xmlschema, bool nulls, bool tableforest,
2522                                           const char *targetns, bool top_level)
2523 {
2524         StringInfo      result;
2525         char       *xmltn;
2526         uint64          i;
2527
2528         if (tablename)
2529                 xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
2530         else
2531                 xmltn = "table";
2532
2533         result = makeStringInfo();
2534
2535         SPI_connect();
2536         if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
2537                 ereport(ERROR,
2538                                 (errcode(ERRCODE_DATA_EXCEPTION),
2539                                  errmsg("invalid query")));
2540
2541         if (!tableforest)
2542         {
2543                 xmldata_root_element_start(result, xmltn, xmlschema,
2544                                                                    targetns, top_level);
2545                 appendStringInfoChar(result, '\n');
2546         }
2547
2548         if (xmlschema)
2549                 appendStringInfo(result, "%s\n\n", xmlschema);
2550
2551         for (i = 0; i < SPI_processed; i++)
2552                 SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
2553                                                                   tableforest, targetns, top_level);
2554
2555         if (!tableforest)
2556                 xmldata_root_element_end(result, xmltn);
2557
2558         SPI_finish();
2559
2560         return result;
2561 }
2562
2563
2564 Datum
2565 table_to_xmlschema(PG_FUNCTION_ARGS)
2566 {
2567         Oid                     relid = PG_GETARG_OID(0);
2568         bool            nulls = PG_GETARG_BOOL(1);
2569         bool            tableforest = PG_GETARG_BOOL(2);
2570         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2571         const char *result;
2572         Relation        rel;
2573
2574         rel = heap_open(relid, AccessShareLock);
2575         result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2576                                                                                 tableforest, targetns);
2577         heap_close(rel, NoLock);
2578
2579         PG_RETURN_XML_P(cstring_to_xmltype(result));
2580 }
2581
2582
2583 Datum
2584 query_to_xmlschema(PG_FUNCTION_ARGS)
2585 {
2586         char       *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2587         bool            nulls = PG_GETARG_BOOL(1);
2588         bool            tableforest = PG_GETARG_BOOL(2);
2589         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2590         const char *result;
2591         SPIPlanPtr      plan;
2592         Portal          portal;
2593
2594         SPI_connect();
2595
2596         if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2597                 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2598
2599         if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2600                 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2601
2602         result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2603                                                                                                         InvalidOid, nulls,
2604                                                                                                         tableforest, targetns));
2605         SPI_cursor_close(portal);
2606         SPI_finish();
2607
2608         PG_RETURN_XML_P(cstring_to_xmltype(result));
2609 }
2610
2611
2612 Datum
2613 cursor_to_xmlschema(PG_FUNCTION_ARGS)
2614 {
2615         char       *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2616         bool            nulls = PG_GETARG_BOOL(1);
2617         bool            tableforest = PG_GETARG_BOOL(2);
2618         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2619         const char *xmlschema;
2620         Portal          portal;
2621
2622         SPI_connect();
2623         portal = SPI_cursor_find(name);
2624         if (portal == NULL)
2625                 ereport(ERROR,
2626                                 (errcode(ERRCODE_UNDEFINED_CURSOR),
2627                                  errmsg("cursor \"%s\" does not exist", name)));
2628
2629         xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2630                                                                                                            InvalidOid, nulls,
2631                                                                                                          tableforest, targetns));
2632         SPI_finish();
2633
2634         PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
2635 }
2636
2637
2638 Datum
2639 table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2640 {
2641         Oid                     relid = PG_GETARG_OID(0);
2642         bool            nulls = PG_GETARG_BOOL(1);
2643         bool            tableforest = PG_GETARG_BOOL(2);
2644         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2645         Relation        rel;
2646         const char *xmlschema;
2647
2648         rel = heap_open(relid, AccessShareLock);
2649         xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2650                                                                                    tableforest, targetns);
2651         heap_close(rel, NoLock);
2652
2653         PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
2654                                                                                            xmlschema, nulls, tableforest,
2655                                                                                                                    targetns, true)));
2656 }
2657
2658
2659 Datum
2660 query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2661 {
2662         char       *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2663         bool            nulls = PG_GETARG_BOOL(1);
2664         bool            tableforest = PG_GETARG_BOOL(2);
2665         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2666
2667         const char *xmlschema;
2668         SPIPlanPtr      plan;
2669         Portal          portal;
2670
2671         SPI_connect();
2672
2673         if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2674                 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2675
2676         if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2677                 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2678
2679         xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2680                                                                   InvalidOid, nulls, tableforest, targetns));
2681         SPI_cursor_close(portal);
2682         SPI_finish();
2683
2684         PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2685                                                                                            xmlschema, nulls, tableforest,
2686                                                                                                                    targetns, true)));
2687 }
2688
2689
2690 /*
2691  * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
2692  * sections 9.13, 9.14.
2693  */
2694
2695 static StringInfo
2696 schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
2697                                            bool tableforest, const char *targetns, bool top_level)
2698 {
2699         StringInfo      result;
2700         char       *xmlsn;
2701         List       *relid_list;
2702         ListCell   *cell;
2703
2704         xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
2705                                                                                    true, false);
2706         result = makeStringInfo();
2707
2708         xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
2709         appendStringInfoChar(result, '\n');
2710
2711         if (xmlschema)
2712                 appendStringInfo(result, "%s\n\n", xmlschema);
2713
2714         SPI_connect();
2715
2716         relid_list = schema_get_xml_visible_tables(nspid);
2717
2718         foreach(cell, relid_list)
2719         {
2720                 Oid                     relid = lfirst_oid(cell);
2721                 StringInfo      subres;
2722
2723                 subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
2724                                                                            targetns, false);
2725
2726                 appendStringInfoString(result, subres->data);
2727                 appendStringInfoChar(result, '\n');
2728         }
2729
2730         SPI_finish();
2731
2732         xmldata_root_element_end(result, xmlsn);
2733
2734         return result;
2735 }
2736
2737
2738 Datum
2739 schema_to_xml(PG_FUNCTION_ARGS)
2740 {
2741         Name            name = PG_GETARG_NAME(0);
2742         bool            nulls = PG_GETARG_BOOL(1);
2743         bool            tableforest = PG_GETARG_BOOL(2);
2744         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2745
2746         char       *schemaname;
2747         Oid                     nspid;
2748
2749         schemaname = NameStr(*name);
2750         nspid = LookupExplicitNamespace(schemaname, false);
2751
2752         PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
2753                                                                            nulls, tableforest, targetns, true)));
2754 }
2755
2756
2757 /*
2758  * Write the start element of the root element of an XML Schema mapping.
2759  */
2760 static void
2761 xsd_schema_element_start(StringInfo result, const char *targetns)
2762 {
2763         appendStringInfoString(result,
2764                                                    "<xsd:schema\n"
2765                                                    "    xmlns:xsd=\"" NAMESPACE_XSD "\"");
2766         if (strlen(targetns) > 0)
2767                 appendStringInfo(result,
2768                                                  "\n"
2769                                                  "    targetNamespace=\"%s\"\n"
2770                                                  "    elementFormDefault=\"qualified\"",
2771                                                  targetns);
2772         appendStringInfoString(result,
2773                                                    ">\n\n");
2774 }
2775
2776
2777 static void
2778 xsd_schema_element_end(StringInfo result)
2779 {
2780         appendStringInfoString(result, "</xsd:schema>");
2781 }
2782
2783
2784 static StringInfo
2785 schema_to_xmlschema_internal(const char *schemaname, bool nulls,
2786                                                          bool tableforest, const char *targetns)
2787 {
2788         Oid                     nspid;
2789         List       *relid_list;
2790         List       *tupdesc_list;
2791         ListCell   *cell;
2792         StringInfo      result;
2793
2794         result = makeStringInfo();
2795
2796         nspid = LookupExplicitNamespace(schemaname, false);
2797
2798         xsd_schema_element_start(result, targetns);
2799
2800         SPI_connect();
2801
2802         relid_list = schema_get_xml_visible_tables(nspid);
2803
2804         tupdesc_list = NIL;
2805         foreach(cell, relid_list)
2806         {
2807                 Relation        rel;
2808
2809                 rel = heap_open(lfirst_oid(cell), AccessShareLock);
2810                 tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
2811                 heap_close(rel, NoLock);
2812         }
2813
2814         appendStringInfoString(result,
2815                                                    map_sql_typecoll_to_xmlschema_types(tupdesc_list));
2816
2817         appendStringInfoString(result,
2818                                                  map_sql_schema_to_xmlschema_types(nspid, relid_list,
2819                                                                                           nulls, tableforest, targetns));
2820
2821         xsd_schema_element_end(result);
2822
2823         SPI_finish();
2824
2825         return result;
2826 }
2827
2828
2829 Datum
2830 schema_to_xmlschema(PG_FUNCTION_ARGS)
2831 {
2832         Name            name = PG_GETARG_NAME(0);
2833         bool            nulls = PG_GETARG_BOOL(1);
2834         bool            tableforest = PG_GETARG_BOOL(2);
2835         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2836
2837         PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
2838                                                                                          nulls, tableforest, targetns)));
2839 }
2840
2841
2842 Datum
2843 schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2844 {
2845         Name            name = PG_GETARG_NAME(0);
2846         bool            nulls = PG_GETARG_BOOL(1);
2847         bool            tableforest = PG_GETARG_BOOL(2);
2848         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2849         char       *schemaname;
2850         Oid                     nspid;
2851         StringInfo      xmlschema;
2852
2853         schemaname = NameStr(*name);
2854         nspid = LookupExplicitNamespace(schemaname, false);
2855
2856         xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
2857                                                                                          tableforest, targetns);
2858
2859         PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
2860                                                                                                           xmlschema->data, nulls,
2861                                                                                           tableforest, targetns, true)));
2862 }
2863
2864
2865 /*
2866  * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
2867  * sections 9.16, 9.17.
2868  */
2869
2870 static StringInfo
2871 database_to_xml_internal(const char *xmlschema, bool nulls,
2872                                                  bool tableforest, const char *targetns)
2873 {
2874         StringInfo      result;
2875         List       *nspid_list;
2876         ListCell   *cell;
2877         char       *xmlcn;
2878
2879         xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
2880                                                                                    true, false);
2881         result = makeStringInfo();
2882
2883         xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
2884         appendStringInfoChar(result, '\n');
2885
2886         if (xmlschema)
2887                 appendStringInfo(result, "%s\n\n", xmlschema);
2888
2889         SPI_connect();
2890
2891         nspid_list = database_get_xml_visible_schemas();
2892
2893         foreach(cell, nspid_list)
2894         {
2895                 Oid                     nspid = lfirst_oid(cell);
2896                 StringInfo      subres;
2897
2898                 subres = schema_to_xml_internal(nspid, NULL, nulls,
2899                                                                                 tableforest, targetns, false);
2900
2901                 appendStringInfoString(result, subres->data);
2902                 appendStringInfoChar(result, '\n');
2903         }
2904
2905         SPI_finish();
2906
2907         xmldata_root_element_end(result, xmlcn);
2908
2909         return result;
2910 }
2911
2912
2913 Datum
2914 database_to_xml(PG_FUNCTION_ARGS)
2915 {
2916         bool            nulls = PG_GETARG_BOOL(0);
2917         bool            tableforest = PG_GETARG_BOOL(1);
2918         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
2919
2920         PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
2921                                                                                                         tableforest, targetns)));
2922 }
2923
2924
2925 static StringInfo
2926 database_to_xmlschema_internal(bool nulls, bool tableforest,
2927                                                            const char *targetns)
2928 {
2929         List       *relid_list;
2930         List       *nspid_list;
2931         List       *tupdesc_list;
2932         ListCell   *cell;
2933         StringInfo      result;
2934
2935         result = makeStringInfo();
2936
2937         xsd_schema_element_start(result, targetns);
2938
2939         SPI_connect();
2940
2941         relid_list = database_get_xml_visible_tables();
2942         nspid_list = database_get_xml_visible_schemas();
2943
2944         tupdesc_list = NIL;
2945         foreach(cell, relid_list)
2946         {
2947                 Relation        rel;
2948
2949                 rel = heap_open(lfirst_oid(cell), AccessShareLock);
2950                 tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
2951                 heap_close(rel, NoLock);
2952         }
2953
2954         appendStringInfoString(result,
2955                                                    map_sql_typecoll_to_xmlschema_types(tupdesc_list));
2956
2957         appendStringInfoString(result,
2958                                                    map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
2959
2960         xsd_schema_element_end(result);
2961
2962         SPI_finish();
2963
2964         return result;
2965 }
2966
2967
2968 Datum
2969 database_to_xmlschema(PG_FUNCTION_ARGS)
2970 {
2971         bool            nulls = PG_GETARG_BOOL(0);
2972         bool            tableforest = PG_GETARG_BOOL(1);
2973         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
2974
2975         PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
2976                                                                                                         tableforest, targetns)));
2977 }
2978
2979
2980 Datum
2981 database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2982 {
2983         bool            nulls = PG_GETARG_BOOL(0);
2984         bool            tableforest = PG_GETARG_BOOL(1);
2985         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
2986         StringInfo      xmlschema;
2987
2988         xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
2989
2990         PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
2991                                                                                          nulls, tableforest, targetns)));
2992 }
2993
2994
2995 /*
2996  * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
2997  * 9.2.
2998  */
2999 static char *
3000 map_multipart_sql_identifier_to_xml_name(char *a, char *b, char *c, char *d)
3001 {
3002         StringInfoData result;
3003
3004         initStringInfo(&result);
3005
3006         if (a)
3007                 appendStringInfoString(&result,
3008                                                            map_sql_identifier_to_xml_name(a, true, true));
3009         if (b)
3010                 appendStringInfo(&result, ".%s",
3011                                                  map_sql_identifier_to_xml_name(b, true, true));
3012         if (c)
3013                 appendStringInfo(&result, ".%s",
3014                                                  map_sql_identifier_to_xml_name(c, true, true));
3015         if (d)
3016                 appendStringInfo(&result, ".%s",
3017                                                  map_sql_identifier_to_xml_name(d, true, true));
3018
3019         return result.data;
3020 }
3021
3022
3023 /*
3024  * Map an SQL table to an XML Schema document; see SQL/XML:2008
3025  * section 9.11.
3026  *
3027  * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3028  * 9.9.
3029  */
3030 static const char *
3031 map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
3032                                                    bool tableforest, const char *targetns)
3033 {
3034         int                     i;
3035         char       *xmltn;
3036         char       *tabletypename;
3037         char       *rowtypename;
3038         StringInfoData result;
3039
3040         initStringInfo(&result);
3041
3042         if (OidIsValid(relid))
3043         {
3044                 HeapTuple       tuple;
3045                 Form_pg_class reltuple;
3046
3047                 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
3048                 if (!HeapTupleIsValid(tuple))
3049                         elog(ERROR, "cache lookup failed for relation %u", relid);
3050                 reltuple = (Form_pg_class) GETSTRUCT(tuple);
3051
3052                 xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
3053                                                                                            true, false);
3054
3055                 tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
3056                                                                                          get_database_name(MyDatabaseId),
3057                                                                   get_namespace_name(reltuple->relnamespace),
3058                                                                                                  NameStr(reltuple->relname));
3059
3060                 rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
3061                                                                                          get_database_name(MyDatabaseId),
3062                                                                   get_namespace_name(reltuple->relnamespace),
3063                                                                                                  NameStr(reltuple->relname));
3064
3065                 ReleaseSysCache(tuple);
3066         }
3067         else
3068         {
3069                 if (tableforest)
3070                         xmltn = "row";
3071                 else
3072                         xmltn = "table";
3073
3074                 tabletypename = "TableType";
3075                 rowtypename = "RowType";
3076         }
3077
3078         xsd_schema_element_start(&result, targetns);
3079
3080         appendStringInfoString(&result,
3081                                    map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3082
3083         appendStringInfo(&result,
3084                                          "<xsd:complexType name=\"%s\">\n"
3085                                          "  <xsd:sequence>\n",
3086                                          rowtypename);
3087
3088         for (i = 0; i < tupdesc->natts; i++)
3089         {
3090                 if (tupdesc->attrs[i]->attisdropped)
3091                         continue;
3092                 appendStringInfo(&result,
3093                            "    <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3094                   map_sql_identifier_to_xml_name(NameStr(tupdesc->attrs[i]->attname),
3095                                                                                  true, false),
3096                                    map_sql_type_to_xml_name(tupdesc->attrs[i]->atttypid, -1),
3097                                                  nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3098         }
3099
3100         appendStringInfoString(&result,
3101                                                    "  </xsd:sequence>\n"
3102                                                    "</xsd:complexType>\n\n");
3103
3104         if (!tableforest)
3105         {
3106                 appendStringInfo(&result,
3107                                                  "<xsd:complexType name=\"%s\">\n"
3108                                                  "  <xsd:sequence>\n"
3109                                                  "    <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3110                                                  "  </xsd:sequence>\n"
3111                                                  "</xsd:complexType>\n\n",
3112                                                  tabletypename, rowtypename);
3113
3114                 appendStringInfo(&result,
3115                                                  "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3116                                                  xmltn, tabletypename);
3117         }
3118         else
3119                 appendStringInfo(&result,
3120                                                  "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3121                                                  xmltn, rowtypename);
3122
3123         xsd_schema_element_end(&result);
3124
3125         return result.data;
3126 }
3127
3128
3129 /*
3130  * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3131  * section 9.12.
3132  */
3133 static const char *
3134 map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3135                                                                   bool tableforest, const char *targetns)
3136 {
3137         char       *dbname;
3138         char       *nspname;
3139         char       *xmlsn;
3140         char       *schematypename;
3141         StringInfoData result;
3142         ListCell   *cell;
3143
3144         dbname = get_database_name(MyDatabaseId);
3145         nspname = get_namespace_name(nspid);
3146
3147         initStringInfo(&result);
3148
3149         xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3150
3151         schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3152                                                                                                                           dbname,
3153                                                                                                                           nspname,
3154                                                                                                                           NULL);
3155
3156         appendStringInfo(&result,
3157                                          "<xsd:complexType name=\"%s\">\n", schematypename);
3158         if (!tableforest)
3159                 appendStringInfoString(&result,
3160                                                            "  <xsd:all>\n");
3161         else
3162                 appendStringInfoString(&result,
3163                                                            "  <xsd:sequence>\n");
3164
3165         foreach(cell, relid_list)
3166         {
3167                 Oid                     relid = lfirst_oid(cell);
3168                 char       *relname = get_rel_name(relid);
3169                 char       *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3170                 char       *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3171                                                                                                                                           dbname,
3172                                                                                                                                          nspname,
3173                                                                                                                                         relname);
3174
3175                 if (!tableforest)
3176                         appendStringInfo(&result,
3177                                                          "    <xsd:element name=\"%s\" type=\"%s\"/>\n",
3178                                                          xmltn, tabletypename);
3179                 else
3180                         appendStringInfo(&result,
3181                                                          "    <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3182                                                          xmltn, tabletypename);
3183         }
3184
3185         if (!tableforest)
3186                 appendStringInfoString(&result,
3187                                                            "  </xsd:all>\n");
3188         else
3189                 appendStringInfoString(&result,
3190                                                            "  </xsd:sequence>\n");
3191         appendStringInfoString(&result,
3192                                                    "</xsd:complexType>\n\n");
3193
3194         appendStringInfo(&result,
3195                                          "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3196                                          xmlsn, schematypename);
3197
3198         return result.data;
3199 }
3200
3201
3202 /*
3203  * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3204  * section 9.15.
3205  */
3206 static const char *
3207 map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3208                                                                    bool tableforest, const char *targetns)
3209 {
3210         char       *dbname;
3211         char       *xmlcn;
3212         char       *catalogtypename;
3213         StringInfoData result;
3214         ListCell   *cell;
3215
3216         dbname = get_database_name(MyDatabaseId);
3217
3218         initStringInfo(&result);
3219
3220         xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3221
3222         catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3223                                                                                                                            dbname,
3224                                                                                                                            NULL,
3225                                                                                                                            NULL);
3226
3227         appendStringInfo(&result,
3228                                          "<xsd:complexType name=\"%s\">\n", catalogtypename);
3229         appendStringInfoString(&result,
3230                                                    "  <xsd:all>\n");
3231
3232         foreach(cell, nspid_list)
3233         {
3234                 Oid                     nspid = lfirst_oid(cell);
3235                 char       *nspname = get_namespace_name(nspid);
3236                 char       *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3237                 char       *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3238                                                                                                                                           dbname,
3239                                                                                                                                          nspname,
3240                                                                                                                                            NULL);
3241
3242                 appendStringInfo(&result,
3243                                                  "    <xsd:element name=\"%s\" type=\"%s\"/>\n",
3244                                                  xmlsn, schematypename);
3245         }
3246
3247         appendStringInfoString(&result,
3248                                                    "  </xsd:all>\n");
3249         appendStringInfoString(&result,
3250                                                    "</xsd:complexType>\n\n");
3251
3252         appendStringInfo(&result,
3253                                          "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3254                                          xmlcn, catalogtypename);
3255
3256         return result.data;
3257 }
3258
3259
3260 /*
3261  * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3262  */
3263 static const char *
3264 map_sql_type_to_xml_name(Oid typeoid, int typmod)
3265 {
3266         StringInfoData result;
3267
3268         initStringInfo(&result);
3269
3270         switch (typeoid)
3271         {
3272                 case BPCHAROID:
3273                         if (typmod == -1)
3274                                 appendStringInfoString(&result, "CHAR");
3275                         else
3276                                 appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3277                         break;
3278                 case VARCHAROID:
3279                         if (typmod == -1)
3280                                 appendStringInfoString(&result, "VARCHAR");
3281                         else
3282                                 appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3283                         break;
3284                 case NUMERICOID:
3285                         if (typmod == -1)
3286                                 appendStringInfoString(&result, "NUMERIC");
3287                         else
3288                                 appendStringInfo(&result, "NUMERIC_%d_%d",
3289                                                                  ((typmod - VARHDRSZ) >> 16) & 0xffff,
3290                                                                  (typmod - VARHDRSZ) & 0xffff);
3291                         break;
3292                 case INT4OID:
3293                         appendStringInfoString(&result, "INTEGER");
3294                         break;
3295                 case INT2OID:
3296                         appendStringInfoString(&result, "SMALLINT");
3297                         break;
3298                 case INT8OID:
3299                         appendStringInfoString(&result, "BIGINT");
3300                         break;
3301                 case FLOAT4OID:
3302                         appendStringInfoString(&result, "REAL");
3303                         break;
3304                 case FLOAT8OID:
3305                         appendStringInfoString(&result, "DOUBLE");
3306                         break;
3307                 case BOOLOID:
3308                         appendStringInfoString(&result, "BOOLEAN");
3309                         break;
3310                 case TIMEOID:
3311                         if (typmod == -1)
3312                                 appendStringInfoString(&result, "TIME");
3313                         else
3314                                 appendStringInfo(&result, "TIME_%d", typmod);
3315                         break;
3316                 case TIMETZOID:
3317                         if (typmod == -1)
3318                                 appendStringInfoString(&result, "TIME_WTZ");
3319                         else
3320                                 appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3321                         break;
3322                 case TIMESTAMPOID:
3323                         if (typmod == -1)
3324                                 appendStringInfoString(&result, "TIMESTAMP");
3325                         else
3326                                 appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3327                         break;
3328                 case TIMESTAMPTZOID:
3329                         if (typmod == -1)
3330                                 appendStringInfoString(&result, "TIMESTAMP_WTZ");
3331                         else
3332                                 appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3333                         break;
3334                 case DATEOID:
3335                         appendStringInfoString(&result, "DATE");
3336                         break;
3337                 case XMLOID:
3338                         appendStringInfoString(&result, "XML");
3339                         break;
3340                 default:
3341                         {
3342                                 HeapTuple       tuple;
3343                                 Form_pg_type typtuple;
3344
3345                                 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3346                                 if (!HeapTupleIsValid(tuple))
3347                                         elog(ERROR, "cache lookup failed for type %u", typeoid);
3348                                 typtuple = (Form_pg_type) GETSTRUCT(tuple);
3349
3350                                 appendStringInfoString(&result,
3351                                                                            map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3352                                                                                          get_database_name(MyDatabaseId),
3353                                                                   get_namespace_name(typtuple->typnamespace),
3354                                                                                                 NameStr(typtuple->typname)));
3355
3356                                 ReleaseSysCache(tuple);
3357                         }
3358         }
3359
3360         return result.data;
3361 }
3362
3363
3364 /*
3365  * Map a collection of SQL data types to XML Schema data types; see
3366  * SQL/XML:2008 section 9.7.
3367  */
3368 static const char *
3369 map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
3370 {
3371         List       *uniquetypes = NIL;
3372         int                     i;
3373         StringInfoData result;
3374         ListCell   *cell0;
3375
3376         /* extract all column types used in the set of TupleDescs */
3377         foreach(cell0, tupdesc_list)
3378         {
3379                 TupleDesc       tupdesc = (TupleDesc) lfirst(cell0);
3380
3381                 for (i = 0; i < tupdesc->natts; i++)
3382                 {
3383                         if (tupdesc->attrs[i]->attisdropped)
3384                                 continue;
3385                         uniquetypes = list_append_unique_oid(uniquetypes,
3386                                                                                                  tupdesc->attrs[i]->atttypid);
3387                 }
3388         }
3389
3390         /* add base types of domains */
3391         foreach(cell0, uniquetypes)
3392         {
3393                 Oid                     typid = lfirst_oid(cell0);
3394                 Oid                     basetypid = getBaseType(typid);
3395
3396                 if (basetypid != typid)
3397                         uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3398         }
3399
3400         /* Convert to textual form */
3401         initStringInfo(&result);
3402
3403         foreach(cell0, uniquetypes)
3404         {
3405                 appendStringInfo(&result, "%s\n",
3406                                                  map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3407                                                                                                                 -1));
3408         }
3409
3410         return result.data;
3411 }
3412
3413
3414 /*
3415  * Map an SQL data type to a named XML Schema data type; see
3416  * SQL/XML:2008 sections 9.5 and 9.6.
3417  *
3418  * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3419  * a name attribute, which this function does.  The name-less version
3420  * 9.5 doesn't appear to be required anywhere.)
3421  */
3422 static const char *
3423 map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3424 {
3425         StringInfoData result;
3426         const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3427
3428         initStringInfo(&result);
3429
3430         if (typeoid == XMLOID)
3431         {
3432                 appendStringInfoString(&result,
3433                                                            "<xsd:complexType mixed=\"true\">\n"
3434                                                            "  <xsd:sequence>\n"
3435                                                            "    <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3436                                                            "  </xsd:sequence>\n"
3437                                                            "</xsd:complexType>\n");
3438         }
3439         else
3440         {
3441                 appendStringInfo(&result,
3442                                                  "<xsd:simpleType name=\"%s\">\n", typename);
3443
3444                 switch (typeoid)
3445                 {
3446                         case BPCHAROID:
3447                         case VARCHAROID:
3448                         case TEXTOID:
3449                                 appendStringInfo(&result,
3450                                                                  "  <xsd:restriction base=\"xsd:string\">\n");
3451                                 if (typmod != -1)
3452                                         appendStringInfo(&result,
3453                                                                          "    <xsd:maxLength value=\"%d\"/>\n",
3454                                                                          typmod - VARHDRSZ);
3455                                 appendStringInfoString(&result, "  </xsd:restriction>\n");
3456                                 break;
3457
3458                         case BYTEAOID:
3459                                 appendStringInfo(&result,
3460                                                                  "  <xsd:restriction base=\"xsd:%s\">\n"
3461                                                                  "  </xsd:restriction>\n",
3462                                 xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3463                                 break;
3464
3465                         case NUMERICOID:
3466                                 if (typmod != -1)
3467                                         appendStringInfo(&result,
3468                                                                  "  <xsd:restriction base=\"xsd:decimal\">\n"
3469                                                                          "    <xsd:totalDigits value=\"%d\"/>\n"
3470                                                                    "    <xsd:fractionDigits value=\"%d\"/>\n"
3471                                                                          "  </xsd:restriction>\n",
3472                                                                          ((typmod - VARHDRSZ) >> 16) & 0xffff,
3473                                                                          (typmod - VARHDRSZ) & 0xffff);
3474                                 break;
3475
3476                         case INT2OID:
3477                                 appendStringInfo(&result,
3478                                                                  "  <xsd:restriction base=\"xsd:short\">\n"
3479                                                                  "    <xsd:maxInclusive value=\"%d\"/>\n"
3480                                                                  "    <xsd:minInclusive value=\"%d\"/>\n"
3481                                                                  "  </xsd:restriction>\n",
3482                                                                  SHRT_MAX, SHRT_MIN);
3483                                 break;
3484
3485                         case INT4OID:
3486                                 appendStringInfo(&result,
3487                                                                  "  <xsd:restriction base=\"xsd:int\">\n"
3488                                                                  "    <xsd:maxInclusive value=\"%d\"/>\n"
3489                                                                  "    <xsd:minInclusive value=\"%d\"/>\n"
3490                                                                  "  </xsd:restriction>\n",
3491                                                                  INT_MAX, INT_MIN);
3492                                 break;
3493
3494                         case INT8OID:
3495                                 appendStringInfo(&result,
3496                                                                  "  <xsd:restriction base=\"xsd:long\">\n"
3497                                            "    <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
3498                                            "    <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
3499                                                                  "  </xsd:restriction>\n",
3500                                                            (((uint64) 1) << (sizeof(int64) * 8 - 1)) - 1,
3501                                                                  (((uint64) 1) << (sizeof(int64) * 8 - 1)));
3502                                 break;
3503
3504                         case FLOAT4OID:
3505                                 appendStringInfoString(&result,
3506                                 "  <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
3507                                 break;
3508
3509                         case FLOAT8OID:
3510                                 appendStringInfoString(&result,
3511                                                                            "  <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
3512                                 break;
3513
3514                         case BOOLOID:
3515                                 appendStringInfoString(&result,
3516                                                                            "  <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
3517                                 break;
3518
3519                         case TIMEOID:
3520                         case TIMETZOID:
3521                                 {
3522                                         const char *tz = (typeoid == TIMETZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3523
3524                                         if (typmod == -1)
3525                                                 appendStringInfo(&result,
3526                                                                         "  <xsd:restriction base=\"xsd:time\">\n"
3527                                                                                  "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3528                                                                                  "  </xsd:restriction>\n", tz);
3529                                         else if (typmod == 0)
3530                                                 appendStringInfo(&result,
3531                                                                         "  <xsd:restriction base=\"xsd:time\">\n"
3532                                                                                  "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3533                                                                                  "  </xsd:restriction>\n", tz);
3534                                         else
3535                                                 appendStringInfo(&result,
3536                                                                         "  <xsd:restriction base=\"xsd:time\">\n"
3537                                                                                  "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3538                                                         "  </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3539                                         break;
3540                                 }
3541
3542                         case TIMESTAMPOID:
3543                         case TIMESTAMPTZOID:
3544                                 {
3545                                         const char *tz = (typeoid == TIMESTAMPTZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3546
3547                                         if (typmod == -1)
3548                                                 appendStringInfo(&result,
3549                                                                 "  <xsd:restriction base=\"xsd:dateTime\">\n"
3550                                                                                  "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3551                                                                                  "  </xsd:restriction>\n", tz);
3552                                         else if (typmod == 0)
3553                                                 appendStringInfo(&result,
3554                                                                 "  <xsd:restriction base=\"xsd:dateTime\">\n"
3555                                                                                  "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3556                                                                                  "  </xsd:restriction>\n", tz);
3557                                         else
3558                                                 appendStringInfo(&result,
3559                                                                 "  <xsd:restriction base=\"xsd:dateTime\">\n"
3560                                                                                  "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3561                                                         "  </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3562                                         break;
3563                                 }
3564
3565                         case DATEOID:
3566                                 appendStringInfoString(&result,
3567                                                                         "  <xsd:restriction base=\"xsd:date\">\n"
3568                                                                            "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
3569                                                                            "  </xsd:restriction>\n");
3570                                 break;
3571
3572                         default:
3573                                 if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
3574                                 {
3575                                         Oid                     base_typeoid;
3576                                         int32           base_typmod = -1;
3577
3578                                         base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
3579
3580                                         appendStringInfo(&result,
3581                                                                          "  <xsd:restriction base=\"%s\"/>\n",
3582                                                 map_sql_type_to_xml_name(base_typeoid, base_typmod));
3583                                 }
3584                                 break;
3585                 }
3586                 appendStringInfoString(&result, "</xsd:simpleType>\n");
3587         }
3588
3589         return result.data;
3590 }
3591
3592
3593 /*
3594  * Map an SQL row to an XML element, taking the row from the active
3595  * SPI cursor.  See also SQL/XML:2008 section 9.10.
3596  */
3597 static void
3598 SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
3599                                                   bool nulls, bool tableforest,
3600                                                   const char *targetns, bool top_level)
3601 {
3602         int                     i;
3603         char       *xmltn;
3604
3605         if (tablename)
3606                 xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3607         else
3608         {
3609                 if (tableforest)
3610                         xmltn = "row";
3611                 else
3612                         xmltn = "table";
3613         }
3614
3615         if (tableforest)
3616                 xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
3617         else
3618                 appendStringInfoString(result, "<row>\n");
3619
3620         for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
3621         {
3622                 char       *colname;
3623                 Datum           colval;
3624                 bool            isnull;
3625
3626                 colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
3627                                                                                                  true, false);
3628                 colval = SPI_getbinval(SPI_tuptable->vals[rownum],
3629                                                            SPI_tuptable->tupdesc,
3630                                                            i,
3631                                                            &isnull);
3632                 if (isnull)
3633                 {
3634                         if (nulls)
3635                                 appendStringInfo(result, "  <%s xsi:nil=\"true\"/>\n", colname);
3636                 }
3637                 else
3638                         appendStringInfo(result, "  <%s>%s</%s>\n",
3639                                                          colname,
3640                                                          map_sql_value_to_xml_value(colval,
3641                                                           SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
3642                                                          colname);
3643         }
3644
3645         if (tableforest)
3646         {
3647                 xmldata_root_element_end(result, xmltn);
3648                 appendStringInfoChar(result, '\n');
3649         }
3650         else
3651                 appendStringInfoString(result, "</row>\n\n");
3652 }
3653
3654
3655 /*
3656  * XPath related functions
3657  */
3658
3659 #ifdef USE_LIBXML
3660
3661 /*
3662  * Convert XML node to text (dump subtree in case of element,
3663  * return value otherwise)
3664  */
3665 static text *
3666 xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
3667 {
3668         xmltype    *result;
3669
3670         if (cur->type == XML_ELEMENT_NODE)
3671         {
3672                 xmlBufferPtr buf;
3673                 xmlNodePtr      cur_copy;
3674
3675                 buf = xmlBufferCreate();
3676
3677                 /*
3678                  * The result of xmlNodeDump() won't contain namespace definitions
3679                  * from parent nodes, but xmlCopyNode() duplicates a node along with
3680                  * its required namespace definitions.
3681                  */
3682                 cur_copy = xmlCopyNode(cur, 1);
3683
3684                 if (cur_copy == NULL)
3685                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3686                                                 "could not copy node");
3687
3688                 PG_TRY();
3689                 {
3690                         xmlNodeDump(buf, NULL, cur_copy, 0, 1);
3691                         result = xmlBuffer_to_xmltype(buf);
3692                 }
3693                 PG_CATCH();
3694                 {
3695                         xmlFreeNode(cur_copy);
3696                         xmlBufferFree(buf);
3697                         PG_RE_THROW();
3698                 }
3699                 PG_END_TRY();
3700                 xmlFreeNode(cur_copy);
3701                 xmlBufferFree(buf);
3702         }
3703         else
3704         {
3705                 xmlChar    *str;
3706
3707                 str = xmlXPathCastNodeToString(cur);
3708                 PG_TRY();
3709                 {
3710                         /* Here we rely on XML having the same representation as TEXT */
3711                         char       *escaped = escape_xml((char *) str);
3712
3713                         result = (xmltype *) cstring_to_text(escaped);
3714                         pfree(escaped);
3715                 }
3716                 PG_CATCH();
3717                 {
3718                         xmlFree(str);
3719                         PG_RE_THROW();
3720                 }
3721                 PG_END_TRY();
3722                 xmlFree(str);
3723         }
3724
3725         return result;
3726 }
3727
3728 /*
3729  * Convert an XML XPath object (the result of evaluating an XPath expression)
3730  * to an array of xml values, which are appended to astate.  The function
3731  * result value is the number of elements in the array.
3732  *
3733  * If "astate" is NULL then we don't generate the array value, but we still
3734  * return the number of elements it would have had.
3735  *
3736  * Nodesets are converted to an array containing the nodes' textual
3737  * representations.  Primitive values (float, double, string) are converted
3738  * to a single-element array containing the value's string representation.
3739  */
3740 static int
3741 xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
3742                                            ArrayBuildState *astate,
3743                                            PgXmlErrorContext *xmlerrcxt)
3744 {
3745         int                     result = 0;
3746         Datum           datum;
3747         Oid                     datumtype;
3748         char       *result_str;
3749
3750         switch (xpathobj->type)
3751         {
3752                 case XPATH_NODESET:
3753                         if (xpathobj->nodesetval != NULL)
3754                         {
3755                                 result = xpathobj->nodesetval->nodeNr;
3756                                 if (astate != NULL)
3757                                 {
3758                                         int                     i;
3759
3760                                         for (i = 0; i < result; i++)
3761                                         {
3762                                                 datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
3763                                                                                                                                  xmlerrcxt));
3764                                                 (void) accumArrayResult(astate, datum, false,
3765                                                                                                 XMLOID, CurrentMemoryContext);
3766                                         }
3767                                 }
3768                         }
3769                         return result;
3770
3771                 case XPATH_BOOLEAN:
3772                         if (astate == NULL)
3773                                 return 1;
3774                         datum = BoolGetDatum(xpathobj->boolval);
3775                         datumtype = BOOLOID;
3776                         break;
3777
3778                 case XPATH_NUMBER:
3779                         if (astate == NULL)
3780                                 return 1;
3781                         datum = Float8GetDatum(xpathobj->floatval);
3782                         datumtype = FLOAT8OID;
3783                         break;
3784
3785                 case XPATH_STRING:
3786                         if (astate == NULL)
3787                                 return 1;
3788                         datum = CStringGetDatum((char *) xpathobj->stringval);
3789                         datumtype = CSTRINGOID;
3790                         break;
3791
3792                 default:
3793                         elog(ERROR, "xpath expression result type %d is unsupported",
3794                                  xpathobj->type);
3795                         return 0;                       /* keep compiler quiet */
3796         }
3797
3798         /* Common code for scalar-value cases */
3799         result_str = map_sql_value_to_xml_value(datum, datumtype, true);
3800         datum = PointerGetDatum(cstring_to_xmltype(result_str));
3801         (void) accumArrayResult(astate, datum, false,
3802                                                         XMLOID, CurrentMemoryContext);
3803         return 1;
3804 }
3805
3806
3807 /*
3808  * Common code for xpath() and xmlexists()
3809  *
3810  * Evaluate XPath expression and return number of nodes in res_items
3811  * and array of XML values in astate.  Either of those pointers can be
3812  * NULL if the corresponding result isn't wanted.
3813  *
3814  * It is up to the user to ensure that the XML passed is in fact
3815  * an XML document - XPath doesn't work easily on fragments without
3816  * a context node being known.
3817  */
3818 static void
3819 xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
3820                            int *res_nitems, ArrayBuildState *astate)
3821 {
3822         PgXmlErrorContext *xmlerrcxt;
3823         volatile xmlParserCtxtPtr ctxt = NULL;
3824         volatile xmlDocPtr doc = NULL;
3825         volatile xmlXPathContextPtr xpathctx = NULL;
3826         volatile xmlXPathCompExprPtr xpathcomp = NULL;
3827         volatile xmlXPathObjectPtr xpathobj = NULL;
3828         char       *datastr;
3829         int32           len;
3830         int32           xpath_len;
3831         xmlChar    *string;
3832         xmlChar    *xpath_expr;
3833         int                     i;
3834         int                     ndim;
3835         Datum      *ns_names_uris;
3836         bool       *ns_names_uris_nulls;
3837         int                     ns_count;
3838
3839         /*
3840          * Namespace mappings are passed as text[].  If an empty array is passed
3841          * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
3842          * Else, a 2-dimensional array with length of the second axis being equal
3843          * to 2 should be passed, i.e., every subarray contains 2 elements, the
3844          * first element defining the name, the second one the URI.  Example:
3845          * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
3846          * 'http://example2.com']].
3847          */
3848         ndim = namespaces ? ARR_NDIM(namespaces) : 0;
3849         if (ndim != 0)
3850         {
3851                 int                *dims;
3852
3853                 dims = ARR_DIMS(namespaces);
3854
3855                 if (ndim != 2 || dims[1] != 2)
3856                         ereport(ERROR,
3857                                         (errcode(ERRCODE_DATA_EXCEPTION),
3858                                          errmsg("invalid array for XML namespace mapping"),
3859                                          errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
3860
3861                 Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
3862
3863                 deconstruct_array(namespaces, TEXTOID, -1, false, 'i',
3864                                                   &ns_names_uris, &ns_names_uris_nulls,
3865                                                   &ns_count);
3866
3867                 Assert((ns_count % 2) == 0);    /* checked above */
3868                 ns_count /= 2;                  /* count pairs only */
3869         }
3870         else
3871         {
3872                 ns_names_uris = NULL;
3873                 ns_names_uris_nulls = NULL;
3874                 ns_count = 0;
3875         }
3876
3877         datastr = VARDATA(data);
3878         len = VARSIZE(data) - VARHDRSZ;
3879         xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text);
3880         if (xpath_len == 0)
3881                 ereport(ERROR,
3882                                 (errcode(ERRCODE_DATA_EXCEPTION),
3883                                  errmsg("empty XPath expression")));
3884
3885         string = pg_xmlCharStrndup(datastr, len);
3886         xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
3887
3888         xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
3889
3890         PG_TRY();
3891         {
3892                 xmlInitParser();
3893
3894                 /*
3895                  * redundant XML parsing (two parsings for the same value during one
3896                  * command execution are possible)
3897                  */
3898                 ctxt = xmlNewParserCtxt();
3899                 if (ctxt == NULL || xmlerrcxt->err_occurred)
3900                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3901                                                 "could not allocate parser context");
3902                 doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
3903                 if (doc == NULL || xmlerrcxt->err_occurred)
3904                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
3905                                                 "could not parse XML document");
3906                 xpathctx = xmlXPathNewContext(doc);
3907                 if (xpathctx == NULL || xmlerrcxt->err_occurred)
3908                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3909                                                 "could not allocate XPath context");
3910                 xpathctx->node = xmlDocGetRootElement(doc);
3911                 if (xpathctx->node == NULL || xmlerrcxt->err_occurred)
3912                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
3913                                                 "could not find root XML element");
3914
3915                 /* register namespaces, if any */
3916                 if (ns_count > 0)
3917                 {
3918                         for (i = 0; i < ns_count; i++)
3919                         {
3920                                 char       *ns_name;
3921                                 char       *ns_uri;
3922
3923                                 if (ns_names_uris_nulls[i * 2] ||
3924                                         ns_names_uris_nulls[i * 2 + 1])
3925                                         ereport(ERROR,
3926                                                         (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
3927                                           errmsg("neither namespace name nor URI may be null")));
3928                                 ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
3929                                 ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
3930                                 if (xmlXPathRegisterNs(xpathctx,
3931                                                                            (xmlChar *) ns_name,
3932                                                                            (xmlChar *) ns_uri) != 0)
3933                                         ereport(ERROR,          /* is this an internal error??? */
3934                                                         (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
3935                                                                         ns_name, ns_uri)));
3936                         }
3937                 }
3938
3939                 xpathcomp = xmlXPathCompile(xpath_expr);
3940                 if (xpathcomp == NULL || xmlerrcxt->err_occurred)
3941                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
3942                                                 "invalid XPath expression");
3943
3944                 /*
3945                  * Version 2.6.27 introduces a function named
3946                  * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
3947                  * but we can derive the existence by whether any nodes are returned,
3948                  * thereby preventing a library version upgrade and keeping the code
3949                  * the same.
3950                  */
3951                 xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
3952                 if (xpathobj == NULL || xmlerrcxt->err_occurred)
3953                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
3954                                                 "could not create XPath object");
3955
3956                 /*
3957                  * Extract the results as requested.
3958                  */
3959                 if (res_nitems != NULL)
3960                         *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
3961                 else
3962                         (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
3963         }
3964         PG_CATCH();
3965         {
3966                 if (xpathobj)
3967                         xmlXPathFreeObject(xpathobj);
3968                 if (xpathcomp)
3969                         xmlXPathFreeCompExpr(xpathcomp);
3970                 if (xpathctx)
3971                         xmlXPathFreeContext(xpathctx);
3972                 if (doc)
3973                         xmlFreeDoc(doc);
3974                 if (ctxt)
3975                         xmlFreeParserCtxt(ctxt);
3976
3977                 pg_xml_done(xmlerrcxt, true);
3978
3979                 PG_RE_THROW();
3980         }
3981         PG_END_TRY();
3982
3983         xmlXPathFreeObject(xpathobj);
3984         xmlXPathFreeCompExpr(xpathcomp);
3985         xmlXPathFreeContext(xpathctx);
3986         xmlFreeDoc(doc);
3987         xmlFreeParserCtxt(ctxt);
3988
3989         pg_xml_done(xmlerrcxt, false);
3990 }
3991 #endif   /* USE_LIBXML */
3992
3993 /*
3994  * Evaluate XPath expression and return array of XML values.
3995  *
3996  * As we have no support of XQuery sequences yet, this function seems
3997  * to be the most useful one (array of XML functions plays a role of
3998  * some kind of substitution for XQuery sequences).
3999  */
4000 Datum
4001 xpath(PG_FUNCTION_ARGS)
4002 {
4003 #ifdef USE_LIBXML
4004         text       *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4005         xmltype    *data = PG_GETARG_XML_P(1);
4006         ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4007         ArrayBuildState *astate;
4008
4009         astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
4010         xpath_internal(xpath_expr_text, data, namespaces,
4011                                    NULL, astate);
4012         PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
4013 #else
4014         NO_XML_SUPPORT();
4015         return 0;
4016 #endif
4017 }
4018
4019 /*
4020  * Determines if the node specified by the supplied XPath exists
4021  * in a given XML document, returning a boolean.
4022  */
4023 Datum
4024 xmlexists(PG_FUNCTION_ARGS)
4025 {
4026 #ifdef USE_LIBXML
4027         text       *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4028         xmltype    *data = PG_GETARG_XML_P(1);
4029         int                     res_nitems;
4030
4031         xpath_internal(xpath_expr_text, data, NULL,
4032                                    &res_nitems, NULL);
4033
4034         PG_RETURN_BOOL(res_nitems > 0);
4035 #else
4036         NO_XML_SUPPORT();
4037         return 0;
4038 #endif
4039 }
4040
4041 /*
4042  * Determines if the node specified by the supplied XPath exists
4043  * in a given XML document, returning a boolean. Differs from
4044  * xmlexists as it supports namespaces and is not defined in SQL/XML.
4045  */
4046 Datum
4047 xpath_exists(PG_FUNCTION_ARGS)
4048 {
4049 #ifdef USE_LIBXML
4050         text       *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4051         xmltype    *data = PG_GETARG_XML_P(1);
4052         ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4053         int                     res_nitems;
4054
4055         xpath_internal(xpath_expr_text, data, namespaces,
4056                                    &res_nitems, NULL);
4057
4058         PG_RETURN_BOOL(res_nitems > 0);
4059 #else
4060         NO_XML_SUPPORT();
4061         return 0;
4062 #endif
4063 }
4064
4065 /*
4066  * Functions for checking well-formed-ness
4067  */
4068
4069 #ifdef USE_LIBXML
4070 static bool
4071 wellformed_xml(text *data, XmlOptionType xmloption_arg)
4072 {
4073         bool            result;
4074         volatile xmlDocPtr doc = NULL;
4075
4076         /* We want to catch any exceptions and return false */
4077         PG_TRY();
4078         {
4079                 doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());
4080                 result = true;
4081         }
4082         PG_CATCH();
4083         {
4084                 FlushErrorState();
4085                 result = false;
4086         }
4087         PG_END_TRY();
4088
4089         if (doc)
4090                 xmlFreeDoc(doc);
4091
4092         return result;
4093 }
4094 #endif
4095
4096 Datum
4097 xml_is_well_formed(PG_FUNCTION_ARGS)
4098 {
4099 #ifdef USE_LIBXML
4100         text       *data = PG_GETARG_TEXT_PP(0);
4101
4102         PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4103 #else
4104         NO_XML_SUPPORT();
4105         return 0;
4106 #endif   /* not USE_LIBXML */
4107 }
4108
4109 Datum
4110 xml_is_well_formed_document(PG_FUNCTION_ARGS)
4111 {
4112 #ifdef USE_LIBXML
4113         text       *data = PG_GETARG_TEXT_PP(0);
4114
4115         PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4116 #else
4117         NO_XML_SUPPORT();
4118         return 0;
4119 #endif   /* not USE_LIBXML */
4120 }
4121
4122 Datum
4123 xml_is_well_formed_content(PG_FUNCTION_ARGS)
4124 {
4125 #ifdef USE_LIBXML
4126         text       *data = PG_GETARG_TEXT_PP(0);
4127
4128         PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4129 #else
4130         NO_XML_SUPPORT();
4131         return 0;
4132 #endif   /* not USE_LIBXML */
4133 }
4134
4135 /*
4136  * support functions for XMLTABLE
4137  *
4138  */
4139 #ifdef USE_LIBXML
4140
4141 /*
4142  * Returns private data from executor state. Ensure validity by check with
4143  * MAGIC number.
4144  */
4145 static inline XmlTableBuilderData *
4146 GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
4147 {
4148         XmlTableBuilderData *result;
4149
4150         if (!IsA(state, TableFuncScanState))
4151                 elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4152         result = (XmlTableBuilderData *) state->opaque;
4153         if (result->magic != XMLTABLE_CONTEXT_MAGIC)
4154                 elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4155
4156         return result;
4157 }
4158 #endif
4159
4160 /*
4161  * XmlTableInitOpaque
4162  *              Fill in TableFuncScanState->opaque for XmlTable processor; initialize
4163  *              the XML parser.
4164  *
4165  * Note: Because we call pg_xml_init() here and pg_xml_done() in
4166  * XmlTableDestroyOpaque, it is critical for robustness that no other
4167  * executor nodes run until this node is processed to completion.  Caller
4168  * must execute this to completion (probably filling a tuplestore to exhaust
4169  * this node in a single pass) instead of using row-per-call mode.
4170  */
4171 static void
4172 XmlTableInitOpaque(TableFuncScanState *state, int natts)
4173 {
4174 #ifdef USE_LIBXML
4175         volatile xmlParserCtxtPtr ctxt = NULL;
4176         XmlTableBuilderData *xtCxt;
4177         PgXmlErrorContext *xmlerrcxt;
4178
4179         xtCxt = palloc0(sizeof(XmlTableBuilderData));
4180         xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
4181         xtCxt->natts = natts;
4182         xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
4183
4184         xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4185
4186         PG_TRY();
4187         {
4188                 xmlInitParser();
4189
4190                 ctxt = xmlNewParserCtxt();
4191                 if (ctxt == NULL || xmlerrcxt->err_occurred)
4192                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4193                                                 "could not allocate parser context");
4194         }
4195         PG_CATCH();
4196         {
4197                 if (ctxt != NULL)
4198                         xmlFreeParserCtxt(ctxt);
4199
4200                 pg_xml_done(xmlerrcxt, true);
4201
4202                 PG_RE_THROW();
4203         }
4204         PG_END_TRY();
4205
4206         xtCxt->xmlerrcxt = xmlerrcxt;
4207         xtCxt->ctxt = ctxt;
4208
4209         state->opaque = xtCxt;
4210 #else
4211         NO_XML_SUPPORT();
4212 #endif   /* not USE_LIBXML */
4213 }
4214
4215 /*
4216  * XmlTableSetDocument
4217  *              Install the input document
4218  */
4219 static void
4220 XmlTableSetDocument(TableFuncScanState *state, Datum value)
4221 {
4222 #ifdef USE_LIBXML
4223         XmlTableBuilderData *xtCxt;
4224         xmltype    *xmlval = DatumGetXmlP(value);
4225         char       *str;
4226         xmlChar    *xstr;
4227         int                     length;
4228         volatile xmlDocPtr doc = NULL;
4229         volatile xmlXPathContextPtr xpathcxt = NULL;
4230
4231         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
4232
4233         /*
4234          * Use out function for casting to string (remove encoding property). See
4235          * comment in xml_out.
4236          */
4237         str = xml_out_internal(xmlval, 0);
4238
4239         length = strlen(str);
4240         xstr = pg_xmlCharStrndup(str, length);
4241
4242         PG_TRY();
4243         {
4244                 doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4245                 if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
4246                         xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4247                                                 "could not parse XML document");
4248                 xpathcxt = xmlXPathNewContext(doc);
4249                 if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
4250                         xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4251                                                 "could not allocate XPath context");
4252                 xpathcxt->node = xmlDocGetRootElement(doc);
4253                 if (xpathcxt->node == NULL || xtCxt->xmlerrcxt->err_occurred)
4254                         xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4255                                                 "could not find root XML element");
4256         }
4257         PG_CATCH();
4258         {
4259                 if (xpathcxt != NULL)
4260                         xmlXPathFreeContext(xpathcxt);
4261                 if (doc != NULL)
4262                         xmlFreeDoc(doc);
4263
4264                 PG_RE_THROW();
4265         }
4266         PG_END_TRY();
4267
4268         xtCxt->doc = doc;
4269         xtCxt->xpathcxt = xpathcxt;
4270 #else
4271         NO_XML_SUPPORT();
4272 #endif   /* not USE_LIBXML */
4273 }
4274
4275 /*
4276  * XmlTableSetNamespace
4277  *              Add a namespace declaration
4278  */
4279 static void
4280 XmlTableSetNamespace(TableFuncScanState *state, char *name, char *uri)
4281 {
4282 #ifdef USE_LIBXML
4283         XmlTableBuilderData *xtCxt;
4284
4285         if (name == NULL)
4286                 ereport(ERROR,
4287                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4288                                  errmsg("DEFAULT namespace is not supported")));
4289         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4290
4291         if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4292                                                    pg_xmlCharStrndup(name, strlen(name)),
4293                                                    pg_xmlCharStrndup(uri, strlen(uri))))
4294                 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4295                                         "could not set XML namespace");
4296 #else
4297         NO_XML_SUPPORT();
4298 #endif   /* not USE_LIBXML */
4299 }
4300
4301 /*
4302  * XmlTableSetRowFilter
4303  *              Install the row-filter Xpath expression.
4304  */
4305 static void
4306 XmlTableSetRowFilter(TableFuncScanState *state, char *path)
4307 {
4308 #ifdef USE_LIBXML
4309         XmlTableBuilderData *xtCxt;
4310         xmlChar    *xstr;
4311
4312         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4313
4314         if (*path == '\0')
4315                 ereport(ERROR,
4316                                 (errcode(ERRCODE_DATA_EXCEPTION),
4317                                  errmsg("row path filter must not be empty string")));
4318
4319         xstr = pg_xmlCharStrndup(path, strlen(path));
4320
4321         xtCxt->xpathcomp = xmlXPathCompile(xstr);
4322         if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
4323                 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_SYNTAX_ERROR,
4324                                         "invalid XPath expression");
4325 #else
4326         NO_XML_SUPPORT();
4327 #endif   /* not USE_LIBXML */
4328 }
4329
4330 /*
4331  * XmlTableSetColumnFilter
4332  *              Install the column-filter Xpath expression, for the given column.
4333  */
4334 static void
4335 XmlTableSetColumnFilter(TableFuncScanState *state, char *path, int colnum)
4336 {
4337 #ifdef USE_LIBXML
4338         XmlTableBuilderData *xtCxt;
4339         xmlChar    *xstr;
4340
4341         AssertArg(PointerIsValid(path));
4342
4343         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4344
4345         if (*path == '\0')
4346                 ereport(ERROR,
4347                                 (errcode(ERRCODE_DATA_EXCEPTION),
4348                                  errmsg("column path filter must not be empty string")));
4349
4350         xstr = pg_xmlCharStrndup(path, strlen(path));
4351
4352         xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr);
4353         if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
4354                 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4355                                         "invalid XPath expression");
4356 #else
4357         NO_XML_SUPPORT();
4358 #endif   /* not USE_LIBXML */
4359 }
4360
4361 /*
4362  * XmlTableFetchRow
4363  *              Prepare the next "current" tuple for upcoming GetValue calls.
4364  *              Returns FALSE if the row-filter expression returned no more rows.
4365  */
4366 static bool
4367 XmlTableFetchRow(TableFuncScanState *state)
4368 {
4369 #ifdef USE_LIBXML
4370         XmlTableBuilderData *xtCxt;
4371
4372         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4373
4374         /*
4375          * XmlTable returns table - set of composite values. The error context, is
4376          * used for producement more values, between two calls, there can be
4377          * created and used another libxml2 error context. It is libxml2 global
4378          * value, so it should be refreshed any time before any libxml2 usage,
4379          * that is finished by returning some value.
4380          */
4381         xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4382
4383         if (xtCxt->xpathobj == NULL)
4384         {
4385                 xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4386                 if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4387                         xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4388                                                 "could not create XPath object");
4389
4390                 xtCxt->row_count = 0;
4391         }
4392
4393         if (xtCxt->xpathobj->type == XPATH_NODESET)
4394         {
4395                 if (xtCxt->xpathobj->nodesetval != NULL)
4396                 {
4397                         if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
4398                                 return true;
4399                 }
4400         }
4401
4402         return false;
4403 #else
4404         NO_XML_SUPPORT();
4405         return false;
4406 #endif   /* not USE_LIBXML */
4407 }
4408
4409 /*
4410  * XmlTableGetValue
4411  *              Return the value for column number 'colnum' for the current row.  If
4412  *              column -1 is requested, return representation of the whole row.
4413  *
4414  * This leaks memory, so be sure to reset often the context in which it's
4415  * called.
4416  */
4417 static Datum
4418 XmlTableGetValue(TableFuncScanState *state, int colnum,
4419                                  Oid typid, int32 typmod, bool *isnull)
4420 {
4421 #ifdef USE_LIBXML
4422         XmlTableBuilderData *xtCxt;
4423         Datum           result = (Datum) 0;
4424         xmlNodePtr      cur;
4425         char       *cstr = NULL;
4426         volatile xmlXPathObjectPtr xpathobj = NULL;
4427
4428         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
4429
4430         Assert(xtCxt->xpathobj &&
4431                    xtCxt->xpathobj->type == XPATH_NODESET &&
4432                    xtCxt->xpathobj->nodesetval != NULL);
4433
4434         /* Propagate context related error context to libxml2 */
4435         xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4436
4437         *isnull = false;
4438
4439         cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
4440
4441         Assert(xtCxt->xpathscomp[colnum] != NULL);
4442
4443         PG_TRY();
4444         {
4445                 /* Set current node as entry point for XPath evaluation */
4446                 xtCxt->xpathcxt->node = cur;
4447
4448                 /* Evaluate column path */
4449                 xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
4450                 if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4451                         xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4452                                                 "could not create XPath object");
4453
4454                 /*
4455                  * There are four possible cases, depending on the number of nodes
4456                  * returned by the XPath expression and the type of the target column:
4457                  * a) XPath returns no nodes.  b) One node is returned, and column is
4458                  * of type XML.  c) One node, column type other than XML.  d) Multiple
4459                  * nodes are returned.
4460                  */
4461                 if (xpathobj->type == XPATH_NODESET)
4462                 {
4463                         int                     count = 0;
4464
4465                         if (xpathobj->nodesetval != NULL)
4466                                 count = xpathobj->nodesetval->nodeNr;
4467
4468                         if (xpathobj->nodesetval == NULL || count == 0)
4469                         {
4470                                 *isnull = true;
4471                         }
4472                         else if (count == 1 && typid == XMLOID)
4473                         {
4474                                 text       *textstr;
4475
4476                                 /* simple case, result is one value */
4477                                 textstr = xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[0],
4478                                                                                            xtCxt->xmlerrcxt);
4479                                 cstr = text_to_cstring(textstr);
4480                         }
4481                         else if (count == 1)
4482                         {
4483                                 xmlChar    *str;
4484
4485                                 str = xmlNodeListGetString(xtCxt->doc,
4486                                                    xpathobj->nodesetval->nodeTab[0]->xmlChildrenNode,
4487                                                                                    1);
4488
4489                                 if (str != NULL)
4490                                 {
4491                                         PG_TRY();
4492                                         {
4493                                                 cstr = pstrdup((char *) str);
4494                                         }
4495                                         PG_CATCH();
4496                                         {
4497                                                 xmlFree(str);
4498                                                 PG_RE_THROW();
4499                                         }
4500                                         PG_END_TRY();
4501                                         xmlFree(str);
4502                                 }
4503                                 else
4504                                 {
4505                                         /*
4506                                          * This line ensure mapping of empty tags to PostgreSQL
4507                                          * value. Usually we would to map a empty tag to empty
4508                                          * string. But this mapping can create empty string when
4509                                          * user doesn't expect it - when empty tag is enforced
4510                                          * by libxml2 - when user uses a text() function for
4511                                          * example.
4512                                          */
4513                                         cstr = "";
4514                                 }
4515                         }
4516                         else
4517                         {
4518                                 StringInfoData str;
4519                                 int                     i;
4520
4521                                 Assert(count > 1);
4522
4523                                 /*
4524                                  * When evaluating the XPath expression returns multiple
4525                                  * nodes, the result is the concatenation of them all. The
4526                                  * target type must be XML.
4527                                  */
4528                                 if (typid != XMLOID)
4529                                         ereport(ERROR,
4530                                                         (errcode(ERRCODE_CARDINALITY_VIOLATION),
4531                                                          errmsg("more than one value returned by column XPath expression")));
4532
4533                                 /* Concatenate serialized values */
4534                                 initStringInfo(&str);
4535                                 for (i = 0; i < count; i++)
4536                                 {
4537                                         appendStringInfoText(&str,
4538                                            xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4539                                                                                         xtCxt->xmlerrcxt));
4540                                 }
4541                                 cstr = str.data;
4542                         }
4543                 }
4544                 else if (xpathobj->type == XPATH_STRING)
4545                 {
4546                         cstr = (char *) xpathobj->stringval;
4547                 }
4548                 else
4549                         elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
4550
4551                 /*
4552                  * By here, either cstr contains the result value, or the isnull flag
4553                  * has been set.
4554                  */
4555                 Assert(cstr || *isnull);
4556
4557                 if (!*isnull)
4558                         result = InputFunctionCall(&state->in_functions[colnum],
4559                                                                            cstr,
4560                                                                            state->typioparams[colnum],
4561                                                                            typmod);
4562         }
4563         PG_CATCH();
4564         {
4565                 if (xpathobj != NULL)
4566                         xmlXPathFreeObject(xpathobj);
4567                 PG_RE_THROW();
4568         }
4569         PG_END_TRY();
4570
4571         xmlXPathFreeObject(xpathobj);
4572
4573         return result;
4574 #else
4575         NO_XML_SUPPORT();
4576         return 0;
4577 #endif   /* not USE_LIBXML */
4578 }
4579
4580 /*
4581  * XmlTableDestroyOpaque
4582  *              Release all libxml2 resources
4583  */
4584 static void
4585 XmlTableDestroyOpaque(TableFuncScanState *state)
4586 {
4587 #ifdef USE_LIBXML
4588         XmlTableBuilderData *xtCxt;
4589
4590         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
4591
4592         /* Propagate context related error context to libxml2 */
4593         xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4594
4595         if (xtCxt->xpathscomp != NULL)
4596         {
4597                 int                     i;
4598
4599                 for (i = 0; i < xtCxt->natts; i++)
4600                         if (xtCxt->xpathscomp[i] != NULL)
4601                                 xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
4602         }
4603
4604         if (xtCxt->xpathobj != NULL)
4605                 xmlXPathFreeObject(xtCxt->xpathobj);
4606         if (xtCxt->xpathcomp != NULL)
4607                 xmlXPathFreeCompExpr(xtCxt->xpathcomp);
4608         if (xtCxt->xpathcxt != NULL)
4609                 xmlXPathFreeContext(xtCxt->xpathcxt);
4610         if (xtCxt->doc != NULL)
4611                 xmlFreeDoc(xtCxt->doc);
4612         if (xtCxt->ctxt != NULL)
4613                 xmlFreeParserCtxt(xtCxt->ctxt);
4614
4615         pg_xml_done(xtCxt->xmlerrcxt, true);
4616
4617         /* not valid anymore */
4618         xtCxt->magic = 0;
4619         state->opaque = NULL;
4620
4621 #else
4622         NO_XML_SUPPORT();
4623 #endif   /* not USE_LIBXML */
4624 }