]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/xml.c
Suppress compiler warning in non-USE_LIBXML builds.
[postgresql] / src / backend / utils / adt / xml.c
1 /*-------------------------------------------------------------------------
2  *
3  * xml.c
4  *        XML data type support.
5  *
6  *
7  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/backend/utils/adt/xml.c
11  *
12  *-------------------------------------------------------------------------
13  */
14
15 /*
16  * Generally, XML type support is only available when libxml use was
17  * configured during the build.  But even if that is not done, the
18  * type and all the functions are available, but most of them will
19  * fail.  For one thing, this avoids having to manage variant catalog
20  * installations.  But it also has nice effects such as that you can
21  * dump a database containing XML type data even if the server is not
22  * linked with libxml.  Thus, make sure xml_out() works even if nothing
23  * else does.
24  */
25
26 /*
27  * Notes on memory management:
28  *
29  * Sometimes libxml allocates global structures in the hope that it can reuse
30  * them later on.  This makes it impractical to change the xmlMemSetup
31  * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32  * allocated with malloc() or vice versa.  Since libxml might be used by
33  * loadable modules, eg libperl, our only safe choices are to change the
34  * functions at postmaster/backend launch or not at all.  Since we'd rather
35  * not activate libxml in sessions that might never use it, the latter choice
36  * is the preferred one.  However, for debugging purposes it can be awfully
37  * handy to constrain libxml's allocations to be done in a specific palloc
38  * context, where they're easy to track.  Therefore there is code here that
39  * can be enabled in debug builds to redirect libxml's allocations into a
40  * special context LibxmlContext.  It's not recommended to turn this on in
41  * a production build because of the possibility of bad interactions with
42  * external modules.
43  */
44 /* #define USE_LIBXMLCONTEXT */
45
46 #include "postgres.h"
47
48 #ifdef USE_LIBXML
49 #include <libxml/chvalid.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/tree.h>
53 #include <libxml/uri.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/xmlversion.h>
56 #include <libxml/xmlwriter.h>
57 #include <libxml/xpath.h>
58 #include <libxml/xpathInternals.h>
59
60 /*
61  * We used to check for xmlStructuredErrorContext via a configure test; but
62  * that doesn't work on Windows, so instead use this grottier method of
63  * testing the library version number.
64  */
65 #if LIBXML_VERSION >= 20704
66 #define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
67 #endif
68 #endif   /* USE_LIBXML */
69
70 #include "access/htup_details.h"
71 #include "catalog/namespace.h"
72 #include "catalog/pg_type.h"
73 #include "commands/dbcommands.h"
74 #include "executor/executor.h"
75 #include "executor/spi.h"
76 #include "executor/tablefunc.h"
77 #include "fmgr.h"
78 #include "lib/stringinfo.h"
79 #include "libpq/pqformat.h"
80 #include "mb/pg_wchar.h"
81 #include "miscadmin.h"
82 #include "nodes/execnodes.h"
83 #include "nodes/nodeFuncs.h"
84 #include "utils/array.h"
85 #include "utils/builtins.h"
86 #include "utils/date.h"
87 #include "utils/datetime.h"
88 #include "utils/lsyscache.h"
89 #include "utils/memutils.h"
90 #include "utils/rel.h"
91 #include "utils/syscache.h"
92 #include "utils/xml.h"
93
94
/* GUC variables */

/*
 * NOTE(review): xmlbinary is not referenced in the part of this file shown
 * here; presumably it selects how binary values are rendered as XML text --
 * confirm against its users.
 */
int                     xmlbinary;

/*
 * Default XmlOptionType handed to xml_parse()/xmlparse() by xml_in(),
 * xml_recv() and texttoxml().
 */
int                     xmloption;
98
99 #ifdef USE_LIBXML
100
101 /* random number to identify PgXmlErrorContext */
102 #define ERRCXT_MAGIC    68275028
103
/*
 * State carried through one pg_xml_init() ... pg_xml_done() bracket:
 * the requested strictness, any error text collected so far, and the
 * libxml handlers that were installed before us (saved by pg_xml_init).
 */
struct PgXmlErrorContext
{
        /* identification tag; see ERRCXT_MAGIC */
        int                     magic;
        /* strictness argument passed to pg_xml_init */
        PgXmlStrictness strictness;
        /* current error status and accumulated message, if any */
        bool            err_occurred;
        StringInfoData err_buf;
        /* previous libxml error handling state (saved by pg_xml_init) */
        xmlStructuredErrorFunc saved_errfunc;
        void       *saved_errcxt;
        /* previous libxml entity handler (saved by pg_xml_init) */
        xmlExternalEntityLoader saved_entityfunc;
};
118
119 static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
120                                   xmlParserCtxtPtr ctxt);
121 static void xml_errorHandler(void *data, xmlErrorPtr error);
122 static void xml_ereport_by_code(int level, int sqlcode,
123                                         const char *msg, int errcode);
124 static void chopStringInfoNewlines(StringInfo str);
125 static void appendStringInfoLineSeparator(StringInfo str);
126
127 #ifdef USE_LIBXMLCONTEXT
128
129 static MemoryContext LibxmlContext = NULL;
130
131 static void xml_memory_init(void);
132 static void *xml_palloc(size_t size);
133 static void *xml_repalloc(void *ptr, size_t size);
134 static void xml_pfree(void *ptr);
135 static char *xml_pstrdup(const char *string);
136 #endif   /* USE_LIBXMLCONTEXT */
137
138 static xmlChar *xml_text2xmlChar(text *in);
139 static int parse_xml_decl(const xmlChar *str, size_t *lenp,
140                            xmlChar **version, xmlChar **encoding, int *standalone);
141 static bool print_xml_decl(StringInfo buf, const xmlChar *version,
142                            pg_enc encoding, int standalone);
143 static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
144                   bool preserve_whitespace, int encoding);
145 static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
146 static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
147                                            ArrayBuildState *astate,
148                                            PgXmlErrorContext *xmlerrcxt);
149 static xmlChar *pg_xmlCharStrndup(char *str, size_t len);
150 #endif   /* USE_LIBXML */
151
152 static StringInfo query_to_xml_internal(const char *query, char *tablename,
153                                           const char *xmlschema, bool nulls, bool tableforest,
154                                           const char *targetns, bool top_level);
155 static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
156                                                  bool nulls, bool tableforest, const char *targetns);
157 static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
158                                                                   List *relid_list, bool nulls,
159                                                                   bool tableforest, const char *targetns);
160 static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
161                                                                    bool nulls, bool tableforest,
162                                                                    const char *targetns);
163 static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
164 static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
165 static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
166 static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
167                                                   char *tablename, bool nulls, bool tableforest,
168                                                   const char *targetns, bool top_level);
169
170 /* XMLTABLE support */
171 #ifdef USE_LIBXML
172 /* random number to identify XmlTableContext */
173 #define XMLTABLE_CONTEXT_MAGIC  46922182
/*
 * Working state for the XMLTABLE callbacks (the XmlTable* functions
 * declared below).  Only defined when building with libxml.
 *
 * NOTE(review): the per-field notes are inferred from the field types and
 * names; the code that fills them in is outside this excerpt -- confirm.
 */
typedef struct XmlTableBuilderData
{
        int                     magic;          /* identification tag; see
                                                 * XMLTABLE_CONTEXT_MAGIC */
        int                     natts;          /* presumably the column count
                                                 * given to XmlTableInitOpaque */
        long int        row_count;              /* presumably rows fetched so far */
        PgXmlErrorContext *xmlerrcxt;           /* libxml error-handling state */
        xmlParserCtxtPtr ctxt;                  /* libxml parser context */
        xmlDocPtr       doc;                    /* libxml document handle */
        xmlXPathContextPtr xpathcxt;            /* XPath evaluation context */
        xmlXPathCompExprPtr xpathcomp;          /* one compiled XPath expression
                                                 * (presumably the row filter) */
        xmlXPathObjectPtr xpathobj;             /* XPath evaluation result */
        xmlXPathCompExprPtr *xpathscomp;        /* array of compiled XPath
                                                 * expressions (presumably one
                                                 * per column) */
} XmlTableBuilderData;
187 #endif
188
189 static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
190 static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
191 static void XmlTableSetNamespace(struct TableFuncScanState *state, char *name,
192                                          char *uri);
193 static void XmlTableSetRowFilter(struct TableFuncScanState *state, char *path);
194 static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
195                                                 char *path, int colnum);
196 static bool XmlTableFetchRow(struct TableFuncScanState *state);
197 static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
198                                  Oid typid, int32 typmod, bool *isnull);
199 static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
200
/*
 * Callback table exposing the XmlTable* functions declared above.
 *
 * This is a positional initializer, so the entries must stay in the same
 * order as the members of TableFuncRoutine (declared outside this file,
 * presumably in executor/tablefunc.h).
 */
const TableFuncRoutine XmlTableRoutine =
{
        XmlTableInitOpaque,
        XmlTableSetDocument,
        XmlTableSetNamespace,
        XmlTableSetRowFilter,
        XmlTableSetColumnFilter,
        XmlTableFetchRow,
        XmlTableGetValue,
        XmlTableDestroyOpaque
};
212
/*
 * Report "unsupported XML feature" at ERROR level.  Used as the body of
 * entry points when the file is compiled without USE_LIBXML; callers in
 * this file follow it with a dummy return statement.
 */
#define NO_XML_SUPPORT() \
        ereport(ERROR, \
                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
                         errmsg("unsupported XML feature"), \
                         errdetail("This functionality requires the server to be built with libxml support."), \
                         errhint("You need to rebuild PostgreSQL using --with-libxml.")))
219
220
221 /* from SQL/XML:2008 section 4.9 */
222 #define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
223 #define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
224 #define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
225
226
227 #ifdef USE_LIBXML
228
229 static int
230 xmlChar_to_encoding(const xmlChar *encoding_name)
231 {
232         int                     encoding = pg_char_to_encoding((const char *) encoding_name);
233
234         if (encoding < 0)
235                 ereport(ERROR,
236                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
237                                  errmsg("invalid encoding name \"%s\"",
238                                                 (const char *) encoding_name)));
239         return encoding;
240 }
241 #endif
242
243
244 /*
245  * xml_in uses a plain C string to VARDATA conversion, so for the time being
246  * we use the conversion function for the text datatype.
247  *
248  * This is only acceptable so long as xmltype and text use the same
249  * representation.
250  */
251 Datum
252 xml_in(PG_FUNCTION_ARGS)
253 {
254 #ifdef USE_LIBXML
255         char       *s = PG_GETARG_CSTRING(0);
256         xmltype    *vardata;
257         xmlDocPtr       doc;
258
259         vardata = (xmltype *) cstring_to_text(s);
260
261         /*
262          * Parse the data to check if it is well-formed XML data.  Assume that
263          * ERROR occurred if parsing failed.
264          */
265         doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding());
266         xmlFreeDoc(doc);
267
268         PG_RETURN_XML_P(vardata);
269 #else
270         NO_XML_SUPPORT();
271         return 0;
272 #endif
273 }
274
275
276 #define PG_XML_DEFAULT_VERSION "1.0"
277
278
279 /*
280  * xml_out_internal uses a plain VARDATA to C string conversion, so for the
281  * time being we use the conversion function for the text datatype.
282  *
283  * This is only acceptable so long as xmltype and text use the same
284  * representation.
285  */
286 static char *
287 xml_out_internal(xmltype *x, pg_enc target_encoding)
288 {
289         char       *str = text_to_cstring((text *) x);
290
291 #ifdef USE_LIBXML
292         size_t          len = strlen(str);
293         xmlChar    *version;
294         int                     standalone;
295         int                     res_code;
296
297         if ((res_code = parse_xml_decl((xmlChar *) str,
298                                                                    &len, &version, NULL, &standalone)) == 0)
299         {
300                 StringInfoData buf;
301
302                 initStringInfo(&buf);
303
304                 if (!print_xml_decl(&buf, version, target_encoding, standalone))
305                 {
306                         /*
307                          * If we are not going to produce an XML declaration, eat a single
308                          * newline in the original string to prevent empty first lines in
309                          * the output.
310                          */
311                         if (*(str + len) == '\n')
312                                 len += 1;
313                 }
314                 appendStringInfoString(&buf, str + len);
315
316                 pfree(str);
317
318                 return buf.data;
319         }
320
321         xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
322                                                 "could not parse XML declaration in stored value",
323                                                 res_code);
324 #endif
325         return str;
326 }
327
328
329 Datum
330 xml_out(PG_FUNCTION_ARGS)
331 {
332         xmltype    *x = PG_GETARG_XML_P(0);
333
334         /*
335          * xml_out removes the encoding property in all cases.  This is because we
336          * cannot control from here whether the datum will be converted to a
337          * different client encoding, so we'd do more harm than good by including
338          * it.
339          */
340         PG_RETURN_CSTRING(xml_out_internal(x, 0));
341 }
342
343
344 Datum
345 xml_recv(PG_FUNCTION_ARGS)
346 {
347 #ifdef USE_LIBXML
348         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
349         xmltype    *result;
350         char       *str;
351         char       *newstr;
352         int                     nbytes;
353         xmlDocPtr       doc;
354         xmlChar    *encodingStr = NULL;
355         int                     encoding;
356
357         /*
358          * Read the data in raw format. We don't know yet what the encoding is, as
359          * that information is embedded in the xml declaration; so we have to
360          * parse that before converting to server encoding.
361          */
362         nbytes = buf->len - buf->cursor;
363         str = (char *) pq_getmsgbytes(buf, nbytes);
364
365         /*
366          * We need a null-terminated string to pass to parse_xml_decl().  Rather
367          * than make a separate copy, make the temporary result one byte bigger
368          * than it needs to be.
369          */
370         result = palloc(nbytes + 1 + VARHDRSZ);
371         SET_VARSIZE(result, nbytes + VARHDRSZ);
372         memcpy(VARDATA(result), str, nbytes);
373         str = VARDATA(result);
374         str[nbytes] = '\0';
375
376         parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
377
378         /*
379          * If encoding wasn't explicitly specified in the XML header, treat it as
380          * UTF-8, as that's the default in XML. This is different from xml_in(),
381          * where the input has to go through the normal client to server encoding
382          * conversion.
383          */
384         encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
385
386         /*
387          * Parse the data to check if it is well-formed XML data.  Assume that
388          * xml_parse will throw ERROR if not.
389          */
390         doc = xml_parse(result, xmloption, true, encoding);
391         xmlFreeDoc(doc);
392
393         /* Now that we know what we're dealing with, convert to server encoding */
394         newstr = pg_any_to_server(str, nbytes, encoding);
395
396         if (newstr != str)
397         {
398                 pfree(result);
399                 result = (xmltype *) cstring_to_text(newstr);
400                 pfree(newstr);
401         }
402
403         PG_RETURN_XML_P(result);
404 #else
405         NO_XML_SUPPORT();
406         return 0;
407 #endif
408 }
409
410
411 Datum
412 xml_send(PG_FUNCTION_ARGS)
413 {
414         xmltype    *x = PG_GETARG_XML_P(0);
415         char       *outval;
416         StringInfoData buf;
417
418         /*
419          * xml_out_internal doesn't convert the encoding, it just prints the right
420          * declaration. pq_sendtext will do the conversion.
421          */
422         outval = xml_out_internal(x, pg_get_client_encoding());
423
424         pq_begintypsend(&buf);
425         pq_sendtext(&buf, outval, strlen(outval));
426         pfree(outval);
427         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
428 }
429
430
431 #ifdef USE_LIBXML
432 static void
433 appendStringInfoText(StringInfo str, const text *t)
434 {
435         appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ);
436 }
437 #endif
438
439
440 static xmltype *
441 stringinfo_to_xmltype(StringInfo buf)
442 {
443         return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
444 }
445
446
447 static xmltype *
448 cstring_to_xmltype(const char *string)
449 {
450         return (xmltype *) cstring_to_text(string);
451 }
452
453
454 #ifdef USE_LIBXML
455 static xmltype *
456 xmlBuffer_to_xmltype(xmlBufferPtr buf)
457 {
458         return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
459                                                                                                 xmlBufferLength(buf));
460 }
461 #endif
462
463
464 Datum
465 xmlcomment(PG_FUNCTION_ARGS)
466 {
467 #ifdef USE_LIBXML
468         text       *arg = PG_GETARG_TEXT_P(0);
469         char       *argdata = VARDATA(arg);
470         int                     len = VARSIZE(arg) - VARHDRSZ;
471         StringInfoData buf;
472         int                     i;
473
474         /* check for "--" in string or "-" at the end */
475         for (i = 1; i < len; i++)
476         {
477                 if (argdata[i] == '-' && argdata[i - 1] == '-')
478                         ereport(ERROR,
479                                         (errcode(ERRCODE_INVALID_XML_COMMENT),
480                                          errmsg("invalid XML comment")));
481         }
482         if (len > 0 && argdata[len - 1] == '-')
483                 ereport(ERROR,
484                                 (errcode(ERRCODE_INVALID_XML_COMMENT),
485                                  errmsg("invalid XML comment")));
486
487         initStringInfo(&buf);
488         appendStringInfoString(&buf, "<!--");
489         appendStringInfoText(&buf, arg);
490         appendStringInfoString(&buf, "-->");
491
492         PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
493 #else
494         NO_XML_SUPPORT();
495         return 0;
496 #endif
497 }
498
499
500
501 /*
502  * TODO: xmlconcat needs to merge the notations and unparsed entities
503  * of the argument values.  Not very important in practice, though.
504  */
505 xmltype *
506 xmlconcat(List *args)
507 {
508 #ifdef USE_LIBXML
509         int                     global_standalone = 1;
510         xmlChar    *global_version = NULL;
511         bool            global_version_no_value = false;
512         StringInfoData buf;
513         ListCell   *v;
514
515         initStringInfo(&buf);
516         foreach(v, args)
517         {
518                 xmltype    *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
519                 size_t          len;
520                 xmlChar    *version;
521                 int                     standalone;
522                 char       *str;
523
524                 len = VARSIZE(x) - VARHDRSZ;
525                 str = text_to_cstring((text *) x);
526
527                 parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
528
529                 if (standalone == 0 && global_standalone == 1)
530                         global_standalone = 0;
531                 if (standalone < 0)
532                         global_standalone = -1;
533
534                 if (!version)
535                         global_version_no_value = true;
536                 else if (!global_version)
537                         global_version = version;
538                 else if (xmlStrcmp(version, global_version) != 0)
539                         global_version_no_value = true;
540
541                 appendStringInfoString(&buf, str + len);
542                 pfree(str);
543         }
544
545         if (!global_version_no_value || global_standalone >= 0)
546         {
547                 StringInfoData buf2;
548
549                 initStringInfo(&buf2);
550
551                 print_xml_decl(&buf2,
552                                            (!global_version_no_value) ? global_version : NULL,
553                                            0,
554                                            global_standalone);
555
556                 appendStringInfoString(&buf2, buf.data);
557                 buf = buf2;
558         }
559
560         return stringinfo_to_xmltype(&buf);
561 #else
562         NO_XML_SUPPORT();
563         return NULL;
564 #endif
565 }
566
567
568 /*
569  * XMLAGG support
570  */
571 Datum
572 xmlconcat2(PG_FUNCTION_ARGS)
573 {
574         if (PG_ARGISNULL(0))
575         {
576                 if (PG_ARGISNULL(1))
577                         PG_RETURN_NULL();
578                 else
579                         PG_RETURN_XML_P(PG_GETARG_XML_P(1));
580         }
581         else if (PG_ARGISNULL(1))
582                 PG_RETURN_XML_P(PG_GETARG_XML_P(0));
583         else
584                 PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
585                                                                                          PG_GETARG_XML_P(1))));
586 }
587
588
589 Datum
590 texttoxml(PG_FUNCTION_ARGS)
591 {
592         text       *data = PG_GETARG_TEXT_P(0);
593
594         PG_RETURN_XML_P(xmlparse(data, xmloption, true));
595 }
596
597
598 Datum
599 xmltotext(PG_FUNCTION_ARGS)
600 {
601         xmltype    *data = PG_GETARG_XML_P(0);
602
603         /* It's actually binary compatible. */
604         PG_RETURN_TEXT_P((text *) data);
605 }
606
607
608 text *
609 xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg)
610 {
611         if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data))
612                 ereport(ERROR,
613                                 (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
614                                  errmsg("not an XML document")));
615
616         /* It's actually binary compatible, save for the above check. */
617         return (text *) data;
618 }
619
620
621 xmltype *
622 xmlelement(XmlExprState *xmlExpr, ExprContext *econtext)
623 {
624 #ifdef USE_LIBXML
625         XmlExpr    *xexpr = (XmlExpr *) xmlExpr->xprstate.expr;
626         xmltype    *result;
627         List       *named_arg_strings;
628         List       *arg_strings;
629         int                     i;
630         ListCell   *arg;
631         ListCell   *narg;
632         PgXmlErrorContext *xmlerrcxt;
633         volatile xmlBufferPtr buf = NULL;
634         volatile xmlTextWriterPtr writer = NULL;
635
636         /*
637          * We first evaluate all the arguments, then start up libxml and create
638          * the result.  This avoids issues if one of the arguments involves a call
639          * to some other function or subsystem that wants to use libxml on its own
640          * terms.
641          */
642         named_arg_strings = NIL;
643         i = 0;
644         foreach(arg, xmlExpr->named_args)
645         {
646                 ExprState  *e = (ExprState *) lfirst(arg);
647                 Datum           value;
648                 bool            isnull;
649                 char       *str;
650
651                 value = ExecEvalExpr(e, econtext, &isnull);
652                 if (isnull)
653                         str = NULL;
654                 else
655                         str = map_sql_value_to_xml_value(value, exprType((Node *) e->expr), false);
656                 named_arg_strings = lappend(named_arg_strings, str);
657                 i++;
658         }
659
660         arg_strings = NIL;
661         foreach(arg, xmlExpr->args)
662         {
663                 ExprState  *e = (ExprState *) lfirst(arg);
664                 Datum           value;
665                 bool            isnull;
666                 char       *str;
667
668                 value = ExecEvalExpr(e, econtext, &isnull);
669                 /* here we can just forget NULL elements immediately */
670                 if (!isnull)
671                 {
672                         str = map_sql_value_to_xml_value(value,
673                                                                                    exprType((Node *) e->expr), true);
674                         arg_strings = lappend(arg_strings, str);
675                 }
676         }
677
678         /* now safe to run libxml */
679         xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
680
681         PG_TRY();
682         {
683                 buf = xmlBufferCreate();
684                 if (buf == NULL || xmlerrcxt->err_occurred)
685                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
686                                                 "could not allocate xmlBuffer");
687                 writer = xmlNewTextWriterMemory(buf, 0);
688                 if (writer == NULL || xmlerrcxt->err_occurred)
689                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
690                                                 "could not allocate xmlTextWriter");
691
692                 xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
693
694                 forboth(arg, named_arg_strings, narg, xexpr->arg_names)
695                 {
696                         char       *str = (char *) lfirst(arg);
697                         char       *argname = strVal(lfirst(narg));
698
699                         if (str)
700                                 xmlTextWriterWriteAttribute(writer,
701                                                                                         (xmlChar *) argname,
702                                                                                         (xmlChar *) str);
703                 }
704
705                 foreach(arg, arg_strings)
706                 {
707                         char       *str = (char *) lfirst(arg);
708
709                         xmlTextWriterWriteRaw(writer, (xmlChar *) str);
710                 }
711
712                 xmlTextWriterEndElement(writer);
713
714                 /* we MUST do this now to flush data out to the buffer ... */
715                 xmlFreeTextWriter(writer);
716                 writer = NULL;
717
718                 result = xmlBuffer_to_xmltype(buf);
719         }
720         PG_CATCH();
721         {
722                 if (writer)
723                         xmlFreeTextWriter(writer);
724                 if (buf)
725                         xmlBufferFree(buf);
726
727                 pg_xml_done(xmlerrcxt, true);
728
729                 PG_RE_THROW();
730         }
731         PG_END_TRY();
732
733         xmlBufferFree(buf);
734
735         pg_xml_done(xmlerrcxt, false);
736
737         return result;
738 #else
739         NO_XML_SUPPORT();
740         return NULL;
741 #endif
742 }
743
744
745 xmltype *
746 xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
747 {
748 #ifdef USE_LIBXML
749         xmlDocPtr       doc;
750
751         doc = xml_parse(data, xmloption_arg, preserve_whitespace,
752                                         GetDatabaseEncoding());
753         xmlFreeDoc(doc);
754
755         return (xmltype *) data;
756 #else
757         NO_XML_SUPPORT();
758         return NULL;
759 #endif
760 }
761
762
763 xmltype *
764 xmlpi(char *target, text *arg, bool arg_is_null, bool *result_is_null)
765 {
766 #ifdef USE_LIBXML
767         xmltype    *result;
768         StringInfoData buf;
769
770         if (pg_strcasecmp(target, "xml") == 0)
771                 ereport(ERROR,
772                                 (errcode(ERRCODE_SYNTAX_ERROR), /* really */
773                                  errmsg("invalid XML processing instruction"),
774                                  errdetail("XML processing instruction target name cannot be \"%s\".", target)));
775
776         /*
777          * Following the SQL standard, the null check comes after the syntax check
778          * above.
779          */
780         *result_is_null = arg_is_null;
781         if (*result_is_null)
782                 return NULL;
783
784         initStringInfo(&buf);
785
786         appendStringInfo(&buf, "<?%s", target);
787
788         if (arg != NULL)
789         {
790                 char       *string;
791
792                 string = text_to_cstring(arg);
793                 if (strstr(string, "?>") != NULL)
794                         ereport(ERROR,
795                                         (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
796                                          errmsg("invalid XML processing instruction"),
797                         errdetail("XML processing instruction cannot contain \"?>\".")));
798
799                 appendStringInfoChar(&buf, ' ');
800                 appendStringInfoString(&buf, string + strspn(string, " "));
801                 pfree(string);
802         }
803         appendStringInfoString(&buf, "?>");
804
805         result = stringinfo_to_xmltype(&buf);
806         pfree(buf.data);
807         return result;
808 #else
809         NO_XML_SUPPORT();
810         return NULL;
811 #endif
812 }
813
814
815 xmltype *
816 xmlroot(xmltype *data, text *version, int standalone)
817 {
818 #ifdef USE_LIBXML
819         char       *str;
820         size_t          len;
821         xmlChar    *orig_version;
822         int                     orig_standalone;
823         StringInfoData buf;
824
825         len = VARSIZE(data) - VARHDRSZ;
826         str = text_to_cstring((text *) data);
827
828         parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
829
830         if (version)
831                 orig_version = xml_text2xmlChar(version);
832         else
833                 orig_version = NULL;
834
835         switch (standalone)
836         {
837                 case XML_STANDALONE_YES:
838                         orig_standalone = 1;
839                         break;
840                 case XML_STANDALONE_NO:
841                         orig_standalone = 0;
842                         break;
843                 case XML_STANDALONE_NO_VALUE:
844                         orig_standalone = -1;
845                         break;
846                 case XML_STANDALONE_OMITTED:
847                         /* leave original value */
848                         break;
849         }
850
851         initStringInfo(&buf);
852         print_xml_decl(&buf, orig_version, 0, orig_standalone);
853         appendStringInfoString(&buf, str + len);
854
855         return stringinfo_to_xmltype(&buf);
856 #else
857         NO_XML_SUPPORT();
858         return NULL;
859 #endif
860 }
861
862
863 /*
864  * Validate document (given as string) against DTD (given as external link)
865  *
866  * This has been removed because it is a security hole: unprivileged users
867  * should not be able to use Postgres to fetch arbitrary external files,
868  * which unfortunately is exactly what libxml is willing to do with the DTD
869  * parameter.
870  */
871 Datum
872 xmlvalidate(PG_FUNCTION_ARGS)
873 {
874         ereport(ERROR,
875                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
876                          errmsg("xmlvalidate is not implemented")));
877         return 0;
878 }
879
880
881 bool
882 xml_is_document(xmltype *arg)
883 {
884 #ifdef USE_LIBXML
885         bool            result;
886         volatile xmlDocPtr doc = NULL;
887         MemoryContext ccxt = CurrentMemoryContext;
888
889         /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
890         PG_TRY();
891         {
892                 doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
893                                                 GetDatabaseEncoding());
894                 result = true;
895         }
896         PG_CATCH();
897         {
898                 ErrorData  *errdata;
899                 MemoryContext ecxt;
900
901                 ecxt = MemoryContextSwitchTo(ccxt);
902                 errdata = CopyErrorData();
903                 if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT)
904                 {
905                         FlushErrorState();
906                         result = false;
907                 }
908                 else
909                 {
910                         MemoryContextSwitchTo(ecxt);
911                         PG_RE_THROW();
912                 }
913         }
914         PG_END_TRY();
915
916         if (doc)
917                 xmlFreeDoc(doc);
918
919         return result;
920 #else                                                   /* not USE_LIBXML */
921         NO_XML_SUPPORT();
922         return false;
923 #endif   /* not USE_LIBXML */
924 }
925
926
927 #ifdef USE_LIBXML
928
929 /*
930  * pg_xml_init_library --- set up for use of libxml
931  *
932  * This should be called by each function that is about to use libxml
933  * facilities but doesn't require error handling.  It initializes libxml
934  * and verifies compatibility with the loaded libxml version.  These are
935  * once-per-session activities.
936  *
937  * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
938  * check)
939  */
940 void
941 pg_xml_init_library(void)
942 {
943         static bool first_time = true;
944
945         if (first_time)
946         {
947                 /* Stuff we need do only once per session */
948
949                 /*
950                  * Currently, we have no pure UTF-8 support for internals -- check if
951                  * we can work.
952                  */
953                 if (sizeof(char) != sizeof(xmlChar))
954                         ereport(ERROR,
955                                         (errmsg("could not initialize XML library"),
956                                          errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
957                                                            (int) sizeof(char), (int) sizeof(xmlChar))));
958
959 #ifdef USE_LIBXMLCONTEXT
960                 /* Set up libxml's memory allocation our way */
961                 xml_memory_init();
962 #endif
963
964                 /* Check library compatibility */
965                 LIBXML_TEST_VERSION;
966
967                 first_time = false;
968         }
969 }
970
971 /*
972  * pg_xml_init --- set up for use of libxml and register an error handler
973  *
974  * This should be called by each function that is about to use libxml
975  * facilities and requires error handling.  It initializes libxml with
976  * pg_xml_init_library() and establishes our libxml error handler.
977  *
978  * strictness determines which errors are reported and which are ignored.
979  *
980  * Calls to this function MUST be followed by a PG_TRY block that guarantees
981  * that pg_xml_done() is called during either normal or error exit.
982  *
983  * This is exported for use by contrib/xml2, as well as other code that might
984  * wish to share use of this module's libxml error handler.
985  */
986 PgXmlErrorContext *
987 pg_xml_init(PgXmlStrictness strictness)
988 {
989         PgXmlErrorContext *errcxt;
990         void       *new_errcxt;
991
992         /* Do one-time setup if needed */
993         pg_xml_init_library();
994
995         /* Create error handling context structure */
996         errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
997         errcxt->magic = ERRCXT_MAGIC;
998         errcxt->strictness = strictness;
999         errcxt->err_occurred = false;
1000         initStringInfo(&errcxt->err_buf);
1001
1002         /*
1003          * Save original error handler and install ours. libxml originally didn't
1004          * distinguish between the contexts for generic and for structured error
1005          * handlers.  If we're using an old libxml version, we must thus save the
1006          * generic error context, even though we're using a structured error
1007          * handler.
1008          */
1009         errcxt->saved_errfunc = xmlStructuredError;
1010
1011 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1012         errcxt->saved_errcxt = xmlStructuredErrorContext;
1013 #else
1014         errcxt->saved_errcxt = xmlGenericErrorContext;
1015 #endif
1016
1017         xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler);
1018
1019         /*
1020          * Verify that xmlSetStructuredErrorFunc set the context variable we
1021          * expected it to.  If not, the error context pointer we just saved is not
1022          * the correct thing to restore, and since that leaves us without a way to
1023          * restore the context in pg_xml_done, we must fail.
1024          *
1025          * The only known situation in which this test fails is if we compile with
1026          * headers from a libxml2 that doesn't track the structured error context
1027          * separately (< 2.7.4), but at runtime use a version that does, or vice
1028          * versa.  The libxml2 authors did not treat that change as constituting
1029          * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1030          * fails to protect us from this.
1031          */
1032
1033 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1034         new_errcxt = xmlStructuredErrorContext;
1035 #else
1036         new_errcxt = xmlGenericErrorContext;
1037 #endif
1038
1039         if (new_errcxt != (void *) errcxt)
1040                 ereport(ERROR,
1041                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1042                                  errmsg("could not set up XML error handler"),
1043                                  errhint("This probably indicates that the version of libxml2"
1044                                                  " being used is not compatible with the libxml2"
1045                                                  " header files that PostgreSQL was built with.")));
1046
1047         /*
1048          * Also, install an entity loader to prevent unwanted fetches of external
1049          * files and URLs.
1050          */
1051         errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1052         xmlSetExternalEntityLoader(xmlPgEntityLoader);
1053
1054         return errcxt;
1055 }
1056
1057
1058 /*
1059  * pg_xml_done --- restore previous libxml error handling
1060  *
1061  * Resets libxml's global error-handling state to what it was before
1062  * pg_xml_init() was called.
1063  *
1064  * This routine verifies that all pending errors have been dealt with
1065  * (in assert-enabled builds, anyway).
1066  */
1067 void
1068 pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1069 {
1070         void       *cur_errcxt;
1071
1072         /* An assert seems like enough protection here */
1073         Assert(errcxt->magic == ERRCXT_MAGIC);
1074
1075         /*
1076          * In a normal exit, there should be no un-handled libxml errors.  But we
1077          * shouldn't try to enforce this during error recovery, since the longjmp
1078          * could have been thrown before xml_ereport had a chance to run.
1079          */
1080         Assert(!errcxt->err_occurred || isError);
1081
1082         /*
1083          * Check that libxml's global state is correct, warn if not.  This is a
1084          * real test and not an Assert because it has a higher probability of
1085          * happening.
1086          */
1087 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1088         cur_errcxt = xmlStructuredErrorContext;
1089 #else
1090         cur_errcxt = xmlGenericErrorContext;
1091 #endif
1092
1093         if (cur_errcxt != (void *) errcxt)
1094                 elog(WARNING, "libxml error handling state is out of sync with xml.c");
1095
1096         /* Restore the saved handlers */
1097         xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1098         xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1099
1100         /*
1101          * Mark the struct as invalid, just in case somebody somehow manages to
1102          * call xml_errorHandler or xml_ereport with it.
1103          */
1104         errcxt->magic = 0;
1105
1106         /* Release memory */
1107         pfree(errcxt->err_buf.data);
1108         pfree(errcxt);
1109 }
1110
1111
1112 /*
1113  * pg_xml_error_occurred() --- test the error flag
1114  */
1115 bool
1116 pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1117 {
1118         return errcxt->err_occurred;
1119 }
1120
1121
1122 /*
1123  * SQL/XML allows storing "XML documents" or "XML content".  "XML
1124  * documents" are specified by the XML specification and are parsed
1125  * easily by libxml.  "XML content" is specified by SQL/XML as the
1126  * production "XMLDecl? content".  But libxml can only parse the
1127  * "content" part, so we have to parse the XML declaration ourselves
1128  * to complete this.
1129  */
1130
/*
 * Return XML_ERR_SPACE_REQUIRED from the enclosing function unless the
 * character at p is blank.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

/*
 * Advance p past any run of blank characters.  Wrapped in do/while so that
 * it behaves as a single statement wherever it is used.
 */
#define SKIP_XML_SPACE(p) \
	do { \
		while (xmlIsBlank_ch(*(p))) \
			(p)++; \
	} while (0)

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/*
 * Beware of multiple evaluations of argument!  The argument is parenthesized
 * in the comparisons so that a compound expression is compared as a whole.
 */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
			|| xmlIsDigit_ch(c) \
			|| (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
			|| xmlIsCombiningQ(c) \
			|| xmlIsExtender_ch(c))
1148
1149 /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1150 static xmlChar *
1151 xml_pnstrdup(const xmlChar *str, size_t len)
1152 {
1153         xmlChar    *result;
1154
1155         result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1156         memcpy(result, str, len * sizeof(xmlChar));
1157         result[len] = 0;
1158         return result;
1159 }
1160
1161 /* Ditto, except input is char* */
1162 static xmlChar *
1163 pg_xmlCharStrndup(char *str, size_t len)
1164 {
1165         xmlChar    *result;
1166
1167         result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1168         memcpy(result, str, len);
1169         result[len] = '\0';
1170
1171         return result;
1172 }
1173
1174 /*
1175  * str is the null-terminated input string.  Remaining arguments are
1176  * output arguments; each can be NULL if value is not wanted.
1177  * version and encoding are returned as locally-palloc'd strings.
1178  * Result is 0 if OK, an error code if not.
1179  */
1180 static int
1181 parse_xml_decl(const xmlChar *str, size_t *lenp,
1182                            xmlChar **version, xmlChar **encoding, int *standalone)
1183 {
1184         const xmlChar *p;
1185         const xmlChar *save_p;
1186         size_t          len;
1187         int                     utf8char;
1188         int                     utf8len;
1189
1190         /*
1191          * Only initialize libxml.  We don't need error handling here, but we do
1192          * need to make sure libxml is initialized before calling any of its
1193          * functions.  Note that this is safe (and a no-op) if caller has already
1194          * done pg_xml_init().
1195          */
1196         pg_xml_init_library();
1197
1198         /* Initialize output arguments to "not present" */
1199         if (version)
1200                 *version = NULL;
1201         if (encoding)
1202                 *encoding = NULL;
1203         if (standalone)
1204                 *standalone = -1;
1205
1206         p = str;
1207
1208         if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1209                 goto finished;
1210
1211         /* if next char is name char, it's a PI like <?xml-stylesheet ...?> */
1212         utf8len = strlen((const char *) (p + 5));
1213         utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1214         if (PG_XMLISNAMECHAR(utf8char))
1215                 goto finished;
1216
1217         p += 5;
1218
1219         /* version */
1220         CHECK_XML_SPACE(p);
1221         SKIP_XML_SPACE(p);
1222         if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1223                 return XML_ERR_VERSION_MISSING;
1224         p += 7;
1225         SKIP_XML_SPACE(p);
1226         if (*p != '=')
1227                 return XML_ERR_VERSION_MISSING;
1228         p += 1;
1229         SKIP_XML_SPACE(p);
1230
1231         if (*p == '\'' || *p == '"')
1232         {
1233                 const xmlChar *q;
1234
1235                 q = xmlStrchr(p + 1, *p);
1236                 if (!q)
1237                         return XML_ERR_VERSION_MISSING;
1238
1239                 if (version)
1240                         *version = xml_pnstrdup(p + 1, q - p - 1);
1241                 p = q + 1;
1242         }
1243         else
1244                 return XML_ERR_VERSION_MISSING;
1245
1246         /* encoding */
1247         save_p = p;
1248         SKIP_XML_SPACE(p);
1249         if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1250         {
1251                 CHECK_XML_SPACE(save_p);
1252                 p += 8;
1253                 SKIP_XML_SPACE(p);
1254                 if (*p != '=')
1255                         return XML_ERR_MISSING_ENCODING;
1256                 p += 1;
1257                 SKIP_XML_SPACE(p);
1258
1259                 if (*p == '\'' || *p == '"')
1260                 {
1261                         const xmlChar *q;
1262
1263                         q = xmlStrchr(p + 1, *p);
1264                         if (!q)
1265                                 return XML_ERR_MISSING_ENCODING;
1266
1267                         if (encoding)
1268                                 *encoding = xml_pnstrdup(p + 1, q - p - 1);
1269                         p = q + 1;
1270                 }
1271                 else
1272                         return XML_ERR_MISSING_ENCODING;
1273         }
1274         else
1275         {
1276                 p = save_p;
1277         }
1278
1279         /* standalone */
1280         save_p = p;
1281         SKIP_XML_SPACE(p);
1282         if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1283         {
1284                 CHECK_XML_SPACE(save_p);
1285                 p += 10;
1286                 SKIP_XML_SPACE(p);
1287                 if (*p != '=')
1288                         return XML_ERR_STANDALONE_VALUE;
1289                 p += 1;
1290                 SKIP_XML_SPACE(p);
1291                 if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1292                         xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1293                 {
1294                         if (standalone)
1295                                 *standalone = 1;
1296                         p += 5;
1297                 }
1298                 else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1299                                  xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1300                 {
1301                         if (standalone)
1302                                 *standalone = 0;
1303                         p += 4;
1304                 }
1305                 else
1306                         return XML_ERR_STANDALONE_VALUE;
1307         }
1308         else
1309         {
1310                 p = save_p;
1311         }
1312
1313         SKIP_XML_SPACE(p);
1314         if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1315                 return XML_ERR_XMLDECL_NOT_FINISHED;
1316         p += 2;
1317
1318 finished:
1319         len = p - str;
1320
1321         for (p = str; p < str + len; p++)
1322                 if (*p > 127)
1323                         return XML_ERR_INVALID_CHAR;
1324
1325         if (lenp)
1326                 *lenp = len;
1327
1328         return XML_ERR_OK;
1329 }
1330
1331
1332 /*
1333  * Write an XML declaration.  On output, we adjust the XML declaration
1334  * as follows.  (These rules are the moral equivalent of the clause
1335  * "Serialization of an XML value" in the SQL standard.)
1336  *
1337  * We try to avoid generating an XML declaration if possible.  This is
1338  * so that you don't get trivial things like xml '<foo/>' resulting in
1339  * '<?xml version="1.0"?><foo/>', which would surely be annoying.  We
1340  * must provide a declaration if the standalone property is specified
1341  * or if we include an encoding declaration.  If we have a
1342  * declaration, we must specify a version (XML requires this).
1343  * Otherwise we only make a declaration if the version is not "1.0",
1344  * which is the default version specified in SQL:2003.
1345  */
1346 static bool
1347 print_xml_decl(StringInfo buf, const xmlChar *version,
1348                            pg_enc encoding, int standalone)
1349 {
1350         if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1351                 || (encoding && encoding != PG_UTF8)
1352                 || standalone != -1)
1353         {
1354                 appendStringInfoString(buf, "<?xml");
1355
1356                 if (version)
1357                         appendStringInfo(buf, " version=\"%s\"", version);
1358                 else
1359                         appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1360
1361                 if (encoding && encoding != PG_UTF8)
1362                 {
1363                         /*
1364                          * XXX might be useful to convert this to IANA names (ISO-8859-1
1365                          * instead of LATIN1 etc.); needs field experience
1366                          */
1367                         appendStringInfo(buf, " encoding=\"%s\"",
1368                                                          pg_encoding_to_char(encoding));
1369                 }
1370
1371                 if (standalone == 1)
1372                         appendStringInfoString(buf, " standalone=\"yes\"");
1373                 else if (standalone == 0)
1374                         appendStringInfoString(buf, " standalone=\"no\"");
1375                 appendStringInfoString(buf, "?>");
1376
1377                 return true;
1378         }
1379         else
1380                 return false;
1381 }
1382
1383
1384 /*
1385  * Convert a C string to XML internal representation
1386  *
1387  * Note: it is caller's responsibility to xmlFreeDoc() the result,
1388  * else a permanent memory leak will ensue!
1389  *
1390  * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1391  * yet do not use SAX - see xmlreader.c)
1392  */
1393 static xmlDocPtr
1394 xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1395                   int encoding)
1396 {
1397         int32           len;
1398         xmlChar    *string;
1399         xmlChar    *utf8string;
1400         PgXmlErrorContext *xmlerrcxt;
1401         volatile xmlParserCtxtPtr ctxt = NULL;
1402         volatile xmlDocPtr doc = NULL;
1403
1404         len = VARSIZE(data) - VARHDRSZ;         /* will be useful later */
1405         string = xml_text2xmlChar(data);
1406
1407         utf8string = pg_do_encoding_conversion(string,
1408                                                                                    len,
1409                                                                                    encoding,
1410                                                                                    PG_UTF8);
1411
1412         /* Start up libxml and its parser */
1413         xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1414
1415         /* Use a TRY block to ensure we clean up correctly */
1416         PG_TRY();
1417         {
1418                 xmlInitParser();
1419
1420                 ctxt = xmlNewParserCtxt();
1421                 if (ctxt == NULL || xmlerrcxt->err_occurred)
1422                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1423                                                 "could not allocate parser context");
1424
1425                 if (xmloption_arg == XMLOPTION_DOCUMENT)
1426                 {
1427                         /*
1428                          * Note, that here we try to apply DTD defaults
1429                          * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
1430                          * 'Default values defined by internal DTD are applied'. As for
1431                          * external DTDs, we try to support them too, (see SQL/XML:2008 GR
1432                          * 10.16.7.e)
1433                          */
1434                         doc = xmlCtxtReadDoc(ctxt, utf8string,
1435                                                                  NULL,
1436                                                                  "UTF-8",
1437                                                                  XML_PARSE_NOENT | XML_PARSE_DTDATTR
1438                                                    | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
1439                         if (doc == NULL || xmlerrcxt->err_occurred)
1440                                 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
1441                                                         "invalid XML document");
1442                 }
1443                 else
1444                 {
1445                         int                     res_code;
1446                         size_t          count;
1447                         xmlChar    *version;
1448                         int                     standalone;
1449
1450                         res_code = parse_xml_decl(utf8string,
1451                                                                           &count, &version, NULL, &standalone);
1452                         if (res_code != 0)
1453                                 xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
1454                                                           "invalid XML content: invalid XML declaration",
1455                                                                         res_code);
1456
1457                         doc = xmlNewDoc(version);
1458                         Assert(doc->encoding == NULL);
1459                         doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1460                         doc->standalone = standalone;
1461
1462                         /* allow empty content */
1463                         if (*(utf8string + count))
1464                         {
1465                                 res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1466                                                                                                    utf8string + count, NULL);
1467                                 if (res_code != 0 || xmlerrcxt->err_occurred)
1468                                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1469                                                                 "invalid XML content");
1470                         }
1471                 }
1472         }
1473         PG_CATCH();
1474         {
1475                 if (doc != NULL)
1476                         xmlFreeDoc(doc);
1477                 if (ctxt != NULL)
1478                         xmlFreeParserCtxt(ctxt);
1479
1480                 pg_xml_done(xmlerrcxt, true);
1481
1482                 PG_RE_THROW();
1483         }
1484         PG_END_TRY();
1485
1486         xmlFreeParserCtxt(ctxt);
1487
1488         pg_xml_done(xmlerrcxt, false);
1489
1490         return doc;
1491 }
1492
1493
1494 /*
1495  * xmlChar<->text conversions
1496  */
1497 static xmlChar *
1498 xml_text2xmlChar(text *in)
1499 {
1500         return (xmlChar *) text_to_cstring(in);
1501 }
1502
1503
1504 #ifdef USE_LIBXMLCONTEXT
1505
1506 /*
1507  * Manage the special context used for all libxml allocations (but only
1508  * in special debug builds; see notes at top of file)
1509  */
1510 static void
1511 xml_memory_init(void)
1512 {
1513         /* Create memory context if not there already */
1514         if (LibxmlContext == NULL)
1515                 LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1516                                                                                           "Libxml context",
1517                                                                                           ALLOCSET_DEFAULT_SIZES);
1518
1519         /* Re-establish the callbacks even if already set */
1520         xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1521 }
1522
1523 /*
1524  * Wrappers for memory management functions
1525  */
1526 static void *
1527 xml_palloc(size_t size)
1528 {
1529         return MemoryContextAlloc(LibxmlContext, size);
1530 }
1531
1532
1533 static void *
1534 xml_repalloc(void *ptr, size_t size)
1535 {
1536         return repalloc(ptr, size);
1537 }
1538
1539
1540 static void
1541 xml_pfree(void *ptr)
1542 {
1543         /* At least some parts of libxml assume xmlFree(NULL) is allowed */
1544         if (ptr)
1545                 pfree(ptr);
1546 }
1547
1548
1549 static char *
1550 xml_pstrdup(const char *string)
1551 {
1552         return MemoryContextStrdup(LibxmlContext, string);
1553 }
1554 #endif   /* USE_LIBXMLCONTEXT */
1555
1556
1557 /*
1558  * xmlPgEntityLoader --- entity loader callback function
1559  *
1560  * Silently prevent any external entity URL from being loaded.  We don't want
1561  * to throw an error, so instead make the entity appear to expand to an empty
1562  * string.
1563  *
1564  * We would prefer to allow loading entities that exist in the system's
1565  * global XML catalog; but the available libxml2 APIs make that a complex
1566  * and fragile task.  For now, just shut down all external access.
1567  */
1568 static xmlParserInputPtr
1569 xmlPgEntityLoader(const char *URL, const char *ID,
1570                                   xmlParserCtxtPtr ctxt)
1571 {
1572         return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
1573 }
1574
1575
1576 /*
1577  * xml_ereport --- report an XML-related error
1578  *
1579  * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
1580  * standard.  This function adds libxml's native error message, if any, as
1581  * detail.
1582  *
1583  * This is exported for modules that want to share the core libxml error
1584  * handler.  Note that pg_xml_init() *must* have been called previously.
1585  */
1586 void
1587 xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
1588 {
1589         char       *detail;
1590
1591         /* Defend against someone passing us a bogus context struct */
1592         if (errcxt->magic != ERRCXT_MAGIC)
1593                 elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
1594
1595         /* Flag that the current libxml error has been reported */
1596         errcxt->err_occurred = false;
1597
1598         /* Include detail only if we have some text from libxml */
1599         if (errcxt->err_buf.len > 0)
1600                 detail = errcxt->err_buf.data;
1601         else
1602                 detail = NULL;
1603
1604         ereport(level,
1605                         (errcode(sqlcode),
1606                          errmsg_internal("%s", msg),
1607                          detail ? errdetail_internal("%s", detail) : 0));
1608 }
1609
1610
1611 /*
1612  * Error handler for libxml errors and warnings
1613  */
1614 static void
1615 xml_errorHandler(void *data, xmlErrorPtr error)
1616 {
1617         PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
1618         xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
1619         xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
1620         xmlNodePtr      node = error->node;
1621         const xmlChar *name = (node != NULL &&
1622                                                  node->type == XML_ELEMENT_NODE) ? node->name : NULL;
1623         int                     domain = error->domain;
1624         int                     level = error->level;
1625         StringInfo      errorBuf;
1626
1627         /*
1628          * Defend against someone passing us a bogus context struct.
1629          *
1630          * We force a backend exit if this check fails because longjmp'ing out of
1631          * libxml would likely render it unsafe to use further.
1632          */
1633         if (xmlerrcxt->magic != ERRCXT_MAGIC)
1634                 elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
1635
1636         /*----------
1637          * Older libxml versions report some errors differently.
1638          * First, some errors were previously reported as coming from the parser
1639          * domain but are now reported as coming from the namespace domain.
1640          * Second, some warnings were upgraded to errors.
1641          * We attempt to compensate for that here.
1642          *----------
1643          */
1644         switch (error->code)
1645         {
1646                 case XML_WAR_NS_URI:
1647                         level = XML_ERR_ERROR;
1648                         domain = XML_FROM_NAMESPACE;
1649                         break;
1650
1651                 case XML_ERR_NS_DECL_ERROR:
1652                 case XML_WAR_NS_URI_RELATIVE:
1653                 case XML_WAR_NS_COLUMN:
1654                 case XML_NS_ERR_XML_NAMESPACE:
1655                 case XML_NS_ERR_UNDEFINED_NAMESPACE:
1656                 case XML_NS_ERR_QNAME:
1657                 case XML_NS_ERR_ATTRIBUTE_REDEFINED:
1658                 case XML_NS_ERR_EMPTY:
1659                         domain = XML_FROM_NAMESPACE;
1660                         break;
1661         }
1662
1663         /* Decide whether to act on the error or not */
1664         switch (domain)
1665         {
1666                 case XML_FROM_PARSER:
1667                 case XML_FROM_NONE:
1668                 case XML_FROM_MEMORY:
1669                 case XML_FROM_IO:
1670
1671                         /*
1672                          * Suppress warnings about undeclared entities.  We need to do
1673                          * this to avoid problems due to not loading DTD definitions.
1674                          */
1675                         if (error->code == XML_WAR_UNDECLARED_ENTITY)
1676                                 return;
1677
1678                         /* Otherwise, accept error regardless of the parsing purpose */
1679                         break;
1680
1681                 default:
1682                         /* Ignore error if only doing well-formedness check */
1683                         if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
1684                                 return;
1685                         break;
1686         }
1687
1688         /* Prepare error message in errorBuf */
1689         errorBuf = makeStringInfo();
1690
1691         if (error->line > 0)
1692                 appendStringInfo(errorBuf, "line %d: ", error->line);
1693         if (name != NULL)
1694                 appendStringInfo(errorBuf, "element %s: ", name);
1695         appendStringInfoString(errorBuf, error->message);
1696
1697         /*
1698          * Append context information to errorBuf.
1699          *
1700          * xmlParserPrintFileContext() uses libxml's "generic" error handler to
1701          * write the context.  Since we don't want to duplicate libxml
1702          * functionality here, we set up a generic error handler temporarily.
1703          *
1704          * We use appendStringInfo() directly as libxml's generic error handler.
1705          * This should work because it has essentially the same signature as
1706          * libxml expects, namely (void *ptr, const char *msg, ...).
1707          */
1708         if (input != NULL)
1709         {
1710                 xmlGenericErrorFunc errFuncSaved = xmlGenericError;
1711                 void       *errCtxSaved = xmlGenericErrorContext;
1712
1713                 xmlSetGenericErrorFunc((void *) errorBuf,
1714                                                            (xmlGenericErrorFunc) appendStringInfo);
1715
1716                 /* Add context information to errorBuf */
1717                 appendStringInfoLineSeparator(errorBuf);
1718
1719                 xmlParserPrintFileContext(input);
1720
1721                 /* Restore generic error func */
1722                 xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
1723         }
1724
1725         /* Get rid of any trailing newlines in errorBuf */
1726         chopStringInfoNewlines(errorBuf);
1727
1728         /*
1729          * Legacy error handling mode.  err_occurred is never set, we just add the
1730          * message to err_buf.  This mode exists because the xml2 contrib module
1731          * uses our error-handling infrastructure, but we don't want to change its
1732          * behaviour since it's deprecated anyway.  This is also why we don't
1733          * distinguish between notices, warnings and errors here --- the old-style
1734          * generic error handler wouldn't have done that either.
1735          */
1736         if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
1737         {
1738                 appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1739                 appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
1740
1741                 pfree(errorBuf->data);
1742                 pfree(errorBuf);
1743                 return;
1744         }
1745
1746         /*
1747          * We don't want to ereport() here because that'd probably leave libxml in
1748          * an inconsistent state.  Instead, we remember the error and ereport()
1749          * from xml_ereport().
1750          *
1751          * Warnings and notices can be reported immediately since they won't cause
1752          * a longjmp() out of libxml.
1753          */
1754         if (level >= XML_ERR_ERROR)
1755         {
1756                 appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1757                 appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
1758
1759                 xmlerrcxt->err_occurred = true;
1760         }
1761         else if (level >= XML_ERR_WARNING)
1762         {
1763                 ereport(WARNING,
1764                                 (errmsg_internal("%s", errorBuf->data)));
1765         }
1766         else
1767         {
1768                 ereport(NOTICE,
1769                                 (errmsg_internal("%s", errorBuf->data)));
1770         }
1771
1772         pfree(errorBuf->data);
1773         pfree(errorBuf);
1774 }
1775
1776
1777 /*
1778  * Wrapper for "ereport" function for XML-related errors.  The "msg"
1779  * is the SQL-level message; some can be adopted from the SQL/XML
1780  * standard.  This function uses "code" to create a textual detail
1781  * message.  At the moment, we only need to cover those codes that we
1782  * may raise in this file.
1783  */
1784 static void
1785 xml_ereport_by_code(int level, int sqlcode,
1786                                         const char *msg, int code)
1787 {
1788         const char *det;
1789
1790         switch (code)
1791         {
1792                 case XML_ERR_INVALID_CHAR:
1793                         det = gettext_noop("Invalid character value.");
1794                         break;
1795                 case XML_ERR_SPACE_REQUIRED:
1796                         det = gettext_noop("Space required.");
1797                         break;
1798                 case XML_ERR_STANDALONE_VALUE:
1799                         det = gettext_noop("standalone accepts only 'yes' or 'no'.");
1800                         break;
1801                 case XML_ERR_VERSION_MISSING:
1802                         det = gettext_noop("Malformed declaration: missing version.");
1803                         break;
1804                 case XML_ERR_MISSING_ENCODING:
1805                         det = gettext_noop("Missing encoding in text declaration.");
1806                         break;
1807                 case XML_ERR_XMLDECL_NOT_FINISHED:
1808                         det = gettext_noop("Parsing XML declaration: '?>' expected.");
1809                         break;
1810                 default:
1811                         det = gettext_noop("Unrecognized libxml error code: %d.");
1812                         break;
1813         }
1814
1815         ereport(level,
1816                         (errcode(sqlcode),
1817                          errmsg_internal("%s", msg),
1818                          errdetail(det, code)));
1819 }
1820
1821
1822 /*
1823  * Remove all trailing newlines from a StringInfo string
1824  */
1825 static void
1826 chopStringInfoNewlines(StringInfo str)
1827 {
1828         while (str->len > 0 && str->data[str->len - 1] == '\n')
1829                 str->data[--str->len] = '\0';
1830 }
1831
1832
1833 /*
1834  * Append a newline after removing any existing trailing newlines
1835  */
1836 static void
1837 appendStringInfoLineSeparator(StringInfo str)
1838 {
1839         chopStringInfoNewlines(str);
1840         if (str->len > 0)
1841                 appendStringInfoChar(str, '\n');
1842 }
1843
1844
1845 /*
1846  * Convert one char in the current server encoding to a Unicode codepoint.
1847  */
1848 static pg_wchar
1849 sqlchar_to_unicode(char *s)
1850 {
1851         char       *utf8string;
1852         pg_wchar        ret[2];                 /* need space for trailing zero */
1853
1854         /* note we're not assuming s is null-terminated */
1855         utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
1856
1857         pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
1858                                                                   pg_encoding_mblen(PG_UTF8, utf8string));
1859
1860         if (utf8string != s)
1861                 pfree(utf8string);
1862
1863         return ret[0];
1864 }
1865
1866
1867 static bool
1868 is_valid_xml_namefirst(pg_wchar c)
1869 {
1870         /* (Letter | '_' | ':') */
1871         return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1872                         || c == '_' || c == ':');
1873 }
1874
1875
1876 static bool
1877 is_valid_xml_namechar(pg_wchar c)
1878 {
1879         /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
1880         return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1881                         || xmlIsDigitQ(c)
1882                         || c == '.' || c == '-' || c == '_' || c == ':'
1883                         || xmlIsCombiningQ(c)
1884                         || xmlIsExtenderQ(c));
1885 }
1886 #endif   /* USE_LIBXML */
1887
1888
1889 /*
1890  * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
1891  */
1892 char *
1893 map_sql_identifier_to_xml_name(char *ident, bool fully_escaped,
1894                                                            bool escape_period)
1895 {
1896 #ifdef USE_LIBXML
1897         StringInfoData buf;
1898         char       *p;
1899
1900         /*
1901          * SQL/XML doesn't make use of this case anywhere, so it's probably a
1902          * mistake.
1903          */
1904         Assert(fully_escaped || !escape_period);
1905
1906         initStringInfo(&buf);
1907
1908         for (p = ident; *p; p += pg_mblen(p))
1909         {
1910                 if (*p == ':' && (p == ident || fully_escaped))
1911                         appendStringInfoString(&buf, "_x003A_");
1912                 else if (*p == '_' && *(p + 1) == 'x')
1913                         appendStringInfoString(&buf, "_x005F_");
1914                 else if (fully_escaped && p == ident &&
1915                                  pg_strncasecmp(p, "xml", 3) == 0)
1916                 {
1917                         if (*p == 'x')
1918                                 appendStringInfoString(&buf, "_x0078_");
1919                         else
1920                                 appendStringInfoString(&buf, "_x0058_");
1921                 }
1922                 else if (escape_period && *p == '.')
1923                         appendStringInfoString(&buf, "_x002E_");
1924                 else
1925                 {
1926                         pg_wchar        u = sqlchar_to_unicode(p);
1927
1928                         if ((p == ident)
1929                                 ? !is_valid_xml_namefirst(u)
1930                                 : !is_valid_xml_namechar(u))
1931                                 appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
1932                         else
1933                                 appendBinaryStringInfo(&buf, p, pg_mblen(p));
1934                 }
1935         }
1936
1937         return buf.data;
1938 #else                                                   /* not USE_LIBXML */
1939         NO_XML_SUPPORT();
1940         return NULL;
1941 #endif   /* not USE_LIBXML */
1942 }
1943
1944
1945 /*
1946  * Map a Unicode codepoint into the current server encoding.
1947  */
1948 static char *
1949 unicode_to_sqlchar(pg_wchar c)
1950 {
1951         char            utf8string[8];  /* need room for trailing zero */
1952         char       *result;
1953
1954         memset(utf8string, 0, sizeof(utf8string));
1955         unicode_to_utf8(c, (unsigned char *) utf8string);
1956
1957         result = pg_any_to_server(utf8string, strlen(utf8string), PG_UTF8);
1958         /* if pg_any_to_server didn't strdup, we must */
1959         if (result == utf8string)
1960                 result = pstrdup(result);
1961         return result;
1962 }
1963
1964
1965 /*
1966  * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
1967  */
1968 char *
1969 map_xml_name_to_sql_identifier(char *name)
1970 {
1971         StringInfoData buf;
1972         char       *p;
1973
1974         initStringInfo(&buf);
1975
1976         for (p = name; *p; p += pg_mblen(p))
1977         {
1978                 if (*p == '_' && *(p + 1) == 'x'
1979                         && isxdigit((unsigned char) *(p + 2))
1980                         && isxdigit((unsigned char) *(p + 3))
1981                         && isxdigit((unsigned char) *(p + 4))
1982                         && isxdigit((unsigned char) *(p + 5))
1983                         && *(p + 6) == '_')
1984                 {
1985                         unsigned int u;
1986
1987                         sscanf(p + 2, "%X", &u);
1988                         appendStringInfoString(&buf, unicode_to_sqlchar(u));
1989                         p += 6;
1990                 }
1991                 else
1992                         appendBinaryStringInfo(&buf, p, pg_mblen(p));
1993         }
1994
1995         return buf.data;
1996 }
1997
1998 /*
1999  * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
2000  *
2001  * When xml_escape_strings is true, then certain characters in string
2002  * values are replaced by entity references (&lt; etc.), as specified
2003  * in SQL/XML:2008 section 9.8 GR 9) a) iii).   This is normally what is
2004  * wanted.  The false case is mainly useful when the resulting value
2005  * is used with xmlTextWriterWriteAttribute() to write out an
2006  * attribute, because that function does the escaping itself.
2007  */
2008 char *
2009 map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
2010 {
2011         if (type_is_array_domain(type))
2012         {
2013                 ArrayType  *array;
2014                 Oid                     elmtype;
2015                 int16           elmlen;
2016                 bool            elmbyval;
2017                 char            elmalign;
2018                 int                     num_elems;
2019                 Datum      *elem_values;
2020                 bool       *elem_nulls;
2021                 StringInfoData buf;
2022                 int                     i;
2023
2024                 array = DatumGetArrayTypeP(value);
2025                 elmtype = ARR_ELEMTYPE(array);
2026                 get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
2027
2028                 deconstruct_array(array, elmtype,
2029                                                   elmlen, elmbyval, elmalign,
2030                                                   &elem_values, &elem_nulls,
2031                                                   &num_elems);
2032
2033                 initStringInfo(&buf);
2034
2035                 for (i = 0; i < num_elems; i++)
2036                 {
2037                         if (elem_nulls[i])
2038                                 continue;
2039                         appendStringInfoString(&buf, "<element>");
2040                         appendStringInfoString(&buf,
2041                                                                    map_sql_value_to_xml_value(elem_values[i],
2042                                                                                                                           elmtype, true));
2043                         appendStringInfoString(&buf, "</element>");
2044                 }
2045
2046                 pfree(elem_values);
2047                 pfree(elem_nulls);
2048
2049                 return buf.data;
2050         }
2051         else
2052         {
2053                 Oid                     typeOut;
2054                 bool            isvarlena;
2055                 char       *str;
2056
2057                 /*
2058                  * Flatten domains; the special-case treatments below should apply to,
2059                  * eg, domains over boolean not just boolean.
2060                  */
2061                 type = getBaseType(type);
2062
2063                 /*
2064                  * Special XSD formatting for some data types
2065                  */
2066                 switch (type)
2067                 {
2068                         case BOOLOID:
2069                                 if (DatumGetBool(value))
2070                                         return "true";
2071                                 else
2072                                         return "false";
2073
2074                         case DATEOID:
2075                                 {
2076                                         DateADT         date;
2077                                         struct pg_tm tm;
2078                                         char            buf[MAXDATELEN + 1];
2079
2080                                         date = DatumGetDateADT(value);
2081                                         /* XSD doesn't support infinite values */
2082                                         if (DATE_NOT_FINITE(date))
2083                                                 ereport(ERROR,
2084                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2085                                                                  errmsg("date out of range"),
2086                                                                  errdetail("XML does not support infinite date values.")));
2087                                         j2date(date + POSTGRES_EPOCH_JDATE,
2088                                                    &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
2089                                         EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2090
2091                                         return pstrdup(buf);
2092                                 }
2093
2094                         case TIMESTAMPOID:
2095                                 {
2096                                         Timestamp       timestamp;
2097                                         struct pg_tm tm;
2098                                         fsec_t          fsec;
2099                                         char            buf[MAXDATELEN + 1];
2100
2101                                         timestamp = DatumGetTimestamp(value);
2102
2103                                         /* XSD doesn't support infinite values */
2104                                         if (TIMESTAMP_NOT_FINITE(timestamp))
2105                                                 ereport(ERROR,
2106                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2107                                                                  errmsg("timestamp out of range"),
2108                                                                  errdetail("XML does not support infinite timestamp values.")));
2109                                         else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2110                                                 EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
2111                                         else
2112                                                 ereport(ERROR,
2113                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2114                                                                  errmsg("timestamp out of range")));
2115
2116                                         return pstrdup(buf);
2117                                 }
2118
2119                         case TIMESTAMPTZOID:
2120                                 {
2121                                         TimestampTz timestamp;
2122                                         struct pg_tm tm;
2123                                         int                     tz;
2124                                         fsec_t          fsec;
2125                                         const char *tzn = NULL;
2126                                         char            buf[MAXDATELEN + 1];
2127
2128                                         timestamp = DatumGetTimestamp(value);
2129
2130                                         /* XSD doesn't support infinite values */
2131                                         if (TIMESTAMP_NOT_FINITE(timestamp))
2132                                                 ereport(ERROR,
2133                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2134                                                                  errmsg("timestamp out of range"),
2135                                                                  errdetail("XML does not support infinite timestamp values.")));
2136                                         else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2137                                                 EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2138                                         else
2139                                                 ereport(ERROR,
2140                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2141                                                                  errmsg("timestamp out of range")));
2142
2143                                         return pstrdup(buf);
2144                                 }
2145
2146 #ifdef USE_LIBXML
2147                         case BYTEAOID:
2148                                 {
2149                                         bytea      *bstr = DatumGetByteaPP(value);
2150                                         PgXmlErrorContext *xmlerrcxt;
2151                                         volatile xmlBufferPtr buf = NULL;
2152                                         volatile xmlTextWriterPtr writer = NULL;
2153                                         char       *result;
2154
2155                                         xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2156
2157                                         PG_TRY();
2158                                         {
2159                                                 buf = xmlBufferCreate();
2160                                                 if (buf == NULL || xmlerrcxt->err_occurred)
2161                                                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2162                                                                                 "could not allocate xmlBuffer");
2163                                                 writer = xmlNewTextWriterMemory(buf, 0);
2164                                                 if (writer == NULL || xmlerrcxt->err_occurred)
2165                                                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2166                                                                                 "could not allocate xmlTextWriter");
2167
2168                                                 if (xmlbinary == XMLBINARY_BASE64)
2169                                                         xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2170                                                                                                  0, VARSIZE_ANY_EXHDR(bstr));
2171                                                 else
2172                                                         xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2173                                                                                                  0, VARSIZE_ANY_EXHDR(bstr));
2174
2175                                                 /* we MUST do this now to flush data out to the buffer */
2176                                                 xmlFreeTextWriter(writer);
2177                                                 writer = NULL;
2178
2179                                                 result = pstrdup((const char *) xmlBufferContent(buf));
2180                                         }
2181                                         PG_CATCH();
2182                                         {
2183                                                 if (writer)
2184                                                         xmlFreeTextWriter(writer);
2185                                                 if (buf)
2186                                                         xmlBufferFree(buf);
2187
2188                                                 pg_xml_done(xmlerrcxt, true);
2189
2190                                                 PG_RE_THROW();
2191                                         }
2192                                         PG_END_TRY();
2193
2194                                         xmlBufferFree(buf);
2195
2196                                         pg_xml_done(xmlerrcxt, false);
2197
2198                                         return result;
2199                                 }
2200 #endif   /* USE_LIBXML */
2201
2202                 }
2203
2204                 /*
2205                  * otherwise, just use the type's native text representation
2206                  */
2207                 getTypeOutputInfo(type, &typeOut, &isvarlena);
2208                 str = OidOutputFunctionCall(typeOut, value);
2209
2210                 /* ... exactly as-is for XML, and when escaping is not wanted */
2211                 if (type == XMLOID || !xml_escape_strings)
2212                         return str;
2213
2214                 /* otherwise, translate special characters as needed */
2215                 return escape_xml(str);
2216         }
2217 }
2218
2219
2220 /*
2221  * Escape characters in text that have special meanings in XML.
2222  *
2223  * Returns a palloc'd string.
2224  *
2225  * NB: this is intentionally not dependent on libxml.
2226  */
2227 char *
2228 escape_xml(const char *str)
2229 {
2230         StringInfoData buf;
2231         const char *p;
2232
2233         initStringInfo(&buf);
2234         for (p = str; *p; p++)
2235         {
2236                 switch (*p)
2237                 {
2238                         case '&':
2239                                 appendStringInfoString(&buf, "&amp;");
2240                                 break;
2241                         case '<':
2242                                 appendStringInfoString(&buf, "&lt;");
2243                                 break;
2244                         case '>':
2245                                 appendStringInfoString(&buf, "&gt;");
2246                                 break;
2247                         case '\r':
2248                                 appendStringInfoString(&buf, "&#x0d;");
2249                                 break;
2250                         default:
2251                                 appendStringInfoCharMacro(&buf, *p);
2252                                 break;
2253                 }
2254         }
2255         return buf.data;
2256 }
2257
2258
/*
 * Duplicate a NUL-terminated string into memory obtained from SPI_palloc().
 */
static char *
_SPI_strdup(const char *s)
{
	size_t		nbytes = strlen(s) + 1; /* include the terminator */
	char	   *copy;

	copy = SPI_palloc(nbytes);

	return memcpy(copy, s, nbytes);
}
2268
2269
2270 /*
2271  * SQL to XML mapping functions
2272  *
2273  * What follows below was at one point intentionally organized so that
2274  * you can read along in the SQL/XML standard. The functions are
2275  * mostly split up the way the clauses lay out in the standards
2276  * document, and the identifiers are also aligned with the standard
2277  * text.  Unfortunately, SQL/XML:2006 reordered the clauses
2278  * differently than SQL/XML:2003, so the order below doesn't make much
2279  * sense anymore.
2280  *
2281  * There are many things going on there:
2282  *
2283  * There are two kinds of mappings: Mapping SQL data (table contents)
2284  * to XML documents, and mapping SQL structure (the "schema") to XML
2285  * Schema.  And there are functions that do both at the same time.
2286  *
2287  * Then you can map a database, a schema, or a table, each in both
2288  * ways.  This breaks down recursively: Mapping a database invokes
2289  * mapping schemas, which invokes mapping tables, which invokes
2290  * mapping rows, which invokes mapping columns, although you can't
2291  * call the last two from the outside.  Because of this, there are a
2292  * number of xyz_internal() functions which are to be called both from
2293  * the function manager wrapper and from some upper layer in a
2294  * recursive call.
2295  *
2296  * See the documentation about what the common function arguments
2297  * nulls, tableforest, and targetns mean.
2298  *
2299  * Some style guidelines for XML output: Use double quotes for quoting
2300  * XML attributes.  Indent XML elements by two spaces, but remember
2301  * that a lot of code is called recursively at different levels, so
2302  * it's better not to indent rather than create output that indents
2303  * and outdents weirdly.  Add newlines to make the output look nice.
2304  */
2305
2306
2307 /*
2308  * Visibility of objects for XML mappings; see SQL/XML:2008 section
2309  * 4.10.8.
2310  */
2311
2312 /*
2313  * Given a query, which must return type oid as first column, produce
2314  * a list of Oids with the query results.
2315  */
2316 static List *
2317 query_to_oid_list(const char *query)
2318 {
2319         uint64          i;
2320         List       *list = NIL;
2321
2322         SPI_execute(query, true, 0);
2323
2324         for (i = 0; i < SPI_processed; i++)
2325         {
2326                 Datum           oid;
2327                 bool            isnull;
2328
2329                 oid = SPI_getbinval(SPI_tuptable->vals[i],
2330                                                         SPI_tuptable->tupdesc,
2331                                                         1,
2332                                                         &isnull);
2333                 if (!isnull)
2334                         list = lappend_oid(list, DatumGetObjectId(oid));
2335         }
2336
2337         return list;
2338 }
2339
2340
2341 static List *
2342 schema_get_xml_visible_tables(Oid nspid)
2343 {
2344         StringInfoData query;
2345
2346         initStringInfo(&query);
2347         appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class WHERE relnamespace = %u AND relkind IN ('r', 'm', 'v') AND pg_catalog.has_table_privilege (oid, 'SELECT') ORDER BY relname;", nspid);
2348
2349         return query_to_oid_list(query.data);
2350 }
2351
2352
/*
 * Schemas left out of a database mapping: including the system schemas
 * (names starting with "pg_", and information_schema) is probably not
 * useful there.
 */
#define XML_VISIBLE_SCHEMAS_EXCLUDE \
	"(nspname ~ '^pg_' OR nspname = 'information_schema')"

/*
 * Query text selecting the OIDs of all schemas on which the current user has
 * USAGE privilege, minus the excluded ones above.  It carries no trailing
 * semicolon, so users can append to it or embed it as a subquery.
 */
#define XML_VISIBLE_SCHEMAS \
	"SELECT oid FROM pg_catalog.pg_namespace WHERE pg_catalog.has_schema_privilege (oid, 'USAGE') AND NOT " \
	XML_VISIBLE_SCHEMAS_EXCLUDE
2360
2361
2362 static List *
2363 database_get_xml_visible_schemas(void)
2364 {
2365         return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2366 }
2367
2368
2369 static List *
2370 database_get_xml_visible_tables(void)
2371 {
2372         /* At the moment there is no order required here. */
2373         return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class WHERE relkind IN ('r', 'm', 'v') AND pg_catalog.has_table_privilege (pg_class.oid, 'SELECT') AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2374 }
2375
2376
2377 /*
2378  * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2379  * section 9.11.
2380  */
2381
2382 static StringInfo
2383 table_to_xml_internal(Oid relid,
2384                                           const char *xmlschema, bool nulls, bool tableforest,
2385                                           const char *targetns, bool top_level)
2386 {
2387         StringInfoData query;
2388
2389         initStringInfo(&query);
2390         appendStringInfo(&query, "SELECT * FROM %s",
2391                                          DatumGetCString(DirectFunctionCall1(regclassout,
2392                                                                                                   ObjectIdGetDatum(relid))));
2393         return query_to_xml_internal(query.data, get_rel_name(relid),
2394                                                                  xmlschema, nulls, tableforest,
2395                                                                  targetns, top_level);
2396 }
2397
2398
2399 Datum
2400 table_to_xml(PG_FUNCTION_ARGS)
2401 {
2402         Oid                     relid = PG_GETARG_OID(0);
2403         bool            nulls = PG_GETARG_BOOL(1);
2404         bool            tableforest = PG_GETARG_BOOL(2);
2405         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2406
2407         PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2408                                                                                                                   nulls, tableforest,
2409                                                                                                                    targetns, true)));
2410 }
2411
2412
2413 Datum
2414 query_to_xml(PG_FUNCTION_ARGS)
2415 {
2416         char       *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2417         bool            nulls = PG_GETARG_BOOL(1);
2418         bool            tableforest = PG_GETARG_BOOL(2);
2419         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2420
2421         PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2422                                                                                                         NULL, nulls, tableforest,
2423                                                                                                                    targetns, true)));
2424 }
2425
2426
2427 Datum
2428 cursor_to_xml(PG_FUNCTION_ARGS)
2429 {
2430         char       *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2431         int32           count = PG_GETARG_INT32(1);
2432         bool            nulls = PG_GETARG_BOOL(2);
2433         bool            tableforest = PG_GETARG_BOOL(3);
2434         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2435
2436         StringInfoData result;
2437         Portal          portal;
2438         uint64          i;
2439
2440         initStringInfo(&result);
2441
2442         SPI_connect();
2443         portal = SPI_cursor_find(name);
2444         if (portal == NULL)
2445                 ereport(ERROR,
2446                                 (errcode(ERRCODE_UNDEFINED_CURSOR),
2447                                  errmsg("cursor \"%s\" does not exist", name)));
2448
2449         SPI_cursor_fetch(portal, true, count);
2450         for (i = 0; i < SPI_processed; i++)
2451                 SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2452                                                                   tableforest, targetns, true);
2453
2454         SPI_finish();
2455
2456         PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2457 }
2458
2459
2460 /*
2461  * Write the start tag of the root element of a data mapping.
2462  *
2463  * top_level means that this is the very top level of the eventual
2464  * output.  For example, when the user calls table_to_xml, then a call
2465  * with a table name to this function is the top level.  When the user
2466  * calls database_to_xml, then a call with a schema name to this
2467  * function is not the top level.  If top_level is false, then the XML
2468  * namespace declarations are omitted, because they supposedly already
2469  * appeared earlier in the output.  Repeating them is not wrong, but
2470  * it looks ugly.
2471  */
2472 static void
2473 xmldata_root_element_start(StringInfo result, const char *eltname,
2474                                                    const char *xmlschema, const char *targetns,
2475                                                    bool top_level)
2476 {
2477         /* This isn't really wrong but currently makes no sense. */
2478         Assert(top_level || !xmlschema);
2479
2480         appendStringInfo(result, "<%s", eltname);
2481         if (top_level)
2482         {
2483                 appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
2484                 if (strlen(targetns) > 0)
2485                         appendStringInfo(result, " xmlns=\"%s\"", targetns);
2486         }
2487         if (xmlschema)
2488         {
2489                 /* FIXME: better targets */
2490                 if (strlen(targetns) > 0)
2491                         appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
2492                 else
2493                         appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
2494         }
2495         appendStringInfoString(result, ">\n");
2496 }
2497
2498
2499 static void
2500 xmldata_root_element_end(StringInfo result, const char *eltname)
2501 {
2502         appendStringInfo(result, "</%s>\n", eltname);
2503 }
2504
2505
2506 static StringInfo
2507 query_to_xml_internal(const char *query, char *tablename,
2508                                           const char *xmlschema, bool nulls, bool tableforest,
2509                                           const char *targetns, bool top_level)
2510 {
2511         StringInfo      result;
2512         char       *xmltn;
2513         uint64          i;
2514
2515         if (tablename)
2516                 xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
2517         else
2518                 xmltn = "table";
2519
2520         result = makeStringInfo();
2521
2522         SPI_connect();
2523         if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
2524                 ereport(ERROR,
2525                                 (errcode(ERRCODE_DATA_EXCEPTION),
2526                                  errmsg("invalid query")));
2527
2528         if (!tableforest)
2529         {
2530                 xmldata_root_element_start(result, xmltn, xmlschema,
2531                                                                    targetns, top_level);
2532                 appendStringInfoChar(result, '\n');
2533         }
2534
2535         if (xmlschema)
2536                 appendStringInfo(result, "%s\n\n", xmlschema);
2537
2538         for (i = 0; i < SPI_processed; i++)
2539                 SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
2540                                                                   tableforest, targetns, top_level);
2541
2542         if (!tableforest)
2543                 xmldata_root_element_end(result, xmltn);
2544
2545         SPI_finish();
2546
2547         return result;
2548 }
2549
2550
2551 Datum
2552 table_to_xmlschema(PG_FUNCTION_ARGS)
2553 {
2554         Oid                     relid = PG_GETARG_OID(0);
2555         bool            nulls = PG_GETARG_BOOL(1);
2556         bool            tableforest = PG_GETARG_BOOL(2);
2557         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2558         const char *result;
2559         Relation        rel;
2560
2561         rel = heap_open(relid, AccessShareLock);
2562         result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2563                                                                                 tableforest, targetns);
2564         heap_close(rel, NoLock);
2565
2566         PG_RETURN_XML_P(cstring_to_xmltype(result));
2567 }
2568
2569
2570 Datum
2571 query_to_xmlschema(PG_FUNCTION_ARGS)
2572 {
2573         char       *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2574         bool            nulls = PG_GETARG_BOOL(1);
2575         bool            tableforest = PG_GETARG_BOOL(2);
2576         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2577         const char *result;
2578         SPIPlanPtr      plan;
2579         Portal          portal;
2580
2581         SPI_connect();
2582
2583         if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2584                 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2585
2586         if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2587                 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2588
2589         result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2590                                                                                                         InvalidOid, nulls,
2591                                                                                                         tableforest, targetns));
2592         SPI_cursor_close(portal);
2593         SPI_finish();
2594
2595         PG_RETURN_XML_P(cstring_to_xmltype(result));
2596 }
2597
2598
2599 Datum
2600 cursor_to_xmlschema(PG_FUNCTION_ARGS)
2601 {
2602         char       *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2603         bool            nulls = PG_GETARG_BOOL(1);
2604         bool            tableforest = PG_GETARG_BOOL(2);
2605         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2606         const char *xmlschema;
2607         Portal          portal;
2608
2609         SPI_connect();
2610         portal = SPI_cursor_find(name);
2611         if (portal == NULL)
2612                 ereport(ERROR,
2613                                 (errcode(ERRCODE_UNDEFINED_CURSOR),
2614                                  errmsg("cursor \"%s\" does not exist", name)));
2615
2616         xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2617                                                                                                            InvalidOid, nulls,
2618                                                                                                          tableforest, targetns));
2619         SPI_finish();
2620
2621         PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
2622 }
2623
2624
2625 Datum
2626 table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2627 {
2628         Oid                     relid = PG_GETARG_OID(0);
2629         bool            nulls = PG_GETARG_BOOL(1);
2630         bool            tableforest = PG_GETARG_BOOL(2);
2631         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2632         Relation        rel;
2633         const char *xmlschema;
2634
2635         rel = heap_open(relid, AccessShareLock);
2636         xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2637                                                                                    tableforest, targetns);
2638         heap_close(rel, NoLock);
2639
2640         PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
2641                                                                                            xmlschema, nulls, tableforest,
2642                                                                                                                    targetns, true)));
2643 }
2644
2645
2646 Datum
2647 query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2648 {
2649         char       *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2650         bool            nulls = PG_GETARG_BOOL(1);
2651         bool            tableforest = PG_GETARG_BOOL(2);
2652         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2653
2654         const char *xmlschema;
2655         SPIPlanPtr      plan;
2656         Portal          portal;
2657
2658         SPI_connect();
2659
2660         if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2661                 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2662
2663         if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2664                 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2665
2666         xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2667                                                                   InvalidOid, nulls, tableforest, targetns));
2668         SPI_cursor_close(portal);
2669         SPI_finish();
2670
2671         PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2672                                                                                            xmlschema, nulls, tableforest,
2673                                                                                                                    targetns, true)));
2674 }
2675
2676
2677 /*
2678  * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
2679  * sections 9.13, 9.14.
2680  */
2681
2682 static StringInfo
2683 schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
2684                                            bool tableforest, const char *targetns, bool top_level)
2685 {
2686         StringInfo      result;
2687         char       *xmlsn;
2688         List       *relid_list;
2689         ListCell   *cell;
2690
2691         xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
2692                                                                                    true, false);
2693         result = makeStringInfo();
2694
2695         xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
2696         appendStringInfoChar(result, '\n');
2697
2698         if (xmlschema)
2699                 appendStringInfo(result, "%s\n\n", xmlschema);
2700
2701         SPI_connect();
2702
2703         relid_list = schema_get_xml_visible_tables(nspid);
2704
2705         foreach(cell, relid_list)
2706         {
2707                 Oid                     relid = lfirst_oid(cell);
2708                 StringInfo      subres;
2709
2710                 subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
2711                                                                            targetns, false);
2712
2713                 appendStringInfoString(result, subres->data);
2714                 appendStringInfoChar(result, '\n');
2715         }
2716
2717         SPI_finish();
2718
2719         xmldata_root_element_end(result, xmlsn);
2720
2721         return result;
2722 }
2723
2724
2725 Datum
2726 schema_to_xml(PG_FUNCTION_ARGS)
2727 {
2728         Name            name = PG_GETARG_NAME(0);
2729         bool            nulls = PG_GETARG_BOOL(1);
2730         bool            tableforest = PG_GETARG_BOOL(2);
2731         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2732
2733         char       *schemaname;
2734         Oid                     nspid;
2735
2736         schemaname = NameStr(*name);
2737         nspid = LookupExplicitNamespace(schemaname, false);
2738
2739         PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
2740                                                                            nulls, tableforest, targetns, true)));
2741 }
2742
2743
2744 /*
2745  * Write the start element of the root element of an XML Schema mapping.
2746  */
2747 static void
2748 xsd_schema_element_start(StringInfo result, const char *targetns)
2749 {
2750         appendStringInfoString(result,
2751                                                    "<xsd:schema\n"
2752                                                    "    xmlns:xsd=\"" NAMESPACE_XSD "\"");
2753         if (strlen(targetns) > 0)
2754                 appendStringInfo(result,
2755                                                  "\n"
2756                                                  "    targetNamespace=\"%s\"\n"
2757                                                  "    elementFormDefault=\"qualified\"",
2758                                                  targetns);
2759         appendStringInfoString(result,
2760                                                    ">\n\n");
2761 }
2762
2763
2764 static void
2765 xsd_schema_element_end(StringInfo result)
2766 {
2767         appendStringInfoString(result, "</xsd:schema>");
2768 }
2769
2770
2771 static StringInfo
2772 schema_to_xmlschema_internal(const char *schemaname, bool nulls,
2773                                                          bool tableforest, const char *targetns)
2774 {
2775         Oid                     nspid;
2776         List       *relid_list;
2777         List       *tupdesc_list;
2778         ListCell   *cell;
2779         StringInfo      result;
2780
2781         result = makeStringInfo();
2782
2783         nspid = LookupExplicitNamespace(schemaname, false);
2784
2785         xsd_schema_element_start(result, targetns);
2786
2787         SPI_connect();
2788
2789         relid_list = schema_get_xml_visible_tables(nspid);
2790
2791         tupdesc_list = NIL;
2792         foreach(cell, relid_list)
2793         {
2794                 Relation        rel;
2795
2796                 rel = heap_open(lfirst_oid(cell), AccessShareLock);
2797                 tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
2798                 heap_close(rel, NoLock);
2799         }
2800
2801         appendStringInfoString(result,
2802                                                    map_sql_typecoll_to_xmlschema_types(tupdesc_list));
2803
2804         appendStringInfoString(result,
2805                                                  map_sql_schema_to_xmlschema_types(nspid, relid_list,
2806                                                                                           nulls, tableforest, targetns));
2807
2808         xsd_schema_element_end(result);
2809
2810         SPI_finish();
2811
2812         return result;
2813 }
2814
2815
2816 Datum
2817 schema_to_xmlschema(PG_FUNCTION_ARGS)
2818 {
2819         Name            name = PG_GETARG_NAME(0);
2820         bool            nulls = PG_GETARG_BOOL(1);
2821         bool            tableforest = PG_GETARG_BOOL(2);
2822         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2823
2824         PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
2825                                                                                          nulls, tableforest, targetns)));
2826 }
2827
2828
2829 Datum
2830 schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2831 {
2832         Name            name = PG_GETARG_NAME(0);
2833         bool            nulls = PG_GETARG_BOOL(1);
2834         bool            tableforest = PG_GETARG_BOOL(2);
2835         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2836         char       *schemaname;
2837         Oid                     nspid;
2838         StringInfo      xmlschema;
2839
2840         schemaname = NameStr(*name);
2841         nspid = LookupExplicitNamespace(schemaname, false);
2842
2843         xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
2844                                                                                          tableforest, targetns);
2845
2846         PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
2847                                                                                                           xmlschema->data, nulls,
2848                                                                                           tableforest, targetns, true)));
2849 }
2850
2851
2852 /*
2853  * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
2854  * sections 9.16, 9.17.
2855  */
2856
2857 static StringInfo
2858 database_to_xml_internal(const char *xmlschema, bool nulls,
2859                                                  bool tableforest, const char *targetns)
2860 {
2861         StringInfo      result;
2862         List       *nspid_list;
2863         ListCell   *cell;
2864         char       *xmlcn;
2865
2866         xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
2867                                                                                    true, false);
2868         result = makeStringInfo();
2869
2870         xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
2871         appendStringInfoChar(result, '\n');
2872
2873         if (xmlschema)
2874                 appendStringInfo(result, "%s\n\n", xmlschema);
2875
2876         SPI_connect();
2877
2878         nspid_list = database_get_xml_visible_schemas();
2879
2880         foreach(cell, nspid_list)
2881         {
2882                 Oid                     nspid = lfirst_oid(cell);
2883                 StringInfo      subres;
2884
2885                 subres = schema_to_xml_internal(nspid, NULL, nulls,
2886                                                                                 tableforest, targetns, false);
2887
2888                 appendStringInfoString(result, subres->data);
2889                 appendStringInfoChar(result, '\n');
2890         }
2891
2892         SPI_finish();
2893
2894         xmldata_root_element_end(result, xmlcn);
2895
2896         return result;
2897 }
2898
2899
2900 Datum
2901 database_to_xml(PG_FUNCTION_ARGS)
2902 {
2903         bool            nulls = PG_GETARG_BOOL(0);
2904         bool            tableforest = PG_GETARG_BOOL(1);
2905         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
2906
2907         PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
2908                                                                                                         tableforest, targetns)));
2909 }
2910
2911
2912 static StringInfo
2913 database_to_xmlschema_internal(bool nulls, bool tableforest,
2914                                                            const char *targetns)
2915 {
2916         List       *relid_list;
2917         List       *nspid_list;
2918         List       *tupdesc_list;
2919         ListCell   *cell;
2920         StringInfo      result;
2921
2922         result = makeStringInfo();
2923
2924         xsd_schema_element_start(result, targetns);
2925
2926         SPI_connect();
2927
2928         relid_list = database_get_xml_visible_tables();
2929         nspid_list = database_get_xml_visible_schemas();
2930
2931         tupdesc_list = NIL;
2932         foreach(cell, relid_list)
2933         {
2934                 Relation        rel;
2935
2936                 rel = heap_open(lfirst_oid(cell), AccessShareLock);
2937                 tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
2938                 heap_close(rel, NoLock);
2939         }
2940
2941         appendStringInfoString(result,
2942                                                    map_sql_typecoll_to_xmlschema_types(tupdesc_list));
2943
2944         appendStringInfoString(result,
2945                                                    map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
2946
2947         xsd_schema_element_end(result);
2948
2949         SPI_finish();
2950
2951         return result;
2952 }
2953
2954
2955 Datum
2956 database_to_xmlschema(PG_FUNCTION_ARGS)
2957 {
2958         bool            nulls = PG_GETARG_BOOL(0);
2959         bool            tableforest = PG_GETARG_BOOL(1);
2960         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
2961
2962         PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
2963                                                                                                         tableforest, targetns)));
2964 }
2965
2966
2967 Datum
2968 database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2969 {
2970         bool            nulls = PG_GETARG_BOOL(0);
2971         bool            tableforest = PG_GETARG_BOOL(1);
2972         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
2973         StringInfo      xmlschema;
2974
2975         xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
2976
2977         PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
2978                                                                                          nulls, tableforest, targetns)));
2979 }
2980
2981
2982 /*
2983  * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
2984  * 9.2.
2985  */
2986 static char *
2987 map_multipart_sql_identifier_to_xml_name(char *a, char *b, char *c, char *d)
2988 {
2989         StringInfoData result;
2990
2991         initStringInfo(&result);
2992
2993         if (a)
2994                 appendStringInfoString(&result,
2995                                                            map_sql_identifier_to_xml_name(a, true, true));
2996         if (b)
2997                 appendStringInfo(&result, ".%s",
2998                                                  map_sql_identifier_to_xml_name(b, true, true));
2999         if (c)
3000                 appendStringInfo(&result, ".%s",
3001                                                  map_sql_identifier_to_xml_name(c, true, true));
3002         if (d)
3003                 appendStringInfo(&result, ".%s",
3004                                                  map_sql_identifier_to_xml_name(d, true, true));
3005
3006         return result.data;
3007 }
3008
3009
3010 /*
3011  * Map an SQL table to an XML Schema document; see SQL/XML:2008
3012  * section 9.11.
3013  *
3014  * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3015  * 9.9.
3016  */
3017 static const char *
3018 map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
3019                                                    bool tableforest, const char *targetns)
3020 {
3021         int                     i;
3022         char       *xmltn;
3023         char       *tabletypename;
3024         char       *rowtypename;
3025         StringInfoData result;
3026
3027         initStringInfo(&result);
3028
3029         if (OidIsValid(relid))
3030         {
3031                 HeapTuple       tuple;
3032                 Form_pg_class reltuple;
3033
3034                 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
3035                 if (!HeapTupleIsValid(tuple))
3036                         elog(ERROR, "cache lookup failed for relation %u", relid);
3037                 reltuple = (Form_pg_class) GETSTRUCT(tuple);
3038
3039                 xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
3040                                                                                            true, false);
3041
3042                 tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
3043                                                                                          get_database_name(MyDatabaseId),
3044                                                                   get_namespace_name(reltuple->relnamespace),
3045                                                                                                  NameStr(reltuple->relname));
3046
3047                 rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
3048                                                                                          get_database_name(MyDatabaseId),
3049                                                                   get_namespace_name(reltuple->relnamespace),
3050                                                                                                  NameStr(reltuple->relname));
3051
3052                 ReleaseSysCache(tuple);
3053         }
3054         else
3055         {
3056                 if (tableforest)
3057                         xmltn = "row";
3058                 else
3059                         xmltn = "table";
3060
3061                 tabletypename = "TableType";
3062                 rowtypename = "RowType";
3063         }
3064
3065         xsd_schema_element_start(&result, targetns);
3066
3067         appendStringInfoString(&result,
3068                                    map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3069
3070         appendStringInfo(&result,
3071                                          "<xsd:complexType name=\"%s\">\n"
3072                                          "  <xsd:sequence>\n",
3073                                          rowtypename);
3074
3075         for (i = 0; i < tupdesc->natts; i++)
3076         {
3077                 if (tupdesc->attrs[i]->attisdropped)
3078                         continue;
3079                 appendStringInfo(&result,
3080                            "    <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3081                   map_sql_identifier_to_xml_name(NameStr(tupdesc->attrs[i]->attname),
3082                                                                                  true, false),
3083                                    map_sql_type_to_xml_name(tupdesc->attrs[i]->atttypid, -1),
3084                                                  nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3085         }
3086
3087         appendStringInfoString(&result,
3088                                                    "  </xsd:sequence>\n"
3089                                                    "</xsd:complexType>\n\n");
3090
3091         if (!tableforest)
3092         {
3093                 appendStringInfo(&result,
3094                                                  "<xsd:complexType name=\"%s\">\n"
3095                                                  "  <xsd:sequence>\n"
3096                                                  "    <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3097                                                  "  </xsd:sequence>\n"
3098                                                  "</xsd:complexType>\n\n",
3099                                                  tabletypename, rowtypename);
3100
3101                 appendStringInfo(&result,
3102                                                  "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3103                                                  xmltn, tabletypename);
3104         }
3105         else
3106                 appendStringInfo(&result,
3107                                                  "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3108                                                  xmltn, rowtypename);
3109
3110         xsd_schema_element_end(&result);
3111
3112         return result.data;
3113 }
3114
3115
3116 /*
3117  * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3118  * section 9.12.
3119  */
3120 static const char *
3121 map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3122                                                                   bool tableforest, const char *targetns)
3123 {
3124         char       *dbname;
3125         char       *nspname;
3126         char       *xmlsn;
3127         char       *schematypename;
3128         StringInfoData result;
3129         ListCell   *cell;
3130
3131         dbname = get_database_name(MyDatabaseId);
3132         nspname = get_namespace_name(nspid);
3133
3134         initStringInfo(&result);
3135
3136         xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3137
3138         schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3139                                                                                                                           dbname,
3140                                                                                                                           nspname,
3141                                                                                                                           NULL);
3142
3143         appendStringInfo(&result,
3144                                          "<xsd:complexType name=\"%s\">\n", schematypename);
3145         if (!tableforest)
3146                 appendStringInfoString(&result,
3147                                                            "  <xsd:all>\n");
3148         else
3149                 appendStringInfoString(&result,
3150                                                            "  <xsd:sequence>\n");
3151
3152         foreach(cell, relid_list)
3153         {
3154                 Oid                     relid = lfirst_oid(cell);
3155                 char       *relname = get_rel_name(relid);
3156                 char       *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3157                 char       *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3158                                                                                                                                           dbname,
3159                                                                                                                                          nspname,
3160                                                                                                                                         relname);
3161
3162                 if (!tableforest)
3163                         appendStringInfo(&result,
3164                                                          "    <xsd:element name=\"%s\" type=\"%s\"/>\n",
3165                                                          xmltn, tabletypename);
3166                 else
3167                         appendStringInfo(&result,
3168                                                          "    <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3169                                                          xmltn, tabletypename);
3170         }
3171
3172         if (!tableforest)
3173                 appendStringInfoString(&result,
3174                                                            "  </xsd:all>\n");
3175         else
3176                 appendStringInfoString(&result,
3177                                                            "  </xsd:sequence>\n");
3178         appendStringInfoString(&result,
3179                                                    "</xsd:complexType>\n\n");
3180
3181         appendStringInfo(&result,
3182                                          "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3183                                          xmlsn, schematypename);
3184
3185         return result.data;
3186 }
3187
3188
3189 /*
3190  * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3191  * section 9.15.
3192  */
3193 static const char *
3194 map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3195                                                                    bool tableforest, const char *targetns)
3196 {
3197         char       *dbname;
3198         char       *xmlcn;
3199         char       *catalogtypename;
3200         StringInfoData result;
3201         ListCell   *cell;
3202
3203         dbname = get_database_name(MyDatabaseId);
3204
3205         initStringInfo(&result);
3206
3207         xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3208
3209         catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3210                                                                                                                            dbname,
3211                                                                                                                            NULL,
3212                                                                                                                            NULL);
3213
3214         appendStringInfo(&result,
3215                                          "<xsd:complexType name=\"%s\">\n", catalogtypename);
3216         appendStringInfoString(&result,
3217                                                    "  <xsd:all>\n");
3218
3219         foreach(cell, nspid_list)
3220         {
3221                 Oid                     nspid = lfirst_oid(cell);
3222                 char       *nspname = get_namespace_name(nspid);
3223                 char       *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3224                 char       *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3225                                                                                                                                           dbname,
3226                                                                                                                                          nspname,
3227                                                                                                                                            NULL);
3228
3229                 appendStringInfo(&result,
3230                                                  "    <xsd:element name=\"%s\" type=\"%s\"/>\n",
3231                                                  xmlsn, schematypename);
3232         }
3233
3234         appendStringInfoString(&result,
3235                                                    "  </xsd:all>\n");
3236         appendStringInfoString(&result,
3237                                                    "</xsd:complexType>\n\n");
3238
3239         appendStringInfo(&result,
3240                                          "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3241                                          xmlcn, catalogtypename);
3242
3243         return result.data;
3244 }
3245
3246
3247 /*
3248  * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3249  */
3250 static const char *
3251 map_sql_type_to_xml_name(Oid typeoid, int typmod)
3252 {
3253         StringInfoData result;
3254
3255         initStringInfo(&result);
3256
3257         switch (typeoid)
3258         {
3259                 case BPCHAROID:
3260                         if (typmod == -1)
3261                                 appendStringInfoString(&result, "CHAR");
3262                         else
3263                                 appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3264                         break;
3265                 case VARCHAROID:
3266                         if (typmod == -1)
3267                                 appendStringInfoString(&result, "VARCHAR");
3268                         else
3269                                 appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3270                         break;
3271                 case NUMERICOID:
3272                         if (typmod == -1)
3273                                 appendStringInfoString(&result, "NUMERIC");
3274                         else
3275                                 appendStringInfo(&result, "NUMERIC_%d_%d",
3276                                                                  ((typmod - VARHDRSZ) >> 16) & 0xffff,
3277                                                                  (typmod - VARHDRSZ) & 0xffff);
3278                         break;
3279                 case INT4OID:
3280                         appendStringInfoString(&result, "INTEGER");
3281                         break;
3282                 case INT2OID:
3283                         appendStringInfoString(&result, "SMALLINT");
3284                         break;
3285                 case INT8OID:
3286                         appendStringInfoString(&result, "BIGINT");
3287                         break;
3288                 case FLOAT4OID:
3289                         appendStringInfoString(&result, "REAL");
3290                         break;
3291                 case FLOAT8OID:
3292                         appendStringInfoString(&result, "DOUBLE");
3293                         break;
3294                 case BOOLOID:
3295                         appendStringInfoString(&result, "BOOLEAN");
3296                         break;
3297                 case TIMEOID:
3298                         if (typmod == -1)
3299                                 appendStringInfoString(&result, "TIME");
3300                         else
3301                                 appendStringInfo(&result, "TIME_%d", typmod);
3302                         break;
3303                 case TIMETZOID:
3304                         if (typmod == -1)
3305                                 appendStringInfoString(&result, "TIME_WTZ");
3306                         else
3307                                 appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3308                         break;
3309                 case TIMESTAMPOID:
3310                         if (typmod == -1)
3311                                 appendStringInfoString(&result, "TIMESTAMP");
3312                         else
3313                                 appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3314                         break;
3315                 case TIMESTAMPTZOID:
3316                         if (typmod == -1)
3317                                 appendStringInfoString(&result, "TIMESTAMP_WTZ");
3318                         else
3319                                 appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3320                         break;
3321                 case DATEOID:
3322                         appendStringInfoString(&result, "DATE");
3323                         break;
3324                 case XMLOID:
3325                         appendStringInfoString(&result, "XML");
3326                         break;
3327                 default:
3328                         {
3329                                 HeapTuple       tuple;
3330                                 Form_pg_type typtuple;
3331
3332                                 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3333                                 if (!HeapTupleIsValid(tuple))
3334                                         elog(ERROR, "cache lookup failed for type %u", typeoid);
3335                                 typtuple = (Form_pg_type) GETSTRUCT(tuple);
3336
3337                                 appendStringInfoString(&result,
3338                                                                            map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3339                                                                                          get_database_name(MyDatabaseId),
3340                                                                   get_namespace_name(typtuple->typnamespace),
3341                                                                                                 NameStr(typtuple->typname)));
3342
3343                                 ReleaseSysCache(tuple);
3344                         }
3345         }
3346
3347         return result.data;
3348 }
3349
3350
3351 /*
3352  * Map a collection of SQL data types to XML Schema data types; see
3353  * SQL/XML:2008 section 9.7.
3354  */
3355 static const char *
3356 map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
3357 {
3358         List       *uniquetypes = NIL;
3359         int                     i;
3360         StringInfoData result;
3361         ListCell   *cell0;
3362
3363         /* extract all column types used in the set of TupleDescs */
3364         foreach(cell0, tupdesc_list)
3365         {
3366                 TupleDesc       tupdesc = (TupleDesc) lfirst(cell0);
3367
3368                 for (i = 0; i < tupdesc->natts; i++)
3369                 {
3370                         if (tupdesc->attrs[i]->attisdropped)
3371                                 continue;
3372                         uniquetypes = list_append_unique_oid(uniquetypes,
3373                                                                                                  tupdesc->attrs[i]->atttypid);
3374                 }
3375         }
3376
3377         /* add base types of domains */
3378         foreach(cell0, uniquetypes)
3379         {
3380                 Oid                     typid = lfirst_oid(cell0);
3381                 Oid                     basetypid = getBaseType(typid);
3382
3383                 if (basetypid != typid)
3384                         uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3385         }
3386
3387         /* Convert to textual form */
3388         initStringInfo(&result);
3389
3390         foreach(cell0, uniquetypes)
3391         {
3392                 appendStringInfo(&result, "%s\n",
3393                                                  map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3394                                                                                                                 -1));
3395         }
3396
3397         return result.data;
3398 }
3399
3400
3401 /*
3402  * Map an SQL data type to a named XML Schema data type; see
3403  * SQL/XML:2008 sections 9.5 and 9.6.
3404  *
3405  * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3406  * a name attribute, which this function does.  The name-less version
3407  * 9.5 doesn't appear to be required anywhere.)
3408  */
3409 static const char *
3410 map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3411 {
3412         StringInfoData result;
3413         const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3414
3415         initStringInfo(&result);
3416
3417         if (typeoid == XMLOID)
3418         {
3419                 appendStringInfoString(&result,
3420                                                            "<xsd:complexType mixed=\"true\">\n"
3421                                                            "  <xsd:sequence>\n"
3422                                                            "    <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3423                                                            "  </xsd:sequence>\n"
3424                                                            "</xsd:complexType>\n");
3425         }
3426         else
3427         {
3428                 appendStringInfo(&result,
3429                                                  "<xsd:simpleType name=\"%s\">\n", typename);
3430
3431                 switch (typeoid)
3432                 {
3433                         case BPCHAROID:
3434                         case VARCHAROID:
3435                         case TEXTOID:
3436                                 appendStringInfo(&result,
3437                                                                  "  <xsd:restriction base=\"xsd:string\">\n");
3438                                 if (typmod != -1)
3439                                         appendStringInfo(&result,
3440                                                                          "    <xsd:maxLength value=\"%d\"/>\n",
3441                                                                          typmod - VARHDRSZ);
3442                                 appendStringInfoString(&result, "  </xsd:restriction>\n");
3443                                 break;
3444
3445                         case BYTEAOID:
3446                                 appendStringInfo(&result,
3447                                                                  "  <xsd:restriction base=\"xsd:%s\">\n"
3448                                                                  "  </xsd:restriction>\n",
3449                                 xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3450                                 break;
3451
3452                         case NUMERICOID:
3453                                 if (typmod != -1)
3454                                         appendStringInfo(&result,
3455                                                                  "  <xsd:restriction base=\"xsd:decimal\">\n"
3456                                                                          "    <xsd:totalDigits value=\"%d\"/>\n"
3457                                                                    "    <xsd:fractionDigits value=\"%d\"/>\n"
3458                                                                          "  </xsd:restriction>\n",
3459                                                                          ((typmod - VARHDRSZ) >> 16) & 0xffff,
3460                                                                          (typmod - VARHDRSZ) & 0xffff);
3461                                 break;
3462
3463                         case INT2OID:
3464                                 appendStringInfo(&result,
3465                                                                  "  <xsd:restriction base=\"xsd:short\">\n"
3466                                                                  "    <xsd:maxInclusive value=\"%d\"/>\n"
3467                                                                  "    <xsd:minInclusive value=\"%d\"/>\n"
3468                                                                  "  </xsd:restriction>\n",
3469                                                                  SHRT_MAX, SHRT_MIN);
3470                                 break;
3471
3472                         case INT4OID:
3473                                 appendStringInfo(&result,
3474                                                                  "  <xsd:restriction base=\"xsd:int\">\n"
3475                                                                  "    <xsd:maxInclusive value=\"%d\"/>\n"
3476                                                                  "    <xsd:minInclusive value=\"%d\"/>\n"
3477                                                                  "  </xsd:restriction>\n",
3478                                                                  INT_MAX, INT_MIN);
3479                                 break;
3480
3481                         case INT8OID:
3482                                 appendStringInfo(&result,
3483                                                                  "  <xsd:restriction base=\"xsd:long\">\n"
3484                                            "    <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
3485                                            "    <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
3486                                                                  "  </xsd:restriction>\n",
3487                                                            (((uint64) 1) << (sizeof(int64) * 8 - 1)) - 1,
3488                                                                  (((uint64) 1) << (sizeof(int64) * 8 - 1)));
3489                                 break;
3490
3491                         case FLOAT4OID:
3492                                 appendStringInfoString(&result,
3493                                 "  <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
3494                                 break;
3495
3496                         case FLOAT8OID:
3497                                 appendStringInfoString(&result,
3498                                                                            "  <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
3499                                 break;
3500
3501                         case BOOLOID:
3502                                 appendStringInfoString(&result,
3503                                                                            "  <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
3504                                 break;
3505
3506                         case TIMEOID:
3507                         case TIMETZOID:
3508                                 {
3509                                         const char *tz = (typeoid == TIMETZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3510
3511                                         if (typmod == -1)
3512                                                 appendStringInfo(&result,
3513                                                                         "  <xsd:restriction base=\"xsd:time\">\n"
3514                                                                                  "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3515                                                                                  "  </xsd:restriction>\n", tz);
3516                                         else if (typmod == 0)
3517                                                 appendStringInfo(&result,
3518                                                                         "  <xsd:restriction base=\"xsd:time\">\n"
3519                                                                                  "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3520                                                                                  "  </xsd:restriction>\n", tz);
3521                                         else
3522                                                 appendStringInfo(&result,
3523                                                                         "  <xsd:restriction base=\"xsd:time\">\n"
3524                                                                                  "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3525                                                         "  </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3526                                         break;
3527                                 }
3528
3529                         case TIMESTAMPOID:
3530                         case TIMESTAMPTZOID:
3531                                 {
3532                                         const char *tz = (typeoid == TIMESTAMPTZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3533
3534                                         if (typmod == -1)
3535                                                 appendStringInfo(&result,
3536                                                                 "  <xsd:restriction base=\"xsd:dateTime\">\n"
3537                                                                                  "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3538                                                                                  "  </xsd:restriction>\n", tz);
3539                                         else if (typmod == 0)
3540                                                 appendStringInfo(&result,
3541                                                                 "  <xsd:restriction base=\"xsd:dateTime\">\n"
3542                                                                                  "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3543                                                                                  "  </xsd:restriction>\n", tz);
3544                                         else
3545                                                 appendStringInfo(&result,
3546                                                                 "  <xsd:restriction base=\"xsd:dateTime\">\n"
3547                                                                                  "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3548                                                         "  </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3549                                         break;
3550                                 }
3551
3552                         case DATEOID:
3553                                 appendStringInfoString(&result,
3554                                                                         "  <xsd:restriction base=\"xsd:date\">\n"
3555                                                                            "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
3556                                                                            "  </xsd:restriction>\n");
3557                                 break;
3558
3559                         default:
3560                                 if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
3561                                 {
3562                                         Oid                     base_typeoid;
3563                                         int32           base_typmod = -1;
3564
3565                                         base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
3566
3567                                         appendStringInfo(&result,
3568                                                                          "  <xsd:restriction base=\"%s\"/>\n",
3569                                                 map_sql_type_to_xml_name(base_typeoid, base_typmod));
3570                                 }
3571                                 break;
3572                 }
3573                 appendStringInfoString(&result, "</xsd:simpleType>\n");
3574         }
3575
3576         return result.data;
3577 }
3578
3579
3580 /*
3581  * Map an SQL row to an XML element, taking the row from the active
3582  * SPI cursor.  See also SQL/XML:2008 section 9.10.
3583  */
3584 static void
3585 SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
3586                                                   bool nulls, bool tableforest,
3587                                                   const char *targetns, bool top_level)
3588 {
3589         int                     i;
3590         char       *xmltn;
3591
3592         if (tablename)
3593                 xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3594         else
3595         {
3596                 if (tableforest)
3597                         xmltn = "row";
3598                 else
3599                         xmltn = "table";
3600         }
3601
3602         if (tableforest)
3603                 xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
3604         else
3605                 appendStringInfoString(result, "<row>\n");
3606
3607         for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
3608         {
3609                 char       *colname;
3610                 Datum           colval;
3611                 bool            isnull;
3612
3613                 colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
3614                                                                                                  true, false);
3615                 colval = SPI_getbinval(SPI_tuptable->vals[rownum],
3616                                                            SPI_tuptable->tupdesc,
3617                                                            i,
3618                                                            &isnull);
3619                 if (isnull)
3620                 {
3621                         if (nulls)
3622                                 appendStringInfo(result, "  <%s xsi:nil=\"true\"/>\n", colname);
3623                 }
3624                 else
3625                         appendStringInfo(result, "  <%s>%s</%s>\n",
3626                                                          colname,
3627                                                          map_sql_value_to_xml_value(colval,
3628                                                           SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
3629                                                          colname);
3630         }
3631
3632         if (tableforest)
3633         {
3634                 xmldata_root_element_end(result, xmltn);
3635                 appendStringInfoChar(result, '\n');
3636         }
3637         else
3638                 appendStringInfoString(result, "</row>\n\n");
3639 }
3640
3641
3642 /*
3643  * XPath related functions
3644  */
3645
3646 #ifdef USE_LIBXML
3647
3648 /*
3649  * Convert XML node to text (dump subtree in case of element,
3650  * return value otherwise)
3651  */
3652 static text *
3653 xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
3654 {
3655         xmltype    *result;
3656
3657         if (cur->type == XML_ELEMENT_NODE)
3658         {
3659                 xmlBufferPtr buf;
3660                 xmlNodePtr      cur_copy;
3661
3662                 buf = xmlBufferCreate();
3663
3664                 /*
3665                  * The result of xmlNodeDump() won't contain namespace definitions
3666                  * from parent nodes, but xmlCopyNode() duplicates a node along with
3667                  * its required namespace definitions.
3668                  */
3669                 cur_copy = xmlCopyNode(cur, 1);
3670
3671                 if (cur_copy == NULL)
3672                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3673                                                 "could not copy node");
3674
3675                 PG_TRY();
3676                 {
3677                         xmlNodeDump(buf, NULL, cur_copy, 0, 1);
3678                         result = xmlBuffer_to_xmltype(buf);
3679                 }
3680                 PG_CATCH();
3681                 {
3682                         xmlFreeNode(cur_copy);
3683                         xmlBufferFree(buf);
3684                         PG_RE_THROW();
3685                 }
3686                 PG_END_TRY();
3687                 xmlFreeNode(cur_copy);
3688                 xmlBufferFree(buf);
3689         }
3690         else
3691         {
3692                 xmlChar    *str;
3693
3694                 str = xmlXPathCastNodeToString(cur);
3695                 PG_TRY();
3696                 {
3697                         /* Here we rely on XML having the same representation as TEXT */
3698                         char       *escaped = escape_xml((char *) str);
3699
3700                         result = (xmltype *) cstring_to_text(escaped);
3701                         pfree(escaped);
3702                 }
3703                 PG_CATCH();
3704                 {
3705                         xmlFree(str);
3706                         PG_RE_THROW();
3707                 }
3708                 PG_END_TRY();
3709                 xmlFree(str);
3710         }
3711
3712         return result;
3713 }
3714
3715 /*
3716  * Convert an XML XPath object (the result of evaluating an XPath expression)
3717  * to an array of xml values, which are appended to astate.  The function
3718  * result value is the number of elements in the array.
3719  *
3720  * If "astate" is NULL then we don't generate the array value, but we still
3721  * return the number of elements it would have had.
3722  *
3723  * Nodesets are converted to an array containing the nodes' textual
3724  * representations.  Primitive values (float, double, string) are converted
3725  * to a single-element array containing the value's string representation.
3726  */
3727 static int
3728 xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
3729                                            ArrayBuildState *astate,
3730                                            PgXmlErrorContext *xmlerrcxt)
3731 {
3732         int                     result = 0;
3733         Datum           datum;
3734         Oid                     datumtype;
3735         char       *result_str;
3736
3737         switch (xpathobj->type)
3738         {
3739                 case XPATH_NODESET:
3740                         if (xpathobj->nodesetval != NULL)
3741                         {
3742                                 result = xpathobj->nodesetval->nodeNr;
3743                                 if (astate != NULL)
3744                                 {
3745                                         int                     i;
3746
3747                                         for (i = 0; i < result; i++)
3748                                         {
3749                                                 datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
3750                                                                                                                                  xmlerrcxt));
3751                                                 (void) accumArrayResult(astate, datum, false,
3752                                                                                                 XMLOID, CurrentMemoryContext);
3753                                         }
3754                                 }
3755                         }
3756                         return result;
3757
3758                 case XPATH_BOOLEAN:
3759                         if (astate == NULL)
3760                                 return 1;
3761                         datum = BoolGetDatum(xpathobj->boolval);
3762                         datumtype = BOOLOID;
3763                         break;
3764
3765                 case XPATH_NUMBER:
3766                         if (astate == NULL)
3767                                 return 1;
3768                         datum = Float8GetDatum(xpathobj->floatval);
3769                         datumtype = FLOAT8OID;
3770                         break;
3771
3772                 case XPATH_STRING:
3773                         if (astate == NULL)
3774                                 return 1;
3775                         datum = CStringGetDatum((char *) xpathobj->stringval);
3776                         datumtype = CSTRINGOID;
3777                         break;
3778
3779                 default:
3780                         elog(ERROR, "xpath expression result type %d is unsupported",
3781                                  xpathobj->type);
3782                         return 0;                       /* keep compiler quiet */
3783         }
3784
3785         /* Common code for scalar-value cases */
3786         result_str = map_sql_value_to_xml_value(datum, datumtype, true);
3787         datum = PointerGetDatum(cstring_to_xmltype(result_str));
3788         (void) accumArrayResult(astate, datum, false,
3789                                                         XMLOID, CurrentMemoryContext);
3790         return 1;
3791 }
3792
3793
3794 /*
3795  * Common code for xpath() and xmlexists()
3796  *
3797  * Evaluate XPath expression and return number of nodes in res_items
3798  * and array of XML values in astate.  Either of those pointers can be
3799  * NULL if the corresponding result isn't wanted.
3800  *
3801  * It is up to the user to ensure that the XML passed is in fact
3802  * an XML document - XPath doesn't work easily on fragments without
3803  * a context node being known.
3804  */
3805 static void
3806 xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
3807                            int *res_nitems, ArrayBuildState *astate)
3808 {
3809         PgXmlErrorContext *xmlerrcxt;
3810         volatile xmlParserCtxtPtr ctxt = NULL;
3811         volatile xmlDocPtr doc = NULL;
3812         volatile xmlXPathContextPtr xpathctx = NULL;
3813         volatile xmlXPathCompExprPtr xpathcomp = NULL;
3814         volatile xmlXPathObjectPtr xpathobj = NULL;
3815         char       *datastr;
3816         int32           len;
3817         int32           xpath_len;
3818         xmlChar    *string;
3819         xmlChar    *xpath_expr;
3820         int                     i;
3821         int                     ndim;
3822         Datum      *ns_names_uris;
3823         bool       *ns_names_uris_nulls;
3824         int                     ns_count;
3825
3826         /*
3827          * Namespace mappings are passed as text[].  If an empty array is passed
3828          * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
3829          * Else, a 2-dimensional array with length of the second axis being equal
3830          * to 2 should be passed, i.e., every subarray contains 2 elements, the
3831          * first element defining the name, the second one the URI.  Example:
3832          * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
3833          * 'http://example2.com']].
3834          */
3835         ndim = namespaces ? ARR_NDIM(namespaces) : 0;
3836         if (ndim != 0)
3837         {
3838                 int                *dims;
3839
3840                 dims = ARR_DIMS(namespaces);
3841
3842                 if (ndim != 2 || dims[1] != 2)
3843                         ereport(ERROR,
3844                                         (errcode(ERRCODE_DATA_EXCEPTION),
3845                                          errmsg("invalid array for XML namespace mapping"),
3846                                          errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
3847
3848                 Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
3849
3850                 deconstruct_array(namespaces, TEXTOID, -1, false, 'i',
3851                                                   &ns_names_uris, &ns_names_uris_nulls,
3852                                                   &ns_count);
3853
3854                 Assert((ns_count % 2) == 0);    /* checked above */
3855                 ns_count /= 2;                  /* count pairs only */
3856         }
3857         else
3858         {
3859                 ns_names_uris = NULL;
3860                 ns_names_uris_nulls = NULL;
3861                 ns_count = 0;
3862         }
3863
3864         datastr = VARDATA(data);
3865         len = VARSIZE(data) - VARHDRSZ;
3866         xpath_len = VARSIZE(xpath_expr_text) - VARHDRSZ;
3867         if (xpath_len == 0)
3868                 ereport(ERROR,
3869                                 (errcode(ERRCODE_DATA_EXCEPTION),
3870                                  errmsg("empty XPath expression")));
3871
3872         string = pg_xmlCharStrndup(datastr, len);
3873         xpath_expr = pg_xmlCharStrndup(VARDATA(xpath_expr_text), xpath_len);
3874
3875         xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
3876
3877         PG_TRY();
3878         {
3879                 xmlInitParser();
3880
3881                 /*
3882                  * redundant XML parsing (two parsings for the same value during one
3883                  * command execution are possible)
3884                  */
3885                 ctxt = xmlNewParserCtxt();
3886                 if (ctxt == NULL || xmlerrcxt->err_occurred)
3887                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3888                                                 "could not allocate parser context");
3889                 doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
3890                 if (doc == NULL || xmlerrcxt->err_occurred)
3891                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
3892                                                 "could not parse XML document");
3893                 xpathctx = xmlXPathNewContext(doc);
3894                 if (xpathctx == NULL || xmlerrcxt->err_occurred)
3895                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3896                                                 "could not allocate XPath context");
3897                 xpathctx->node = xmlDocGetRootElement(doc);
3898                 if (xpathctx->node == NULL || xmlerrcxt->err_occurred)
3899                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
3900                                                 "could not find root XML element");
3901
3902                 /* register namespaces, if any */
3903                 if (ns_count > 0)
3904                 {
3905                         for (i = 0; i < ns_count; i++)
3906                         {
3907                                 char       *ns_name;
3908                                 char       *ns_uri;
3909
3910                                 if (ns_names_uris_nulls[i * 2] ||
3911                                         ns_names_uris_nulls[i * 2 + 1])
3912                                         ereport(ERROR,
3913                                                         (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
3914                                           errmsg("neither namespace name nor URI may be null")));
3915                                 ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
3916                                 ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
3917                                 if (xmlXPathRegisterNs(xpathctx,
3918                                                                            (xmlChar *) ns_name,
3919                                                                            (xmlChar *) ns_uri) != 0)
3920                                         ereport(ERROR,          /* is this an internal error??? */
3921                                                         (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
3922                                                                         ns_name, ns_uri)));
3923                         }
3924                 }
3925
3926                 xpathcomp = xmlXPathCompile(xpath_expr);
3927                 if (xpathcomp == NULL || xmlerrcxt->err_occurred)
3928                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
3929                                                 "invalid XPath expression");
3930
3931                 /*
3932                  * Version 2.6.27 introduces a function named
3933                  * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
3934                  * but we can derive the existence by whether any nodes are returned,
3935                  * thereby preventing a library version upgrade and keeping the code
3936                  * the same.
3937                  */
3938                 xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
3939                 if (xpathobj == NULL || xmlerrcxt->err_occurred)
3940                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
3941                                                 "could not create XPath object");
3942
3943                 /*
3944                  * Extract the results as requested.
3945                  */
3946                 if (res_nitems != NULL)
3947                         *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
3948                 else
3949                         (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
3950         }
3951         PG_CATCH();
3952         {
3953                 if (xpathobj)
3954                         xmlXPathFreeObject(xpathobj);
3955                 if (xpathcomp)
3956                         xmlXPathFreeCompExpr(xpathcomp);
3957                 if (xpathctx)
3958                         xmlXPathFreeContext(xpathctx);
3959                 if (doc)
3960                         xmlFreeDoc(doc);
3961                 if (ctxt)
3962                         xmlFreeParserCtxt(ctxt);
3963
3964                 pg_xml_done(xmlerrcxt, true);
3965
3966                 PG_RE_THROW();
3967         }
3968         PG_END_TRY();
3969
3970         xmlXPathFreeObject(xpathobj);
3971         xmlXPathFreeCompExpr(xpathcomp);
3972         xmlXPathFreeContext(xpathctx);
3973         xmlFreeDoc(doc);
3974         xmlFreeParserCtxt(ctxt);
3975
3976         pg_xml_done(xmlerrcxt, false);
3977 }
3978 #endif   /* USE_LIBXML */
3979
3980 /*
3981  * Evaluate XPath expression and return array of XML values.
3982  *
3983  * As we have no support of XQuery sequences yet, this function seems
3984  * to be the most useful one (array of XML functions plays a role of
3985  * some kind of substitution for XQuery sequences).
3986  */
3987 Datum
3988 xpath(PG_FUNCTION_ARGS)
3989 {
3990 #ifdef USE_LIBXML
3991         text       *xpath_expr_text = PG_GETARG_TEXT_P(0);
3992         xmltype    *data = PG_GETARG_XML_P(1);
3993         ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
3994         ArrayBuildState *astate;
3995
3996         astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
3997         xpath_internal(xpath_expr_text, data, namespaces,
3998                                    NULL, astate);
3999         PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
4000 #else
4001         NO_XML_SUPPORT();
4002         return 0;
4003 #endif
4004 }
4005
4006 /*
4007  * Determines if the node specified by the supplied XPath exists
4008  * in a given XML document, returning a boolean.
4009  */
4010 Datum
4011 xmlexists(PG_FUNCTION_ARGS)
4012 {
4013 #ifdef USE_LIBXML
4014         text       *xpath_expr_text = PG_GETARG_TEXT_P(0);
4015         xmltype    *data = PG_GETARG_XML_P(1);
4016         int                     res_nitems;
4017
4018         xpath_internal(xpath_expr_text, data, NULL,
4019                                    &res_nitems, NULL);
4020
4021         PG_RETURN_BOOL(res_nitems > 0);
4022 #else
4023         NO_XML_SUPPORT();
4024         return 0;
4025 #endif
4026 }
4027
4028 /*
4029  * Determines if the node specified by the supplied XPath exists
4030  * in a given XML document, returning a boolean. Differs from
4031  * xmlexists as it supports namespaces and is not defined in SQL/XML.
4032  */
4033 Datum
4034 xpath_exists(PG_FUNCTION_ARGS)
4035 {
4036 #ifdef USE_LIBXML
4037         text       *xpath_expr_text = PG_GETARG_TEXT_P(0);
4038         xmltype    *data = PG_GETARG_XML_P(1);
4039         ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4040         int                     res_nitems;
4041
4042         xpath_internal(xpath_expr_text, data, namespaces,
4043                                    &res_nitems, NULL);
4044
4045         PG_RETURN_BOOL(res_nitems > 0);
4046 #else
4047         NO_XML_SUPPORT();
4048         return 0;
4049 #endif
4050 }
4051
4052 /*
4053  * Functions for checking well-formed-ness
4054  */
4055
4056 #ifdef USE_LIBXML
4057 static bool
4058 wellformed_xml(text *data, XmlOptionType xmloption_arg)
4059 {
4060         bool            result;
4061         volatile xmlDocPtr doc = NULL;
4062
4063         /* We want to catch any exceptions and return false */
4064         PG_TRY();
4065         {
4066                 doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());
4067                 result = true;
4068         }
4069         PG_CATCH();
4070         {
4071                 FlushErrorState();
4072                 result = false;
4073         }
4074         PG_END_TRY();
4075
4076         if (doc)
4077                 xmlFreeDoc(doc);
4078
4079         return result;
4080 }
4081 #endif
4082
4083 Datum
4084 xml_is_well_formed(PG_FUNCTION_ARGS)
4085 {
4086 #ifdef USE_LIBXML
4087         text       *data = PG_GETARG_TEXT_P(0);
4088
4089         PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4090 #else
4091         NO_XML_SUPPORT();
4092         return 0;
4093 #endif   /* not USE_LIBXML */
4094 }
4095
4096 Datum
4097 xml_is_well_formed_document(PG_FUNCTION_ARGS)
4098 {
4099 #ifdef USE_LIBXML
4100         text       *data = PG_GETARG_TEXT_P(0);
4101
4102         PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4103 #else
4104         NO_XML_SUPPORT();
4105         return 0;
4106 #endif   /* not USE_LIBXML */
4107 }
4108
4109 Datum
4110 xml_is_well_formed_content(PG_FUNCTION_ARGS)
4111 {
4112 #ifdef USE_LIBXML
4113         text       *data = PG_GETARG_TEXT_P(0);
4114
4115         PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4116 #else
4117         NO_XML_SUPPORT();
4118         return 0;
4119 #endif   /* not USE_LIBXML */
4120 }
4121
4122 /*
4123  * support functions for XMLTABLE
4124  *
4125  */
4126 #ifdef USE_LIBXML
4127
4128 /*
4129  * Returns private data from executor state. Ensure validity by check with
4130  * MAGIC number.
4131  */
4132 static inline XmlTableBuilderData *
4133 GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
4134 {
4135         XmlTableBuilderData *result;
4136
4137         if (!IsA(state, TableFuncScanState))
4138                 elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4139         result = (XmlTableBuilderData *) state->opaque;
4140         if (result->magic != XMLTABLE_CONTEXT_MAGIC)
4141                 elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4142
4143         return result;
4144 }
4145 #endif
4146
4147 /*
4148  * XmlTableInitOpaque
4149  *              Fill in TableFuncScanState->opaque for XmlTable processor; initialize
4150  *              the XML parser.
4151  *
4152  * Note: Because we call pg_xml_init() here and pg_xml_done() in
4153  * XmlTableDestroyOpaque, it is critical for robustness that no other
4154  * executor nodes run until this node is processed to completion.  Caller
4155  * must execute this to completion (probably filling a tuplestore to exhaust
4156  * this node in a single pass) instead of using row-per-call mode.
4157  */
4158 static void
4159 XmlTableInitOpaque(TableFuncScanState *state, int natts)
4160 {
4161 #ifdef USE_LIBXML
4162         volatile xmlParserCtxtPtr ctxt = NULL;
4163         XmlTableBuilderData *xtCxt;
4164         PgXmlErrorContext *xmlerrcxt;
4165
4166         xtCxt = palloc0(sizeof(XmlTableBuilderData));
4167         xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
4168         xtCxt->natts = natts;
4169         xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
4170
4171         xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4172
4173         PG_TRY();
4174         {
4175                 xmlInitParser();
4176
4177                 ctxt = xmlNewParserCtxt();
4178                 if (ctxt == NULL || xmlerrcxt->err_occurred)
4179                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4180                                                 "could not allocate parser context");
4181         }
4182         PG_CATCH();
4183         {
4184                 if (ctxt != NULL)
4185                         xmlFreeParserCtxt(ctxt);
4186
4187                 pg_xml_done(xmlerrcxt, true);
4188
4189                 PG_RE_THROW();
4190         }
4191         PG_END_TRY();
4192
4193         xtCxt->xmlerrcxt = xmlerrcxt;
4194         xtCxt->ctxt = ctxt;
4195
4196         state->opaque = xtCxt;
4197 #else
4198         NO_XML_SUPPORT();
4199 #endif   /* not USE_LIBXML */
4200 }
4201
4202 /*
4203  * XmlTableSetDocument
4204  *              Install the input document
4205  */
4206 static void
4207 XmlTableSetDocument(TableFuncScanState *state, Datum value)
4208 {
4209 #ifdef USE_LIBXML
4210         XmlTableBuilderData *xtCxt;
4211         xmltype    *xmlval = DatumGetXmlP(value);
4212         char       *str;
4213         xmlChar    *xstr;
4214         int                     length;
4215         volatile xmlDocPtr doc = NULL;
4216         volatile xmlXPathContextPtr xpathcxt = NULL;
4217
4218         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
4219
4220         /*
4221          * Use out function for casting to string (remove encoding property). See
4222          * comment in xml_out.
4223          */
4224         str = xml_out_internal(xmlval, 0);
4225
4226         length = strlen(str);
4227         xstr = pg_xmlCharStrndup(str, length);
4228
4229         PG_TRY();
4230         {
4231                 doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4232                 if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
4233                         xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4234                                                 "could not parse XML document");
4235                 xpathcxt = xmlXPathNewContext(doc);
4236                 if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
4237                         xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4238                                                 "could not allocate XPath context");
4239                 xpathcxt->node = xmlDocGetRootElement(doc);
4240                 if (xpathcxt->node == NULL || xtCxt->xmlerrcxt->err_occurred)
4241                         xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4242                                                 "could not find root XML element");
4243         }
4244         PG_CATCH();
4245         {
4246                 if (xpathcxt != NULL)
4247                         xmlXPathFreeContext(xpathcxt);
4248                 if (doc != NULL)
4249                         xmlFreeDoc(doc);
4250
4251                 PG_RE_THROW();
4252         }
4253         PG_END_TRY();
4254
4255         xtCxt->doc = doc;
4256         xtCxt->xpathcxt = xpathcxt;
4257 #else
4258         NO_XML_SUPPORT();
4259 #endif   /* not USE_LIBXML */
4260 }
4261
4262 /*
4263  * XmlTableSetNamespace
4264  *              Add a namespace declaration
4265  */
4266 static void
4267 XmlTableSetNamespace(TableFuncScanState *state, char *name, char *uri)
4268 {
4269 #ifdef USE_LIBXML
4270         XmlTableBuilderData *xtCxt;
4271
4272         if (name == NULL)
4273                 ereport(ERROR,
4274                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4275                                  errmsg("DEFAULT namespace is not supported")));
4276         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4277
4278         if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4279                                                    pg_xmlCharStrndup(name, strlen(name)),
4280                                                    pg_xmlCharStrndup(uri, strlen(uri))))
4281                 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4282                                         "could not set XML namespace");
4283 #else
4284         NO_XML_SUPPORT();
4285 #endif   /* not USE_LIBXML */
4286 }
4287
4288 /*
4289  * XmlTableSetRowFilter
4290  *              Install the row-filter Xpath expression.
4291  */
4292 static void
4293 XmlTableSetRowFilter(TableFuncScanState *state, char *path)
4294 {
4295 #ifdef USE_LIBXML
4296         XmlTableBuilderData *xtCxt;
4297         xmlChar    *xstr;
4298
4299         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4300
4301         if (*path == '\0')
4302                 ereport(ERROR,
4303                                 (errcode(ERRCODE_DATA_EXCEPTION),
4304                                  errmsg("row path filter must not be empty string")));
4305
4306         xstr = pg_xmlCharStrndup(path, strlen(path));
4307
4308         xtCxt->xpathcomp = xmlXPathCompile(xstr);
4309         if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
4310                 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_SYNTAX_ERROR,
4311                                         "invalid XPath expression");
4312 #else
4313         NO_XML_SUPPORT();
4314 #endif   /* not USE_LIBXML */
4315 }
4316
4317 /*
4318  * XmlTableSetColumnFilter
4319  *              Install the column-filter Xpath expression, for the given column.
4320  */
4321 static void
4322 XmlTableSetColumnFilter(TableFuncScanState *state, char *path, int colnum)
4323 {
4324 #ifdef USE_LIBXML
4325         XmlTableBuilderData *xtCxt;
4326         xmlChar    *xstr;
4327
4328         AssertArg(PointerIsValid(path));
4329
4330         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4331
4332         if (*path == '\0')
4333                 ereport(ERROR,
4334                                 (errcode(ERRCODE_DATA_EXCEPTION),
4335                                  errmsg("column path filter must not be empty string")));
4336
4337         xstr = pg_xmlCharStrndup(path, strlen(path));
4338
4339         xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr);
4340         if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
4341                 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4342                                         "invalid XPath expression");
4343 #else
4344         NO_XML_SUPPORT();
4345 #endif   /* not USE_LIBXML */
4346 }
4347
4348 /*
4349  * XmlTableFetchRow
4350  *              Prepare the next "current" tuple for upcoming GetValue calls.
4351  *              Returns FALSE if the row-filter expression returned no more rows.
4352  */
4353 static bool
4354 XmlTableFetchRow(TableFuncScanState *state)
4355 {
4356 #ifdef USE_LIBXML
4357         XmlTableBuilderData *xtCxt;
4358
4359         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4360
4361         /*
4362          * XmlTable returns table - set of composite values. The error context, is
4363          * used for producement more values, between two calls, there can be
4364          * created and used another libxml2 error context. It is libxml2 global
4365          * value, so it should be refreshed any time before any libxml2 usage,
4366          * that is finished by returning some value.
4367          */
4368         xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4369
4370         if (xtCxt->xpathobj == NULL)
4371         {
4372                 xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4373                 if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4374                         xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4375                                                 "could not create XPath object");
4376
4377                 xtCxt->row_count = 0;
4378         }
4379
4380         if (xtCxt->xpathobj->type == XPATH_NODESET)
4381         {
4382                 if (xtCxt->xpathobj->nodesetval != NULL)
4383                 {
4384                         if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
4385                                 return true;
4386                 }
4387         }
4388
4389         return false;
4390 #else
4391         NO_XML_SUPPORT();
4392         return false;
4393 #endif   /* not USE_LIBXML */
4394 }
4395
4396 /*
4397  * XmlTableGetValue
4398  *              Return the value for column number 'colnum' for the current row.  If
4399  *              column -1 is requested, return representation of the whole row.
4400  *
4401  * This leaks memory, so be sure to reset often the context in which it's
4402  * called.
4403  */
4404 static Datum
4405 XmlTableGetValue(TableFuncScanState *state, int colnum,
4406                                  Oid typid, int32 typmod, bool *isnull)
4407 {
4408 #ifdef USE_LIBXML
4409         XmlTableBuilderData *xtCxt;
4410         Datum           result = (Datum) 0;
4411         xmlNodePtr      cur;
4412         char       *cstr = NULL;
4413         volatile xmlXPathObjectPtr xpathobj = NULL;
4414
4415         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
4416
4417         Assert(xtCxt->xpathobj &&
4418                    xtCxt->xpathobj->type == XPATH_NODESET &&
4419                    xtCxt->xpathobj->nodesetval != NULL);
4420
4421         /* Propagate context related error context to libxml2 */
4422         xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4423
4424         *isnull = false;
4425
4426         cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
4427
4428         Assert(xtCxt->xpathscomp[colnum] != NULL);
4429
4430         PG_TRY();
4431         {
4432                 /* Set current node as entry point for XPath evaluation */
4433                 xtCxt->xpathcxt->node = cur;
4434
4435                 /* Evaluate column path */
4436                 xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
4437                 if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4438                         xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4439                                                 "could not create XPath object");
4440
4441                 /*
4442                  * There are four possible cases, depending on the number of nodes
4443                  * returned by the XPath expression and the type of the target column:
4444                  * a) XPath returns no nodes.  b) One node is returned, and column is
4445                  * of type XML.  c) One node, column type other than XML.  d) Multiple
4446                  * nodes are returned.
4447                  */
4448                 if (xpathobj->type == XPATH_NODESET)
4449                 {
4450                         int                     count = 0;
4451
4452                         if (xpathobj->nodesetval != NULL)
4453                                 count = xpathobj->nodesetval->nodeNr;
4454
4455                         if (xpathobj->nodesetval == NULL || count == 0)
4456                         {
4457                                 *isnull = true;
4458                         }
4459                         else if (count == 1 && typid == XMLOID)
4460                         {
4461                                 text       *textstr;
4462
4463                                 /* simple case, result is one value */
4464                                 textstr = xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[0],
4465                                                                                            xtCxt->xmlerrcxt);
4466                                 cstr = text_to_cstring(textstr);
4467                         }
4468                         else if (count == 1)
4469                         {
4470                                 xmlChar    *str;
4471
4472                                 str = xmlNodeListGetString(xtCxt->doc,
4473                                                    xpathobj->nodesetval->nodeTab[0]->xmlChildrenNode,
4474                                                                                    1);
4475
4476                                 if (str != NULL)
4477                                 {
4478                                         PG_TRY();
4479                                         {
4480                                                 cstr = pstrdup((char *) str);
4481                                         }
4482                                         PG_CATCH();
4483                                         {
4484                                                 xmlFree(str);
4485                                                 PG_RE_THROW();
4486                                         }
4487                                         PG_END_TRY();
4488                                         xmlFree(str);
4489                                 }
4490                                 else
4491                                 {
4492                                         /*
4493                                          * This line ensure mapping of empty tags to PostgreSQL
4494                                          * value. Usually we would to map a empty tag to empty
4495                                          * string. But this mapping can create empty string when
4496                                          * user doesn't expect it - when empty tag is enforced
4497                                          * by libxml2 - when user uses a text() function for
4498                                          * example.
4499                                          */
4500                                         cstr = "";
4501                                 }
4502                         }
4503                         else
4504                         {
4505                                 StringInfoData str;
4506                                 int                     i;
4507
4508                                 Assert(count > 1);
4509
4510                                 /*
4511                                  * When evaluating the XPath expression returns multiple
4512                                  * nodes, the result is the concatenation of them all. The
4513                                  * target type must be XML.
4514                                  */
4515                                 if (typid != XMLOID)
4516                                         ereport(ERROR,
4517                                                         (errcode(ERRCODE_CARDINALITY_VIOLATION),
4518                                                          errmsg("more than one value returned by column XPath expression")));
4519
4520                                 /* Concatenate serialized values */
4521                                 initStringInfo(&str);
4522                                 for (i = 0; i < count; i++)
4523                                 {
4524                                         appendStringInfoText(&str,
4525                                            xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4526                                                                                         xtCxt->xmlerrcxt));
4527                                 }
4528                                 cstr = str.data;
4529                         }
4530                 }
4531                 else if (xpathobj->type == XPATH_STRING)
4532                 {
4533                         cstr = (char *) xpathobj->stringval;
4534                 }
4535                 else
4536                         elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
4537
4538                 /*
4539                  * By here, either cstr contains the result value, or the isnull flag
4540                  * has been set.
4541                  */
4542                 Assert(cstr || *isnull);
4543
4544                 if (!*isnull)
4545                         result = InputFunctionCall(&state->in_functions[colnum],
4546                                                                            cstr,
4547                                                                            state->typioparams[colnum],
4548                                                                            typmod);
4549         }
4550         PG_CATCH();
4551         {
4552                 if (xpathobj != NULL)
4553                         xmlXPathFreeObject(xpathobj);
4554                 PG_RE_THROW();
4555         }
4556         PG_END_TRY();
4557
4558         xmlXPathFreeObject(xpathobj);
4559
4560         return result;
4561 #else
4562         NO_XML_SUPPORT();
4563         return 0;
4564 #endif   /* not USE_LIBXML */
4565 }
4566
4567 /*
4568  * XmlTableDestroyOpaque
4569  *              Release all libxml2 resources
4570  */
4571 static void
4572 XmlTableDestroyOpaque(TableFuncScanState *state)
4573 {
4574 #ifdef USE_LIBXML
4575         XmlTableBuilderData *xtCxt;
4576
4577         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
4578
4579         /* Propagate context related error context to libxml2 */
4580         xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4581
4582         if (xtCxt->xpathscomp != NULL)
4583         {
4584                 int                     i;
4585
4586                 for (i = 0; i < xtCxt->natts; i++)
4587                         if (xtCxt->xpathscomp[i] != NULL)
4588                                 xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
4589         }
4590
4591         if (xtCxt->xpathobj != NULL)
4592                 xmlXPathFreeObject(xtCxt->xpathobj);
4593         if (xtCxt->xpathcomp != NULL)
4594                 xmlXPathFreeCompExpr(xtCxt->xpathcomp);
4595         if (xtCxt->xpathcxt != NULL)
4596                 xmlXPathFreeContext(xtCxt->xpathcxt);
4597         if (xtCxt->doc != NULL)
4598                 xmlFreeDoc(xtCxt->doc);
4599         if (xtCxt->ctxt != NULL)
4600                 xmlFreeParserCtxt(xtCxt->ctxt);
4601
4602         pg_xml_done(xtCxt->xmlerrcxt, true);
4603
4604         /* not valid anymore */
4605         xtCxt->magic = 0;
4606         state->opaque = NULL;
4607
4608 #else
4609         NO_XML_SUPPORT();
4610 #endif   /* not USE_LIBXML */
4611 }