]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/xml.c
Fix initialization of fake LSN for unlogged relations
[postgresql] / src / backend / utils / adt / xml.c
1 /*-------------------------------------------------------------------------
2  *
3  * xml.c
4  *        XML data type support.
5  *
6  *
7  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/backend/utils/adt/xml.c
11  *
12  *-------------------------------------------------------------------------
13  */
14
15 /*
16  * Generally, XML type support is only available when libxml use was
17  * configured during the build.  But even if that is not done, the
18  * type and all the functions are available, but most of them will
19  * fail.  For one thing, this avoids having to manage variant catalog
20  * installations.  But it also has nice effects such as that you can
21  * dump a database containing XML type data even if the server is not
22  * linked with libxml.  Thus, make sure xml_out() works even if nothing
23  * else does.
24  */
25
26 /*
27  * Notes on memory management:
28  *
29  * Sometimes libxml allocates global structures in the hope that it can reuse
30  * them later on.  This makes it impractical to change the xmlMemSetup
31  * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32  * allocated with malloc() or vice versa.  Since libxml might be used by
33  * loadable modules, eg libperl, our only safe choices are to change the
34  * functions at postmaster/backend launch or not at all.  Since we'd rather
35  * not activate libxml in sessions that might never use it, the latter choice
36  * is the preferred one.  However, for debugging purposes it can be awfully
37  * handy to constrain libxml's allocations to be done in a specific palloc
38  * context, where they're easy to track.  Therefore there is code here that
39  * can be enabled in debug builds to redirect libxml's allocations into a
40  * special context LibxmlContext.  It's not recommended to turn this on in
41  * a production build because of the possibility of bad interactions with
42  * external modules.
43  */
44 /* #define USE_LIBXMLCONTEXT */
45
46 #include "postgres.h"
47
48 #ifdef USE_LIBXML
49 #include <libxml/chvalid.h>
50 #include <libxml/parser.h>
51 #include <libxml/parserInternals.h>
52 #include <libxml/tree.h>
53 #include <libxml/uri.h>
54 #include <libxml/xmlerror.h>
55 #include <libxml/xmlversion.h>
56 #include <libxml/xmlwriter.h>
57 #include <libxml/xpath.h>
58 #include <libxml/xpathInternals.h>
59
60 /*
61  * We used to check for xmlStructuredErrorContext via a configure test; but
62  * that doesn't work on Windows, so instead use this grottier method of
63  * testing the library version number.
64  */
65 #if LIBXML_VERSION >= 20704
66 #define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
67 #endif
68 #endif                                                  /* USE_LIBXML */
69
70 #include "access/htup_details.h"
71 #include "access/table.h"
72 #include "catalog/namespace.h"
73 #include "catalog/pg_class.h"
74 #include "catalog/pg_type.h"
75 #include "commands/dbcommands.h"
76 #include "executor/spi.h"
77 #include "executor/tablefunc.h"
78 #include "fmgr.h"
79 #include "lib/stringinfo.h"
80 #include "libpq/pqformat.h"
81 #include "mb/pg_wchar.h"
82 #include "miscadmin.h"
83 #include "nodes/execnodes.h"
84 #include "nodes/nodeFuncs.h"
85 #include "utils/array.h"
86 #include "utils/builtins.h"
87 #include "utils/date.h"
88 #include "utils/datetime.h"
89 #include "utils/lsyscache.h"
90 #include "utils/memutils.h"
91 #include "utils/rel.h"
92 #include "utils/syscache.h"
93 #include "utils/xml.h"
94
95
96 /* GUC variables */
97 int                     xmlbinary;
98 int                     xmloption;
99
100 #ifdef USE_LIBXML
101
102 /* random number to identify PgXmlErrorContext */
103 #define ERRCXT_MAGIC    68275028
104
105 struct PgXmlErrorContext
106 {
107         int                     magic;
108         /* strictness argument passed to pg_xml_init */
109         PgXmlStrictness strictness;
110         /* current error status and accumulated message, if any */
111         bool            err_occurred;
112         StringInfoData err_buf;
113         /* previous libxml error handling state (saved by pg_xml_init) */
114         xmlStructuredErrorFunc saved_errfunc;
115         void       *saved_errcxt;
116         /* previous libxml entity handler (saved by pg_xml_init) */
117         xmlExternalEntityLoader saved_entityfunc;
118 };
119
120 static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
121                                                                                    xmlParserCtxtPtr ctxt);
122 static void xml_errorHandler(void *data, xmlErrorPtr error);
123 static void xml_ereport_by_code(int level, int sqlcode,
124                                                                 const char *msg, int errcode);
125 static void chopStringInfoNewlines(StringInfo str);
126 static void appendStringInfoLineSeparator(StringInfo str);
127
128 #ifdef USE_LIBXMLCONTEXT
129
130 static MemoryContext LibxmlContext = NULL;
131
132 static void xml_memory_init(void);
133 static void *xml_palloc(size_t size);
134 static void *xml_repalloc(void *ptr, size_t size);
135 static void xml_pfree(void *ptr);
136 static char *xml_pstrdup(const char *string);
137 #endif                                                  /* USE_LIBXMLCONTEXT */
138
139 static xmlChar *xml_text2xmlChar(text *in);
140 static int      parse_xml_decl(const xmlChar *str, size_t *lenp,
141                                                    xmlChar **version, xmlChar **encoding, int *standalone);
142 static bool print_xml_decl(StringInfo buf, const xmlChar *version,
143                                                    pg_enc encoding, int standalone);
144 static bool xml_doctype_in_content(const xmlChar *str);
145 static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
146                                                    bool preserve_whitespace, int encoding);
147 static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
148 static int      xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
149                                                                    ArrayBuildState *astate,
150                                                                    PgXmlErrorContext *xmlerrcxt);
151 static xmlChar *pg_xmlCharStrndup(const char *str, size_t len);
152 #endif                                                  /* USE_LIBXML */
153
154 static void xmldata_root_element_start(StringInfo result, const char *eltname,
155                                                                            const char *xmlschema, const char *targetns,
156                                                                            bool top_level);
157 static void xmldata_root_element_end(StringInfo result, const char *eltname);
158 static StringInfo query_to_xml_internal(const char *query, char *tablename,
159                                                                                 const char *xmlschema, bool nulls, bool tableforest,
160                                                                                 const char *targetns, bool top_level);
161 static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
162                                                                                           bool nulls, bool tableforest, const char *targetns);
163 static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
164                                                                                                          List *relid_list, bool nulls,
165                                                                                                          bool tableforest, const char *targetns);
166 static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
167                                                                                                           bool nulls, bool tableforest,
168                                                                                                           const char *targetns);
169 static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
170 static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
171 static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
172 static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
173                                                                           char *tablename, bool nulls, bool tableforest,
174                                                                           const char *targetns, bool top_level);
175
176 /* XMLTABLE support */
177 #ifdef USE_LIBXML
178 /* random number to identify XmlTableContext */
179 #define XMLTABLE_CONTEXT_MAGIC  46922182
180 typedef struct XmlTableBuilderData
181 {
182         int                     magic;
183         int                     natts;
184         long int        row_count;
185         PgXmlErrorContext *xmlerrcxt;
186         xmlParserCtxtPtr ctxt;
187         xmlDocPtr       doc;
188         xmlXPathContextPtr xpathcxt;
189         xmlXPathCompExprPtr xpathcomp;
190         xmlXPathObjectPtr xpathobj;
191         xmlXPathCompExprPtr *xpathscomp;
192 } XmlTableBuilderData;
193 #endif
194
195 static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
196 static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
197 static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name,
198                                                                  const char *uri);
199 static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path);
200 static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
201                                                                         const char *path, int colnum);
202 static bool XmlTableFetchRow(struct TableFuncScanState *state);
203 static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
204                                                           Oid typid, int32 typmod, bool *isnull);
205 static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
206
/*
 * Callback table exporting the XmlTable* functions declared above.
 *
 * The initializer is positional, so the entries must stay in the order in
 * which TableFuncRoutine declares its members.
 */
207 const TableFuncRoutine XmlTableRoutine =
208 {
209         XmlTableInitOpaque,
210         XmlTableSetDocument,
211         XmlTableSetNamespace,
212         XmlTableSetRowFilter,
213         XmlTableSetColumnFilter,
214         XmlTableFetchRow,
215         XmlTableGetValue,
216         XmlTableDestroyOpaque
217 };
218
/*
 * Report the ERROR raised by the "#else" arm of functions in this file
 * whose implementation is compiled only when USE_LIBXML is defined.
 */
219 #define NO_XML_SUPPORT() \
220         ereport(ERROR, \
221                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
222                          errmsg("unsupported XML feature"), \
223                          errdetail("This functionality requires the server to be built with libxml support."), \
224                          errhint("You need to rebuild PostgreSQL using --with-libxml.")))
225
226
227 /* from SQL/XML:2008 section 4.9 */
228 #define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
229 #define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
230 #define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
231
232
233 #ifdef USE_LIBXML
234
235 static int
236 xmlChar_to_encoding(const xmlChar *encoding_name)
237 {
238         int                     encoding = pg_char_to_encoding((const char *) encoding_name);
239
240         if (encoding < 0)
241                 ereport(ERROR,
242                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
243                                  errmsg("invalid encoding name \"%s\"",
244                                                 (const char *) encoding_name)));
245         return encoding;
246 }
247 #endif
248
249
250 /*
251  * xml_in uses a plain C string to VARDATA conversion, so for the time being
252  * we use the conversion function for the text datatype.
253  *
254  * This is only acceptable so long as xmltype and text use the same
255  * representation.
256  */
257 Datum
258 xml_in(PG_FUNCTION_ARGS)
259 {
260 #ifdef USE_LIBXML
261         char       *s = PG_GETARG_CSTRING(0);
262         xmltype    *vardata;
263         xmlDocPtr       doc;
264
265         vardata = (xmltype *) cstring_to_text(s);
266
267         /*
268          * Parse the data to check if it is well-formed XML data.  Assume that
269          * ERROR occurred if parsing failed.
270          */
271         doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding());
272         xmlFreeDoc(doc);
273
274         PG_RETURN_XML_P(vardata);
275 #else
276         NO_XML_SUPPORT();
277         return 0;
278 #endif
279 }
280
281
282 #define PG_XML_DEFAULT_VERSION "1.0"
283
284
285 /*
286  * xml_out_internal uses a plain VARDATA to C string conversion, so for the
287  * time being we use the conversion function for the text datatype.
288  *
289  * This is only acceptable so long as xmltype and text use the same
290  * representation.
291  */
292 static char *
293 xml_out_internal(xmltype *x, pg_enc target_encoding)
294 {
295         char       *str = text_to_cstring((text *) x);
296
297 #ifdef USE_LIBXML
298         size_t          len = strlen(str);
299         xmlChar    *version;
300         int                     standalone;
301         int                     res_code;
302
303         if ((res_code = parse_xml_decl((xmlChar *) str,
304                                                                    &len, &version, NULL, &standalone)) == 0)
305         {
306                 StringInfoData buf;
307
308                 initStringInfo(&buf);
309
310                 if (!print_xml_decl(&buf, version, target_encoding, standalone))
311                 {
312                         /*
313                          * If we are not going to produce an XML declaration, eat a single
314                          * newline in the original string to prevent empty first lines in
315                          * the output.
316                          */
317                         if (*(str + len) == '\n')
318                                 len += 1;
319                 }
320                 appendStringInfoString(&buf, str + len);
321
322                 pfree(str);
323
324                 return buf.data;
325         }
326
327         xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
328                                                 "could not parse XML declaration in stored value",
329                                                 res_code);
330 #endif
331         return str;
332 }
333
334
335 Datum
336 xml_out(PG_FUNCTION_ARGS)
337 {
338         xmltype    *x = PG_GETARG_XML_P(0);
339
340         /*
341          * xml_out removes the encoding property in all cases.  This is because we
342          * cannot control from here whether the datum will be converted to a
343          * different client encoding, so we'd do more harm than good by including
344          * it.
345          */
346         PG_RETURN_CSTRING(xml_out_internal(x, 0));
347 }
348
349
350 Datum
351 xml_recv(PG_FUNCTION_ARGS)
352 {
353 #ifdef USE_LIBXML
354         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
355         xmltype    *result;
356         char       *str;
357         char       *newstr;
358         int                     nbytes;
359         xmlDocPtr       doc;
360         xmlChar    *encodingStr = NULL;
361         int                     encoding;
362
363         /*
364          * Read the data in raw format. We don't know yet what the encoding is, as
365          * that information is embedded in the xml declaration; so we have to
366          * parse that before converting to server encoding.
367          */
368         nbytes = buf->len - buf->cursor;
369         str = (char *) pq_getmsgbytes(buf, nbytes);
370
371         /*
372          * We need a null-terminated string to pass to parse_xml_decl().  Rather
373          * than make a separate copy, make the temporary result one byte bigger
374          * than it needs to be.
375          */
376         result = palloc(nbytes + 1 + VARHDRSZ);
377         SET_VARSIZE(result, nbytes + VARHDRSZ);
378         memcpy(VARDATA(result), str, nbytes);
379         str = VARDATA(result);
380         str[nbytes] = '\0';
381
382         parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
383
384         /*
385          * If encoding wasn't explicitly specified in the XML header, treat it as
386          * UTF-8, as that's the default in XML. This is different from xml_in(),
387          * where the input has to go through the normal client to server encoding
388          * conversion.
389          */
390         encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
391
392         /*
393          * Parse the data to check if it is well-formed XML data.  Assume that
394          * xml_parse will throw ERROR if not.
395          */
396         doc = xml_parse(result, xmloption, true, encoding);
397         xmlFreeDoc(doc);
398
399         /* Now that we know what we're dealing with, convert to server encoding */
400         newstr = pg_any_to_server(str, nbytes, encoding);
401
402         if (newstr != str)
403         {
404                 pfree(result);
405                 result = (xmltype *) cstring_to_text(newstr);
406                 pfree(newstr);
407         }
408
409         PG_RETURN_XML_P(result);
410 #else
411         NO_XML_SUPPORT();
412         return 0;
413 #endif
414 }
415
416
417 Datum
418 xml_send(PG_FUNCTION_ARGS)
419 {
420         xmltype    *x = PG_GETARG_XML_P(0);
421         char       *outval;
422         StringInfoData buf;
423
424         /*
425          * xml_out_internal doesn't convert the encoding, it just prints the right
426          * declaration. pq_sendtext will do the conversion.
427          */
428         outval = xml_out_internal(x, pg_get_client_encoding());
429
430         pq_begintypsend(&buf);
431         pq_sendtext(&buf, outval, strlen(outval));
432         pfree(outval);
433         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
434 }
435
436
437 #ifdef USE_LIBXML
438 static void
439 appendStringInfoText(StringInfo str, const text *t)
440 {
441         appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
442 }
443 #endif
444
445
446 static xmltype *
447 stringinfo_to_xmltype(StringInfo buf)
448 {
449         return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
450 }
451
452
453 static xmltype *
454 cstring_to_xmltype(const char *string)
455 {
456         return (xmltype *) cstring_to_text(string);
457 }
458
459
460 #ifdef USE_LIBXML
461 static xmltype *
462 xmlBuffer_to_xmltype(xmlBufferPtr buf)
463 {
464         return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
465                                                                                                 xmlBufferLength(buf));
466 }
467 #endif
468
469
470 Datum
471 xmlcomment(PG_FUNCTION_ARGS)
472 {
473 #ifdef USE_LIBXML
474         text       *arg = PG_GETARG_TEXT_PP(0);
475         char       *argdata = VARDATA_ANY(arg);
476         int                     len = VARSIZE_ANY_EXHDR(arg);
477         StringInfoData buf;
478         int                     i;
479
480         /* check for "--" in string or "-" at the end */
481         for (i = 1; i < len; i++)
482         {
483                 if (argdata[i] == '-' && argdata[i - 1] == '-')
484                         ereport(ERROR,
485                                         (errcode(ERRCODE_INVALID_XML_COMMENT),
486                                          errmsg("invalid XML comment")));
487         }
488         if (len > 0 && argdata[len - 1] == '-')
489                 ereport(ERROR,
490                                 (errcode(ERRCODE_INVALID_XML_COMMENT),
491                                  errmsg("invalid XML comment")));
492
493         initStringInfo(&buf);
494         appendStringInfoString(&buf, "<!--");
495         appendStringInfoText(&buf, arg);
496         appendStringInfoString(&buf, "-->");
497
498         PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
499 #else
500         NO_XML_SUPPORT();
501         return 0;
502 #endif
503 }
504
505
506
507 /*
508  * TODO: xmlconcat needs to merge the notations and unparsed entities
509  * of the argument values.  Not very important in practice, though.
510  */
511 xmltype *
512 xmlconcat(List *args)
513 {
514 #ifdef USE_LIBXML
515         int                     global_standalone = 1;
516         xmlChar    *global_version = NULL;
517         bool            global_version_no_value = false;
518         StringInfoData buf;
519         ListCell   *v;
520
521         initStringInfo(&buf);
522         foreach(v, args)
523         {
524                 xmltype    *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
525                 size_t          len;
526                 xmlChar    *version;
527                 int                     standalone;
528                 char       *str;
529
530                 len = VARSIZE(x) - VARHDRSZ;
531                 str = text_to_cstring((text *) x);
532
533                 parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
534
535                 if (standalone == 0 && global_standalone == 1)
536                         global_standalone = 0;
537                 if (standalone < 0)
538                         global_standalone = -1;
539
540                 if (!version)
541                         global_version_no_value = true;
542                 else if (!global_version)
543                         global_version = version;
544                 else if (xmlStrcmp(version, global_version) != 0)
545                         global_version_no_value = true;
546
547                 appendStringInfoString(&buf, str + len);
548                 pfree(str);
549         }
550
551         if (!global_version_no_value || global_standalone >= 0)
552         {
553                 StringInfoData buf2;
554
555                 initStringInfo(&buf2);
556
557                 print_xml_decl(&buf2,
558                                            (!global_version_no_value) ? global_version : NULL,
559                                            0,
560                                            global_standalone);
561
562                 appendBinaryStringInfo(&buf2, buf.data, buf.len);
563                 buf = buf2;
564         }
565
566         return stringinfo_to_xmltype(&buf);
567 #else
568         NO_XML_SUPPORT();
569         return NULL;
570 #endif
571 }
572
573
574 /*
575  * XMLAGG support
576  */
577 Datum
578 xmlconcat2(PG_FUNCTION_ARGS)
579 {
580         if (PG_ARGISNULL(0))
581         {
582                 if (PG_ARGISNULL(1))
583                         PG_RETURN_NULL();
584                 else
585                         PG_RETURN_XML_P(PG_GETARG_XML_P(1));
586         }
587         else if (PG_ARGISNULL(1))
588                 PG_RETURN_XML_P(PG_GETARG_XML_P(0));
589         else
590                 PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
591                                                                                          PG_GETARG_XML_P(1))));
592 }
593
594
595 Datum
596 texttoxml(PG_FUNCTION_ARGS)
597 {
598         text       *data = PG_GETARG_TEXT_PP(0);
599
600         PG_RETURN_XML_P(xmlparse(data, xmloption, true));
601 }
602
603
604 Datum
605 xmltotext(PG_FUNCTION_ARGS)
606 {
607         xmltype    *data = PG_GETARG_XML_P(0);
608
609         /* It's actually binary compatible. */
610         PG_RETURN_TEXT_P((text *) data);
611 }
612
613
614 text *
615 xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg)
616 {
617         if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data))
618                 ereport(ERROR,
619                                 (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
620                                  errmsg("not an XML document")));
621
622         /* It's actually binary compatible, save for the above check. */
623         return (text *) data;
624 }
625
626
627 xmltype *
628 xmlelement(XmlExpr *xexpr,
629                    Datum *named_argvalue, bool *named_argnull,
630                    Datum *argvalue, bool *argnull)
631 {
632 #ifdef USE_LIBXML
633         xmltype    *result;
634         List       *named_arg_strings;
635         List       *arg_strings;
636         int                     i;
637         ListCell   *arg;
638         ListCell   *narg;
639         PgXmlErrorContext *xmlerrcxt;
640         volatile xmlBufferPtr buf = NULL;
641         volatile xmlTextWriterPtr writer = NULL;
642
643         /*
644          * All arguments are already evaluated, and their values are passed in the
645          * named_argvalue/named_argnull or argvalue/argnull arrays.  This avoids
646          * issues if one of the arguments involves a call to some other function
647          * or subsystem that wants to use libxml on its own terms.  We examine the
648          * original XmlExpr to identify the numbers and types of the arguments.
649          */
650         named_arg_strings = NIL;
651         i = 0;
652         foreach(arg, xexpr->named_args)
653         {
654                 Expr       *e = (Expr *) lfirst(arg);
655                 char       *str;
656
657                 if (named_argnull[i])
658                         str = NULL;
659                 else
660                         str = map_sql_value_to_xml_value(named_argvalue[i],
661                                                                                          exprType((Node *) e),
662                                                                                          false);
663                 named_arg_strings = lappend(named_arg_strings, str);
664                 i++;
665         }
666
667         arg_strings = NIL;
668         i = 0;
669         foreach(arg, xexpr->args)
670         {
671                 Expr       *e = (Expr *) lfirst(arg);
672                 char       *str;
673
674                 /* here we can just forget NULL elements immediately */
675                 if (!argnull[i])
676                 {
677                         str = map_sql_value_to_xml_value(argvalue[i],
678                                                                                          exprType((Node *) e),
679                                                                                          true);
680                         arg_strings = lappend(arg_strings, str);
681                 }
682                 i++;
683         }
684
685         xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
686
687         PG_TRY();
688         {
689                 buf = xmlBufferCreate();
690                 if (buf == NULL || xmlerrcxt->err_occurred)
691                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
692                                                 "could not allocate xmlBuffer");
693                 writer = xmlNewTextWriterMemory(buf, 0);
694                 if (writer == NULL || xmlerrcxt->err_occurred)
695                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
696                                                 "could not allocate xmlTextWriter");
697
698                 xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
699
700                 forboth(arg, named_arg_strings, narg, xexpr->arg_names)
701                 {
702                         char       *str = (char *) lfirst(arg);
703                         char       *argname = strVal(lfirst(narg));
704
705                         if (str)
706                                 xmlTextWriterWriteAttribute(writer,
707                                                                                         (xmlChar *) argname,
708                                                                                         (xmlChar *) str);
709                 }
710
711                 foreach(arg, arg_strings)
712                 {
713                         char       *str = (char *) lfirst(arg);
714
715                         xmlTextWriterWriteRaw(writer, (xmlChar *) str);
716                 }
717
718                 xmlTextWriterEndElement(writer);
719
720                 /* we MUST do this now to flush data out to the buffer ... */
721                 xmlFreeTextWriter(writer);
722                 writer = NULL;
723
724                 result = xmlBuffer_to_xmltype(buf);
725         }
726         PG_CATCH();
727         {
728                 if (writer)
729                         xmlFreeTextWriter(writer);
730                 if (buf)
731                         xmlBufferFree(buf);
732
733                 pg_xml_done(xmlerrcxt, true);
734
735                 PG_RE_THROW();
736         }
737         PG_END_TRY();
738
739         xmlBufferFree(buf);
740
741         pg_xml_done(xmlerrcxt, false);
742
743         return result;
744 #else
745         NO_XML_SUPPORT();
746         return NULL;
747 #endif
748 }
749
750
751 xmltype *
752 xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
753 {
754 #ifdef USE_LIBXML
755         xmlDocPtr       doc;
756
757         doc = xml_parse(data, xmloption_arg, preserve_whitespace,
758                                         GetDatabaseEncoding());
759         xmlFreeDoc(doc);
760
761         return (xmltype *) data;
762 #else
763         NO_XML_SUPPORT();
764         return NULL;
765 #endif
766 }
767
768
769 xmltype *
770 xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null)
771 {
772 #ifdef USE_LIBXML
773         xmltype    *result;
774         StringInfoData buf;
775
776         if (pg_strcasecmp(target, "xml") == 0)
777                 ereport(ERROR,
778                                 (errcode(ERRCODE_SYNTAX_ERROR), /* really */
779                                  errmsg("invalid XML processing instruction"),
780                                  errdetail("XML processing instruction target name cannot be \"%s\".", target)));
781
782         /*
783          * Following the SQL standard, the null check comes after the syntax check
784          * above.
785          */
786         *result_is_null = arg_is_null;
787         if (*result_is_null)
788                 return NULL;
789
790         initStringInfo(&buf);
791
792         appendStringInfo(&buf, "<?%s", target);
793
794         if (arg != NULL)
795         {
796                 char       *string;
797
798                 string = text_to_cstring(arg);
799                 if (strstr(string, "?>") != NULL)
800                         ereport(ERROR,
801                                         (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
802                                          errmsg("invalid XML processing instruction"),
803                                          errdetail("XML processing instruction cannot contain \"?>\".")));
804
805                 appendStringInfoChar(&buf, ' ');
806                 appendStringInfoString(&buf, string + strspn(string, " "));
807                 pfree(string);
808         }
809         appendStringInfoString(&buf, "?>");
810
811         result = stringinfo_to_xmltype(&buf);
812         pfree(buf.data);
813         return result;
814 #else
815         NO_XML_SUPPORT();
816         return NULL;
817 #endif
818 }
819
820
821 xmltype *
822 xmlroot(xmltype *data, text *version, int standalone)
823 {
824 #ifdef USE_LIBXML
825         char       *str;
826         size_t          len;
827         xmlChar    *orig_version;
828         int                     orig_standalone;
829         StringInfoData buf;
830
831         len = VARSIZE(data) - VARHDRSZ;
832         str = text_to_cstring((text *) data);
833
834         parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
835
836         if (version)
837                 orig_version = xml_text2xmlChar(version);
838         else
839                 orig_version = NULL;
840
841         switch (standalone)
842         {
843                 case XML_STANDALONE_YES:
844                         orig_standalone = 1;
845                         break;
846                 case XML_STANDALONE_NO:
847                         orig_standalone = 0;
848                         break;
849                 case XML_STANDALONE_NO_VALUE:
850                         orig_standalone = -1;
851                         break;
852                 case XML_STANDALONE_OMITTED:
853                         /* leave original value */
854                         break;
855         }
856
857         initStringInfo(&buf);
858         print_xml_decl(&buf, orig_version, 0, orig_standalone);
859         appendStringInfoString(&buf, str + len);
860
861         return stringinfo_to_xmltype(&buf);
862 #else
863         NO_XML_SUPPORT();
864         return NULL;
865 #endif
866 }
867
868
869 /*
870  * Validate document (given as string) against DTD (given as external link)
871  *
872  * This has been removed because it is a security hole: unprivileged users
873  * should not be able to use Postgres to fetch arbitrary external files,
874  * which unfortunately is exactly what libxml is willing to do with the DTD
875  * parameter.
876  */
877 Datum
878 xmlvalidate(PG_FUNCTION_ARGS)
879 {
880         ereport(ERROR,
881                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
882                          errmsg("xmlvalidate is not implemented")));
883         return 0;
884 }
885
886
887 bool
888 xml_is_document(xmltype *arg)
889 {
890 #ifdef USE_LIBXML
891         bool            result;
892         volatile xmlDocPtr doc = NULL;
893         MemoryContext ccxt = CurrentMemoryContext;
894
895         /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
896         PG_TRY();
897         {
898                 doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
899                                                 GetDatabaseEncoding());
900                 result = true;
901         }
902         PG_CATCH();
903         {
904                 ErrorData  *errdata;
905                 MemoryContext ecxt;
906
907                 ecxt = MemoryContextSwitchTo(ccxt);
908                 errdata = CopyErrorData();
909                 if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT)
910                 {
911                         FlushErrorState();
912                         result = false;
913                 }
914                 else
915                 {
916                         MemoryContextSwitchTo(ecxt);
917                         PG_RE_THROW();
918                 }
919         }
920         PG_END_TRY();
921
922         if (doc)
923                 xmlFreeDoc(doc);
924
925         return result;
926 #else                                                   /* not USE_LIBXML */
927         NO_XML_SUPPORT();
928         return false;
929 #endif                                                  /* not USE_LIBXML */
930 }
931
932
933 #ifdef USE_LIBXML
934
935 /*
936  * pg_xml_init_library --- set up for use of libxml
937  *
938  * This should be called by each function that is about to use libxml
939  * facilities but doesn't require error handling.  It initializes libxml
940  * and verifies compatibility with the loaded libxml version.  These are
941  * once-per-session activities.
942  *
943  * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
944  * check)
945  */
946 void
947 pg_xml_init_library(void)
948 {
949         static bool first_time = true;
950
951         if (first_time)
952         {
953                 /* Stuff we need do only once per session */
954
955                 /*
956                  * Currently, we have no pure UTF-8 support for internals -- check if
957                  * we can work.
958                  */
959                 if (sizeof(char) != sizeof(xmlChar))
960                         ereport(ERROR,
961                                         (errmsg("could not initialize XML library"),
962                                          errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
963                                                            (int) sizeof(char), (int) sizeof(xmlChar))));
964
965 #ifdef USE_LIBXMLCONTEXT
966                 /* Set up libxml's memory allocation our way */
967                 xml_memory_init();
968 #endif
969
970                 /* Check library compatibility */
971                 LIBXML_TEST_VERSION;
972
973                 first_time = false;
974         }
975 }
976
977 /*
978  * pg_xml_init --- set up for use of libxml and register an error handler
979  *
980  * This should be called by each function that is about to use libxml
981  * facilities and requires error handling.  It initializes libxml with
982  * pg_xml_init_library() and establishes our libxml error handler.
983  *
984  * strictness determines which errors are reported and which are ignored.
985  *
986  * Calls to this function MUST be followed by a PG_TRY block that guarantees
987  * that pg_xml_done() is called during either normal or error exit.
988  *
989  * This is exported for use by contrib/xml2, as well as other code that might
990  * wish to share use of this module's libxml error handler.
991  */
992 PgXmlErrorContext *
993 pg_xml_init(PgXmlStrictness strictness)
994 {
995         PgXmlErrorContext *errcxt;
996         void       *new_errcxt;
997
998         /* Do one-time setup if needed */
999         pg_xml_init_library();
1000
1001         /* Create error handling context structure */
1002         errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
1003         errcxt->magic = ERRCXT_MAGIC;
1004         errcxt->strictness = strictness;
1005         errcxt->err_occurred = false;
1006         initStringInfo(&errcxt->err_buf);
1007
1008         /*
1009          * Save original error handler and install ours. libxml originally didn't
1010          * distinguish between the contexts for generic and for structured error
1011          * handlers.  If we're using an old libxml version, we must thus save the
1012          * generic error context, even though we're using a structured error
1013          * handler.
1014          */
1015         errcxt->saved_errfunc = xmlStructuredError;
1016
1017 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1018         errcxt->saved_errcxt = xmlStructuredErrorContext;
1019 #else
1020         errcxt->saved_errcxt = xmlGenericErrorContext;
1021 #endif
1022
1023         xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler);
1024
1025         /*
1026          * Verify that xmlSetStructuredErrorFunc set the context variable we
1027          * expected it to.  If not, the error context pointer we just saved is not
1028          * the correct thing to restore, and since that leaves us without a way to
1029          * restore the context in pg_xml_done, we must fail.
1030          *
1031          * The only known situation in which this test fails is if we compile with
1032          * headers from a libxml2 that doesn't track the structured error context
1033          * separately (< 2.7.4), but at runtime use a version that does, or vice
1034          * versa.  The libxml2 authors did not treat that change as constituting
1035          * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1036          * fails to protect us from this.
1037          */
1038
1039 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1040         new_errcxt = xmlStructuredErrorContext;
1041 #else
1042         new_errcxt = xmlGenericErrorContext;
1043 #endif
1044
1045         if (new_errcxt != (void *) errcxt)
1046                 ereport(ERROR,
1047                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1048                                  errmsg("could not set up XML error handler"),
1049                                  errhint("This probably indicates that the version of libxml2"
1050                                                  " being used is not compatible with the libxml2"
1051                                                  " header files that PostgreSQL was built with.")));
1052
1053         /*
1054          * Also, install an entity loader to prevent unwanted fetches of external
1055          * files and URLs.
1056          */
1057         errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1058         xmlSetExternalEntityLoader(xmlPgEntityLoader);
1059
1060         return errcxt;
1061 }
1062
1063
1064 /*
1065  * pg_xml_done --- restore previous libxml error handling
1066  *
1067  * Resets libxml's global error-handling state to what it was before
1068  * pg_xml_init() was called.
1069  *
1070  * This routine verifies that all pending errors have been dealt with
1071  * (in assert-enabled builds, anyway).
1072  */
1073 void
1074 pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1075 {
1076         void       *cur_errcxt;
1077
1078         /* An assert seems like enough protection here */
1079         Assert(errcxt->magic == ERRCXT_MAGIC);
1080
1081         /*
1082          * In a normal exit, there should be no un-handled libxml errors.  But we
1083          * shouldn't try to enforce this during error recovery, since the longjmp
1084          * could have been thrown before xml_ereport had a chance to run.
1085          */
1086         Assert(!errcxt->err_occurred || isError);
1087
1088         /*
1089          * Check that libxml's global state is correct, warn if not.  This is a
1090          * real test and not an Assert because it has a higher probability of
1091          * happening.
1092          */
1093 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1094         cur_errcxt = xmlStructuredErrorContext;
1095 #else
1096         cur_errcxt = xmlGenericErrorContext;
1097 #endif
1098
1099         if (cur_errcxt != (void *) errcxt)
1100                 elog(WARNING, "libxml error handling state is out of sync with xml.c");
1101
1102         /* Restore the saved handlers */
1103         xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1104         xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1105
1106         /*
1107          * Mark the struct as invalid, just in case somebody somehow manages to
1108          * call xml_errorHandler or xml_ereport with it.
1109          */
1110         errcxt->magic = 0;
1111
1112         /* Release memory */
1113         pfree(errcxt->err_buf.data);
1114         pfree(errcxt);
1115 }
1116
1117
1118 /*
1119  * pg_xml_error_occurred() --- test the error flag
1120  */
1121 bool
1122 pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1123 {
1124         return errcxt->err_occurred;
1125 }
1126
1127
1128 /*
1129  * SQL/XML allows storing "XML documents" or "XML content".  "XML
1130  * documents" are specified by the XML specification and are parsed
1131  * easily by libxml.  "XML content" is specified by SQL/XML as the
1132  * production "XMLDecl? content".  But libxml can only parse the
1133  * "content" part, so we have to parse the XML declaration ourselves
1134  * to complete this.
1135  */
1136
/*
 * Make the *enclosing function* return XML_ERR_SPACE_REQUIRED unless the
 * character at p is XML whitespace.  Only usable inside functions that
 * return a libxml error code.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

/* Advance p past any run of XML whitespace (p must be an lvalue) */
#define SKIP_XML_SPACE(p) \
	while (xmlIsBlank_ch(*(p))) (p)++

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/*
 * Beware of multiple evaluations of argument!  The argument is fully
 * parenthesized in the comparisons so that expressions such as "a & b" or
 * "x ? y : z" are tested as a whole.
 */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
			|| xmlIsDigit_ch(c) \
			|| (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
			|| xmlIsCombiningQ(c) \
			|| xmlIsExtender_ch(c))
1154
1155 /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1156 static xmlChar *
1157 xml_pnstrdup(const xmlChar *str, size_t len)
1158 {
1159         xmlChar    *result;
1160
1161         result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1162         memcpy(result, str, len * sizeof(xmlChar));
1163         result[len] = 0;
1164         return result;
1165 }
1166
1167 /* Ditto, except input is char* */
1168 static xmlChar *
1169 pg_xmlCharStrndup(const char *str, size_t len)
1170 {
1171         xmlChar    *result;
1172
1173         result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1174         memcpy(result, str, len);
1175         result[len] = '\0';
1176
1177         return result;
1178 }
1179
1180 /*
1181  * Copy xmlChar string to PostgreSQL-owned memory, freeing the input.
1182  *
1183  * The input xmlChar is freed regardless of success of the copy.
1184  */
1185 static char *
1186 xml_pstrdup_and_free(xmlChar *str)
1187 {
1188         char       *result;
1189
1190         if (str)
1191         {
1192                 PG_TRY();
1193                 {
1194                         result = pstrdup((char *) str);
1195                 }
1196                 PG_CATCH();
1197                 {
1198                         xmlFree(str);
1199                         PG_RE_THROW();
1200                 }
1201                 PG_END_TRY();
1202                 xmlFree(str);
1203         }
1204         else
1205                 result = NULL;
1206
1207         return result;
1208 }
1209
1210 /*
1211  * str is the null-terminated input string.  Remaining arguments are
1212  * output arguments; each can be NULL if value is not wanted.
1213  * version and encoding are returned as locally-palloc'd strings.
1214  * Result is 0 if OK, an error code if not.
1215  */
1216 static int
1217 parse_xml_decl(const xmlChar *str, size_t *lenp,
1218                            xmlChar **version, xmlChar **encoding, int *standalone)
1219 {
1220         const xmlChar *p;
1221         const xmlChar *save_p;
1222         size_t          len;
1223         int                     utf8char;
1224         int                     utf8len;
1225
1226         /*
1227          * Only initialize libxml.  We don't need error handling here, but we do
1228          * need to make sure libxml is initialized before calling any of its
1229          * functions.  Note that this is safe (and a no-op) if caller has already
1230          * done pg_xml_init().
1231          */
1232         pg_xml_init_library();
1233
1234         /* Initialize output arguments to "not present" */
1235         if (version)
1236                 *version = NULL;
1237         if (encoding)
1238                 *encoding = NULL;
1239         if (standalone)
1240                 *standalone = -1;
1241
1242         p = str;
1243
1244         if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1245                 goto finished;
1246
1247         /*
1248          * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
1249          * rather than an XMLDecl, so we have done what we came to do and found no
1250          * XMLDecl.
1251          *
1252          * We need an input length value for xmlGetUTF8Char, but there's no need
1253          * to count the whole document size, so use strnlen not strlen.
1254          */
1255         utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
1256         utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1257         if (PG_XMLISNAMECHAR(utf8char))
1258                 goto finished;
1259
1260         p += 5;
1261
1262         /* version */
1263         CHECK_XML_SPACE(p);
1264         SKIP_XML_SPACE(p);
1265         if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1266                 return XML_ERR_VERSION_MISSING;
1267         p += 7;
1268         SKIP_XML_SPACE(p);
1269         if (*p != '=')
1270                 return XML_ERR_VERSION_MISSING;
1271         p += 1;
1272         SKIP_XML_SPACE(p);
1273
1274         if (*p == '\'' || *p == '"')
1275         {
1276                 const xmlChar *q;
1277
1278                 q = xmlStrchr(p + 1, *p);
1279                 if (!q)
1280                         return XML_ERR_VERSION_MISSING;
1281
1282                 if (version)
1283                         *version = xml_pnstrdup(p + 1, q - p - 1);
1284                 p = q + 1;
1285         }
1286         else
1287                 return XML_ERR_VERSION_MISSING;
1288
1289         /* encoding */
1290         save_p = p;
1291         SKIP_XML_SPACE(p);
1292         if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1293         {
1294                 CHECK_XML_SPACE(save_p);
1295                 p += 8;
1296                 SKIP_XML_SPACE(p);
1297                 if (*p != '=')
1298                         return XML_ERR_MISSING_ENCODING;
1299                 p += 1;
1300                 SKIP_XML_SPACE(p);
1301
1302                 if (*p == '\'' || *p == '"')
1303                 {
1304                         const xmlChar *q;
1305
1306                         q = xmlStrchr(p + 1, *p);
1307                         if (!q)
1308                                 return XML_ERR_MISSING_ENCODING;
1309
1310                         if (encoding)
1311                                 *encoding = xml_pnstrdup(p + 1, q - p - 1);
1312                         p = q + 1;
1313                 }
1314                 else
1315                         return XML_ERR_MISSING_ENCODING;
1316         }
1317         else
1318         {
1319                 p = save_p;
1320         }
1321
1322         /* standalone */
1323         save_p = p;
1324         SKIP_XML_SPACE(p);
1325         if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1326         {
1327                 CHECK_XML_SPACE(save_p);
1328                 p += 10;
1329                 SKIP_XML_SPACE(p);
1330                 if (*p != '=')
1331                         return XML_ERR_STANDALONE_VALUE;
1332                 p += 1;
1333                 SKIP_XML_SPACE(p);
1334                 if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1335                         xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1336                 {
1337                         if (standalone)
1338                                 *standalone = 1;
1339                         p += 5;
1340                 }
1341                 else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1342                                  xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1343                 {
1344                         if (standalone)
1345                                 *standalone = 0;
1346                         p += 4;
1347                 }
1348                 else
1349                         return XML_ERR_STANDALONE_VALUE;
1350         }
1351         else
1352         {
1353                 p = save_p;
1354         }
1355
1356         SKIP_XML_SPACE(p);
1357         if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1358                 return XML_ERR_XMLDECL_NOT_FINISHED;
1359         p += 2;
1360
1361 finished:
1362         len = p - str;
1363
1364         for (p = str; p < str + len; p++)
1365                 if (*p > 127)
1366                         return XML_ERR_INVALID_CHAR;
1367
1368         if (lenp)
1369                 *lenp = len;
1370
1371         return XML_ERR_OK;
1372 }
1373
1374
1375 /*
1376  * Write an XML declaration.  On output, we adjust the XML declaration
1377  * as follows.  (These rules are the moral equivalent of the clause
1378  * "Serialization of an XML value" in the SQL standard.)
1379  *
1380  * We try to avoid generating an XML declaration if possible.  This is
1381  * so that you don't get trivial things like xml '<foo/>' resulting in
1382  * '<?xml version="1.0"?><foo/>', which would surely be annoying.  We
1383  * must provide a declaration if the standalone property is specified
1384  * or if we include an encoding declaration.  If we have a
1385  * declaration, we must specify a version (XML requires this).
1386  * Otherwise we only make a declaration if the version is not "1.0",
1387  * which is the default version specified in SQL:2003.
1388  */
1389 static bool
1390 print_xml_decl(StringInfo buf, const xmlChar *version,
1391                            pg_enc encoding, int standalone)
1392 {
1393         if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1394                 || (encoding && encoding != PG_UTF8)
1395                 || standalone != -1)
1396         {
1397                 appendStringInfoString(buf, "<?xml");
1398
1399                 if (version)
1400                         appendStringInfo(buf, " version=\"%s\"", version);
1401                 else
1402                         appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1403
1404                 if (encoding && encoding != PG_UTF8)
1405                 {
1406                         /*
1407                          * XXX might be useful to convert this to IANA names (ISO-8859-1
1408                          * instead of LATIN1 etc.); needs field experience
1409                          */
1410                         appendStringInfo(buf, " encoding=\"%s\"",
1411                                                          pg_encoding_to_char(encoding));
1412                 }
1413
1414                 if (standalone == 1)
1415                         appendStringInfoString(buf, " standalone=\"yes\"");
1416                 else if (standalone == 0)
1417                         appendStringInfoString(buf, " standalone=\"no\"");
1418                 appendStringInfoString(buf, "?>");
1419
1420                 return true;
1421         }
1422         else
1423                 return false;
1424 }
1425
1426 /*
1427  * Test whether an input that is to be parsed as CONTENT contains a DTD.
1428  *
1429  * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
1430  * satisfied by a document with a DTD, which is a bit of a wart, as it means
1431  * the CONTENT type is not a proper superset of DOCUMENT.  SQL/XML:2006 and
1432  * later fix that, by redefining content with reference to the "more
1433  * permissive" Document Node of the XQuery/XPath Data Model, such that any
1434  * DOCUMENT value is indeed also a CONTENT value.  That definition is more
1435  * useful, as CONTENT becomes usable for parsing input of unknown form (think
1436  * pg_restore).
1437  *
1438  * As used below in parse_xml when parsing for CONTENT, libxml does not give
1439  * us the 2006+ behavior, but only the 2003; it will choke if the input has
1440  * a DTD.  But we can provide the 2006+ definition of CONTENT easily enough,
1441  * by detecting this case first and simply doing the parse as DOCUMENT.
1442  *
1443  * A DTD can be found arbitrarily far in, but that would be a contrived case;
1444  * it will ordinarily start within a few dozen characters.  The only things
1445  * that can precede it are an XMLDecl (here, the caller will have called
1446  * parse_xml_decl already), whitespace, comments, and processing instructions.
1447  * This function need only return true if it sees a valid sequence of such
1448  * things leading to <!DOCTYPE.  It can simply return false in any other
1449  * cases, including malformed input; that will mean the input gets parsed as
1450  * CONTENT as originally planned, with libxml reporting any errors.
1451  *
1452  * This is only to be called from xml_parse, when pg_xml_init has already
1453  * been called.  The input is already in UTF8 encoding.
1454  */
1455 static bool
1456 xml_doctype_in_content(const xmlChar *str)
1457 {
1458         const xmlChar *p = str;
1459
1460         for (;;)
1461         {
1462                 const xmlChar *e;
1463
1464                 SKIP_XML_SPACE(p);
1465                 if (*p != '<')
1466                         return false;
1467                 p++;
1468
1469                 if (*p == '!')
1470                 {
1471                         p++;
1472
1473                         /* if we see <!DOCTYPE, we can return true */
1474                         if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0)
1475                                 return true;
1476
1477                         /* otherwise, if it's not a comment, fail */
1478                         if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0)
1479                                 return false;
1480                         /* find end of comment: find -- and a > must follow */
1481                         p = xmlStrstr(p + 2, (xmlChar *) "--");
1482                         if (!p || p[2] != '>')
1483                                 return false;
1484                         /* advance over comment, and keep scanning */
1485                         p += 3;
1486                         continue;
1487                 }
1488
1489                 /* otherwise, if it's not a PI <?target something?>, fail */
1490                 if (*p != '?')
1491                         return false;
1492                 p++;
1493
1494                 /* find end of PI (the string ?> is forbidden within a PI) */
1495                 e = xmlStrstr(p, (xmlChar *) "?>");
1496                 if (!e)
1497                         return false;
1498
1499                 /* advance over PI, keep scanning */
1500                 p = e + 2;
1501         }
1502 }
1503
1504
1505 /*
1506  * Convert a C string to XML internal representation
1507  *
1508  * Note: it is caller's responsibility to xmlFreeDoc() the result,
1509  * else a permanent memory leak will ensue!
1510  *
1511  * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1512  * yet do not use SAX - see xmlreader.c)
1513  */
1514 static xmlDocPtr
1515 xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1516                   int encoding)
1517 {
1518         int32           len;
1519         xmlChar    *string;
1520         xmlChar    *utf8string;
1521         PgXmlErrorContext *xmlerrcxt;
1522         volatile xmlParserCtxtPtr ctxt = NULL;
1523         volatile xmlDocPtr doc = NULL;
1524
1525         len = VARSIZE_ANY_EXHDR(data);  /* will be useful later */
1526         string = xml_text2xmlChar(data);
1527
1528         utf8string = pg_do_encoding_conversion(string,
1529                                                                                    len,
1530                                                                                    encoding,
1531                                                                                    PG_UTF8);
1532
1533         /* Start up libxml and its parser */
1534         xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1535
1536         /* Use a TRY block to ensure we clean up correctly */
1537         PG_TRY();
1538         {
1539                 bool            parse_as_document = false;
1540                 int                     res_code;
1541                 size_t          count = 0;
1542                 xmlChar    *version = NULL;
1543                 int                     standalone = 0;
1544
1545                 xmlInitParser();
1546
1547                 ctxt = xmlNewParserCtxt();
1548                 if (ctxt == NULL || xmlerrcxt->err_occurred)
1549                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1550                                                 "could not allocate parser context");
1551
1552                 /* Decide whether to parse as document or content */
1553                 if (xmloption_arg == XMLOPTION_DOCUMENT)
1554                         parse_as_document = true;
1555                 else
1556                 {
1557                         /* Parse and skip over the XML declaration, if any */
1558                         res_code = parse_xml_decl(utf8string,
1559                                                                           &count, &version, NULL, &standalone);
1560                         if (res_code != 0)
1561                                 xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
1562                                                                         "invalid XML content: invalid XML declaration",
1563                                                                         res_code);
1564
1565                         /* Is there a DOCTYPE element? */
1566                         if (xml_doctype_in_content(utf8string + count))
1567                                 parse_as_document = true;
1568                 }
1569
1570                 if (parse_as_document)
1571                 {
1572                         /*
1573                          * Note, that here we try to apply DTD defaults
1574                          * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
1575                          * 'Default values defined by internal DTD are applied'. As for
1576                          * external DTDs, we try to support them too, (see SQL/XML:2008 GR
1577                          * 10.16.7.e)
1578                          */
1579                         doc = xmlCtxtReadDoc(ctxt, utf8string,
1580                                                                  NULL,
1581                                                                  "UTF-8",
1582                                                                  XML_PARSE_NOENT | XML_PARSE_DTDATTR
1583                                                                  | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
1584                         if (doc == NULL || xmlerrcxt->err_occurred)
1585                         {
1586                                 /* Use original option to decide which error code to throw */
1587                                 if (xmloption_arg == XMLOPTION_DOCUMENT)
1588                                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
1589                                                                 "invalid XML document");
1590                                 else
1591                                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1592                                                                 "invalid XML content");
1593                         }
1594                 }
1595                 else
1596                 {
1597                         doc = xmlNewDoc(version);
1598                         Assert(doc->encoding == NULL);
1599                         doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1600                         doc->standalone = standalone;
1601
1602                         /* allow empty content */
1603                         if (*(utf8string + count))
1604                         {
1605                                 res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1606                                                                                                            utf8string + count, NULL);
1607                                 if (res_code != 0 || xmlerrcxt->err_occurred)
1608                                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1609                                                                 "invalid XML content");
1610                         }
1611                 }
1612         }
1613         PG_CATCH();
1614         {
1615                 if (doc != NULL)
1616                         xmlFreeDoc(doc);
1617                 if (ctxt != NULL)
1618                         xmlFreeParserCtxt(ctxt);
1619
1620                 pg_xml_done(xmlerrcxt, true);
1621
1622                 PG_RE_THROW();
1623         }
1624         PG_END_TRY();
1625
1626         xmlFreeParserCtxt(ctxt);
1627
1628         pg_xml_done(xmlerrcxt, false);
1629
1630         return doc;
1631 }
1632
1633
1634 /*
1635  * xmlChar<->text conversions
1636  */
1637 static xmlChar *
1638 xml_text2xmlChar(text *in)
1639 {
1640         return (xmlChar *) text_to_cstring(in);
1641 }
1642
1643
1644 #ifdef USE_LIBXMLCONTEXT
1645
1646 /*
1647  * Manage the special context used for all libxml allocations (but only
1648  * in special debug builds; see notes at top of file)
1649  */
1650 static void
1651 xml_memory_init(void)
1652 {
1653         /* Create memory context if not there already */
1654         if (LibxmlContext == NULL)
1655                 LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1656                                                                                           "Libxml context",
1657                                                                                           ALLOCSET_DEFAULT_SIZES);
1658
1659         /* Re-establish the callbacks even if already set */
1660         xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1661 }
1662
1663 /*
1664  * Wrappers for memory management functions
1665  */
1666 static void *
1667 xml_palloc(size_t size)
1668 {
1669         return MemoryContextAlloc(LibxmlContext, size);
1670 }
1671
1672
1673 static void *
1674 xml_repalloc(void *ptr, size_t size)
1675 {
1676         return repalloc(ptr, size);
1677 }
1678
1679
/*
 * xml_pfree: release hook handed to libxml; a NULL pointer is ignored.
 */
static void
xml_pfree(void *ptr)
{
	/* At least some parts of libxml assume xmlFree(NULL) is allowed */
	if (ptr == NULL)
		return;

	pfree(ptr);
}
1687
1688
1689 static char *
1690 xml_pstrdup(const char *string)
1691 {
1692         return MemoryContextStrdup(LibxmlContext, string);
1693 }
1694 #endif                                                  /* USE_LIBXMLCONTEXT */
1695
1696
1697 /*
1698  * xmlPgEntityLoader --- entity loader callback function
1699  *
1700  * Silently prevent any external entity URL from being loaded.  We don't want
1701  * to throw an error, so instead make the entity appear to expand to an empty
1702  * string.
1703  *
1704  * We would prefer to allow loading entities that exist in the system's
1705  * global XML catalog; but the available libxml2 APIs make that a complex
1706  * and fragile task.  For now, just shut down all external access.
1707  */
1708 static xmlParserInputPtr
1709 xmlPgEntityLoader(const char *URL, const char *ID,
1710                                   xmlParserCtxtPtr ctxt)
1711 {
1712         return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
1713 }
1714
1715
1716 /*
1717  * xml_ereport --- report an XML-related error
1718  *
1719  * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
1720  * standard.  This function adds libxml's native error message, if any, as
1721  * detail.
1722  *
1723  * This is exported for modules that want to share the core libxml error
1724  * handler.  Note that pg_xml_init() *must* have been called previously.
1725  */
1726 void
1727 xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
1728 {
1729         char       *detail;
1730
1731         /* Defend against someone passing us a bogus context struct */
1732         if (errcxt->magic != ERRCXT_MAGIC)
1733                 elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
1734
1735         /* Flag that the current libxml error has been reported */
1736         errcxt->err_occurred = false;
1737
1738         /* Include detail only if we have some text from libxml */
1739         if (errcxt->err_buf.len > 0)
1740                 detail = errcxt->err_buf.data;
1741         else
1742                 detail = NULL;
1743
1744         ereport(level,
1745                         (errcode(sqlcode),
1746                          errmsg_internal("%s", msg),
1747                          detail ? errdetail_internal("%s", detail) : 0));
1748 }
1749
1750
1751 /*
1752  * Error handler for libxml errors and warnings
1753  */
1754 static void
1755 xml_errorHandler(void *data, xmlErrorPtr error)
1756 {
1757         PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
1758         xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
1759         xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
1760         xmlNodePtr      node = error->node;
1761         const xmlChar *name = (node != NULL &&
1762                                                    node->type == XML_ELEMENT_NODE) ? node->name : NULL;
1763         int                     domain = error->domain;
1764         int                     level = error->level;
1765         StringInfo      errorBuf;
1766
1767         /*
1768          * Defend against someone passing us a bogus context struct.
1769          *
1770          * We force a backend exit if this check fails because longjmp'ing out of
1771          * libxml would likely render it unsafe to use further.
1772          */
1773         if (xmlerrcxt->magic != ERRCXT_MAGIC)
1774                 elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
1775
1776         /*----------
1777          * Older libxml versions report some errors differently.
1778          * First, some errors were previously reported as coming from the parser
1779          * domain but are now reported as coming from the namespace domain.
1780          * Second, some warnings were upgraded to errors.
1781          * We attempt to compensate for that here.
1782          *----------
1783          */
1784         switch (error->code)
1785         {
1786                 case XML_WAR_NS_URI:
1787                         level = XML_ERR_ERROR;
1788                         domain = XML_FROM_NAMESPACE;
1789                         break;
1790
1791                 case XML_ERR_NS_DECL_ERROR:
1792                 case XML_WAR_NS_URI_RELATIVE:
1793                 case XML_WAR_NS_COLUMN:
1794                 case XML_NS_ERR_XML_NAMESPACE:
1795                 case XML_NS_ERR_UNDEFINED_NAMESPACE:
1796                 case XML_NS_ERR_QNAME:
1797                 case XML_NS_ERR_ATTRIBUTE_REDEFINED:
1798                 case XML_NS_ERR_EMPTY:
1799                         domain = XML_FROM_NAMESPACE;
1800                         break;
1801         }
1802
1803         /* Decide whether to act on the error or not */
1804         switch (domain)
1805         {
1806                 case XML_FROM_PARSER:
1807                 case XML_FROM_NONE:
1808                 case XML_FROM_MEMORY:
1809                 case XML_FROM_IO:
1810
1811                         /*
1812                          * Suppress warnings about undeclared entities.  We need to do
1813                          * this to avoid problems due to not loading DTD definitions.
1814                          */
1815                         if (error->code == XML_WAR_UNDECLARED_ENTITY)
1816                                 return;
1817
1818                         /* Otherwise, accept error regardless of the parsing purpose */
1819                         break;
1820
1821                 default:
1822                         /* Ignore error if only doing well-formedness check */
1823                         if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
1824                                 return;
1825                         break;
1826         }
1827
1828         /* Prepare error message in errorBuf */
1829         errorBuf = makeStringInfo();
1830
1831         if (error->line > 0)
1832                 appendStringInfo(errorBuf, "line %d: ", error->line);
1833         if (name != NULL)
1834                 appendStringInfo(errorBuf, "element %s: ", name);
1835         if (error->message != NULL)
1836                 appendStringInfoString(errorBuf, error->message);
1837         else
1838                 appendStringInfoString(errorBuf, "(no message provided)");
1839
1840         /*
1841          * Append context information to errorBuf.
1842          *
1843          * xmlParserPrintFileContext() uses libxml's "generic" error handler to
1844          * write the context.  Since we don't want to duplicate libxml
1845          * functionality here, we set up a generic error handler temporarily.
1846          *
1847          * We use appendStringInfo() directly as libxml's generic error handler.
1848          * This should work because it has essentially the same signature as
1849          * libxml expects, namely (void *ptr, const char *msg, ...).
1850          */
1851         if (input != NULL)
1852         {
1853                 xmlGenericErrorFunc errFuncSaved = xmlGenericError;
1854                 void       *errCtxSaved = xmlGenericErrorContext;
1855
1856                 xmlSetGenericErrorFunc((void *) errorBuf,
1857                                                            (xmlGenericErrorFunc) appendStringInfo);
1858
1859                 /* Add context information to errorBuf */
1860                 appendStringInfoLineSeparator(errorBuf);
1861
1862                 xmlParserPrintFileContext(input);
1863
1864                 /* Restore generic error func */
1865                 xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
1866         }
1867
1868         /* Get rid of any trailing newlines in errorBuf */
1869         chopStringInfoNewlines(errorBuf);
1870
1871         /*
1872          * Legacy error handling mode.  err_occurred is never set, we just add the
1873          * message to err_buf.  This mode exists because the xml2 contrib module
1874          * uses our error-handling infrastructure, but we don't want to change its
1875          * behaviour since it's deprecated anyway.  This is also why we don't
1876          * distinguish between notices, warnings and errors here --- the old-style
1877          * generic error handler wouldn't have done that either.
1878          */
1879         if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
1880         {
1881                 appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1882                 appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
1883                                                            errorBuf->len);
1884
1885                 pfree(errorBuf->data);
1886                 pfree(errorBuf);
1887                 return;
1888         }
1889
1890         /*
1891          * We don't want to ereport() here because that'd probably leave libxml in
1892          * an inconsistent state.  Instead, we remember the error and ereport()
1893          * from xml_ereport().
1894          *
1895          * Warnings and notices can be reported immediately since they won't cause
1896          * a longjmp() out of libxml.
1897          */
1898         if (level >= XML_ERR_ERROR)
1899         {
1900                 appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1901                 appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
1902                                                            errorBuf->len);
1903
1904                 xmlerrcxt->err_occurred = true;
1905         }
1906         else if (level >= XML_ERR_WARNING)
1907         {
1908                 ereport(WARNING,
1909                                 (errmsg_internal("%s", errorBuf->data)));
1910         }
1911         else
1912         {
1913                 ereport(NOTICE,
1914                                 (errmsg_internal("%s", errorBuf->data)));
1915         }
1916
1917         pfree(errorBuf->data);
1918         pfree(errorBuf);
1919 }
1920
1921
1922 /*
1923  * Wrapper for "ereport" function for XML-related errors.  The "msg"
1924  * is the SQL-level message; some can be adopted from the SQL/XML
1925  * standard.  This function uses "code" to create a textual detail
1926  * message.  At the moment, we only need to cover those codes that we
1927  * may raise in this file.
1928  */
1929 static void
1930 xml_ereport_by_code(int level, int sqlcode,
1931                                         const char *msg, int code)
1932 {
1933         const char *det;
1934
1935         switch (code)
1936         {
1937                 case XML_ERR_INVALID_CHAR:
1938                         det = gettext_noop("Invalid character value.");
1939                         break;
1940                 case XML_ERR_SPACE_REQUIRED:
1941                         det = gettext_noop("Space required.");
1942                         break;
1943                 case XML_ERR_STANDALONE_VALUE:
1944                         det = gettext_noop("standalone accepts only 'yes' or 'no'.");
1945                         break;
1946                 case XML_ERR_VERSION_MISSING:
1947                         det = gettext_noop("Malformed declaration: missing version.");
1948                         break;
1949                 case XML_ERR_MISSING_ENCODING:
1950                         det = gettext_noop("Missing encoding in text declaration.");
1951                         break;
1952                 case XML_ERR_XMLDECL_NOT_FINISHED:
1953                         det = gettext_noop("Parsing XML declaration: '?>' expected.");
1954                         break;
1955                 default:
1956                         det = gettext_noop("Unrecognized libxml error code: %d.");
1957                         break;
1958         }
1959
1960         ereport(level,
1961                         (errcode(sqlcode),
1962                          errmsg_internal("%s", msg),
1963                          errdetail(det, code)));
1964 }
1965
1966
1967 /*
1968  * Remove all trailing newlines from a StringInfo string
1969  */
1970 static void
1971 chopStringInfoNewlines(StringInfo str)
1972 {
1973         while (str->len > 0 && str->data[str->len - 1] == '\n')
1974                 str->data[--str->len] = '\0';
1975 }
1976
1977
1978 /*
1979  * Append a newline after removing any existing trailing newlines
1980  */
1981 static void
1982 appendStringInfoLineSeparator(StringInfo str)
1983 {
1984         chopStringInfoNewlines(str);
1985         if (str->len > 0)
1986                 appendStringInfoChar(str, '\n');
1987 }
1988
1989
1990 /*
1991  * Convert one char in the current server encoding to a Unicode codepoint.
1992  */
1993 static pg_wchar
1994 sqlchar_to_unicode(const char *s)
1995 {
1996         char       *utf8string;
1997         pg_wchar        ret[2];                 /* need space for trailing zero */
1998
1999         /* note we're not assuming s is null-terminated */
2000         utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
2001
2002         pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
2003                                                                   pg_encoding_mblen(PG_UTF8, utf8string));
2004
2005         if (utf8string != s)
2006                 pfree(utf8string);
2007
2008         return ret[0];
2009 }
2010
2011
2012 static bool
2013 is_valid_xml_namefirst(pg_wchar c)
2014 {
2015         /* (Letter | '_' | ':') */
2016         return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2017                         || c == '_' || c == ':');
2018 }
2019
2020
2021 static bool
2022 is_valid_xml_namechar(pg_wchar c)
2023 {
2024         /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
2025         return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2026                         || xmlIsDigitQ(c)
2027                         || c == '.' || c == '-' || c == '_' || c == ':'
2028                         || xmlIsCombiningQ(c)
2029                         || xmlIsExtenderQ(c));
2030 }
2031 #endif                                                  /* USE_LIBXML */
2032
2033
/*
 * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
 *
 * The identifier is copied character by character.  Characters that are not
 * acceptable at their position in an XML name are written as _xHHHH_
 * escapes, and so is an underscore that is followed by 'x'.  A colon is
 * escaped at the start of the identifier, or anywhere if fully_escaped.
 * When fully_escaped is set, the first letter of a leading "xml" (in any
 * letter case) is escaped as well; escape_period additionally escapes '.'.
 *
 * Returns the string built in a StringInfo; when built without libxml,
 * NO_XML_SUPPORT() is invoked instead.
 */
char *
map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
							   bool escape_period)
{
#ifdef USE_LIBXML
	StringInfoData result;
	const char *cur = ident;

	/*
	 * SQL/XML doesn't make use of this case anywhere, so it's probably a
	 * mistake.
	 */
	Assert(fully_escaped || !escape_period);

	initStringInfo(&result);

	while (*cur)
	{
		int			chlen = pg_mblen(cur);
		bool		at_start = (cur == ident);

		if (*cur == ':' && (at_start || fully_escaped))
		{
			appendStringInfoString(&result, "_x003A_");
		}
		else if (*cur == '_' && cur[1] == 'x')
		{
			/* "_x" would otherwise look like the start of an escape */
			appendStringInfoString(&result, "_x005F_");
		}
		else if (fully_escaped && at_start &&
				 pg_strncasecmp(cur, "xml", 3) == 0)
		{
			/* escape just the leading letter, preserving its case */
			appendStringInfoString(&result,
								   (*cur == 'x') ? "_x0078_" : "_x0058_");
		}
		else if (escape_period && *cur == '.')
		{
			appendStringInfoString(&result, "_x002E_");
		}
		else
		{
			pg_wchar	u = sqlchar_to_unicode(cur);
			bool		acceptable;

			/* the first character has a stricter rule than the others */
			if (at_start)
				acceptable = is_valid_xml_namefirst(u);
			else
				acceptable = is_valid_xml_namechar(u);

			if (acceptable)
				appendBinaryStringInfo(&result, cur, chlen);
			else
				appendStringInfo(&result, "_x%04X_", (unsigned int) u);
		}

		cur += chlen;
	}

	return result.data;
#else							/* not USE_LIBXML */
	NO_XML_SUPPORT();
	return NULL;
#endif							/* not USE_LIBXML */
}
2088
2089
2090 /*
2091  * Map a Unicode codepoint into the current server encoding.
2092  */
2093 static char *
2094 unicode_to_sqlchar(pg_wchar c)
2095 {
2096         char            utf8string[8];  /* need room for trailing zero */
2097         char       *result;
2098
2099         memset(utf8string, 0, sizeof(utf8string));
2100         unicode_to_utf8(c, (unsigned char *) utf8string);
2101
2102         result = pg_any_to_server(utf8string, strlen(utf8string), PG_UTF8);
2103         /* if pg_any_to_server didn't strdup, we must */
2104         if (result == utf8string)
2105                 result = pstrdup(result);
2106         return result;
2107 }
2108
2109
2110 /*
2111  * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
2112  */
2113 char *
2114 map_xml_name_to_sql_identifier(const char *name)
2115 {
2116         StringInfoData buf;
2117         const char *p;
2118
2119         initStringInfo(&buf);
2120
2121         for (p = name; *p; p += pg_mblen(p))
2122         {
2123                 if (*p == '_' && *(p + 1) == 'x'
2124                         && isxdigit((unsigned char) *(p + 2))
2125                         && isxdigit((unsigned char) *(p + 3))
2126                         && isxdigit((unsigned char) *(p + 4))
2127                         && isxdigit((unsigned char) *(p + 5))
2128                         && *(p + 6) == '_')
2129                 {
2130                         unsigned int u;
2131
2132                         sscanf(p + 2, "%X", &u);
2133                         appendStringInfoString(&buf, unicode_to_sqlchar(u));
2134                         p += 6;
2135                 }
2136                 else
2137                         appendBinaryStringInfo(&buf, p, pg_mblen(p));
2138         }
2139
2140         return buf.data;
2141 }
2142
2143 /*
2144  * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
2145  *
2146  * When xml_escape_strings is true, then certain characters in string
2147  * values are replaced by entity references (&lt; etc.), as specified
2148  * in SQL/XML:2008 section 9.8 GR 9) a) iii).   This is normally what is
2149  * wanted.  The false case is mainly useful when the resulting value
2150  * is used with xmlTextWriterWriteAttribute() to write out an
2151  * attribute, because that function does the escaping itself.
2152  */
2153 char *
2154 map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
2155 {
2156         if (type_is_array_domain(type))
2157         {
2158                 ArrayType  *array;
2159                 Oid                     elmtype;
2160                 int16           elmlen;
2161                 bool            elmbyval;
2162                 char            elmalign;
2163                 int                     num_elems;
2164                 Datum      *elem_values;
2165                 bool       *elem_nulls;
2166                 StringInfoData buf;
2167                 int                     i;
2168
2169                 array = DatumGetArrayTypeP(value);
2170                 elmtype = ARR_ELEMTYPE(array);
2171                 get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
2172
2173                 deconstruct_array(array, elmtype,
2174                                                   elmlen, elmbyval, elmalign,
2175                                                   &elem_values, &elem_nulls,
2176                                                   &num_elems);
2177
2178                 initStringInfo(&buf);
2179
2180                 for (i = 0; i < num_elems; i++)
2181                 {
2182                         if (elem_nulls[i])
2183                                 continue;
2184                         appendStringInfoString(&buf, "<element>");
2185                         appendStringInfoString(&buf,
2186                                                                    map_sql_value_to_xml_value(elem_values[i],
2187                                                                                                                           elmtype, true));
2188                         appendStringInfoString(&buf, "</element>");
2189                 }
2190
2191                 pfree(elem_values);
2192                 pfree(elem_nulls);
2193
2194                 return buf.data;
2195         }
2196         else
2197         {
2198                 Oid                     typeOut;
2199                 bool            isvarlena;
2200                 char       *str;
2201
2202                 /*
2203                  * Flatten domains; the special-case treatments below should apply to,
2204                  * eg, domains over boolean not just boolean.
2205                  */
2206                 type = getBaseType(type);
2207
2208                 /*
2209                  * Special XSD formatting for some data types
2210                  */
2211                 switch (type)
2212                 {
2213                         case BOOLOID:
2214                                 if (DatumGetBool(value))
2215                                         return "true";
2216                                 else
2217                                         return "false";
2218
2219                         case DATEOID:
2220                                 {
2221                                         DateADT         date;
2222                                         struct pg_tm tm;
2223                                         char            buf[MAXDATELEN + 1];
2224
2225                                         date = DatumGetDateADT(value);
2226                                         /* XSD doesn't support infinite values */
2227                                         if (DATE_NOT_FINITE(date))
2228                                                 ereport(ERROR,
2229                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2230                                                                  errmsg("date out of range"),
2231                                                                  errdetail("XML does not support infinite date values.")));
2232                                         j2date(date + POSTGRES_EPOCH_JDATE,
2233                                                    &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
2234                                         EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2235
2236                                         return pstrdup(buf);
2237                                 }
2238
2239                         case TIMESTAMPOID:
2240                                 {
2241                                         Timestamp       timestamp;
2242                                         struct pg_tm tm;
2243                                         fsec_t          fsec;
2244                                         char            buf[MAXDATELEN + 1];
2245
2246                                         timestamp = DatumGetTimestamp(value);
2247
2248                                         /* XSD doesn't support infinite values */
2249                                         if (TIMESTAMP_NOT_FINITE(timestamp))
2250                                                 ereport(ERROR,
2251                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2252                                                                  errmsg("timestamp out of range"),
2253                                                                  errdetail("XML does not support infinite timestamp values.")));
2254                                         else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2255                                                 EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
2256                                         else
2257                                                 ereport(ERROR,
2258                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2259                                                                  errmsg("timestamp out of range")));
2260
2261                                         return pstrdup(buf);
2262                                 }
2263
2264                         case TIMESTAMPTZOID:
2265                                 {
2266                                         TimestampTz timestamp;
2267                                         struct pg_tm tm;
2268                                         int                     tz;
2269                                         fsec_t          fsec;
2270                                         const char *tzn = NULL;
2271                                         char            buf[MAXDATELEN + 1];
2272
2273                                         timestamp = DatumGetTimestamp(value);
2274
2275                                         /* XSD doesn't support infinite values */
2276                                         if (TIMESTAMP_NOT_FINITE(timestamp))
2277                                                 ereport(ERROR,
2278                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2279                                                                  errmsg("timestamp out of range"),
2280                                                                  errdetail("XML does not support infinite timestamp values.")));
2281                                         else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2282                                                 EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2283                                         else
2284                                                 ereport(ERROR,
2285                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2286                                                                  errmsg("timestamp out of range")));
2287
2288                                         return pstrdup(buf);
2289                                 }
2290
2291 #ifdef USE_LIBXML
2292                         case BYTEAOID:
2293                                 {
2294                                         bytea      *bstr = DatumGetByteaPP(value);
2295                                         PgXmlErrorContext *xmlerrcxt;
2296                                         volatile xmlBufferPtr buf = NULL;
2297                                         volatile xmlTextWriterPtr writer = NULL;
2298                                         char       *result;
2299
2300                                         xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2301
2302                                         PG_TRY();
2303                                         {
2304                                                 buf = xmlBufferCreate();
2305                                                 if (buf == NULL || xmlerrcxt->err_occurred)
2306                                                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2307                                                                                 "could not allocate xmlBuffer");
2308                                                 writer = xmlNewTextWriterMemory(buf, 0);
2309                                                 if (writer == NULL || xmlerrcxt->err_occurred)
2310                                                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2311                                                                                 "could not allocate xmlTextWriter");
2312
2313                                                 if (xmlbinary == XMLBINARY_BASE64)
2314                                                         xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2315                                                                                                          0, VARSIZE_ANY_EXHDR(bstr));
2316                                                 else
2317                                                         xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2318                                                                                                          0, VARSIZE_ANY_EXHDR(bstr));
2319
2320                                                 /* we MUST do this now to flush data out to the buffer */
2321                                                 xmlFreeTextWriter(writer);
2322                                                 writer = NULL;
2323
2324                                                 result = pstrdup((const char *) xmlBufferContent(buf));
2325                                         }
2326                                         PG_CATCH();
2327                                         {
2328                                                 if (writer)
2329                                                         xmlFreeTextWriter(writer);
2330                                                 if (buf)
2331                                                         xmlBufferFree(buf);
2332
2333                                                 pg_xml_done(xmlerrcxt, true);
2334
2335                                                 PG_RE_THROW();
2336                                         }
2337                                         PG_END_TRY();
2338
2339                                         xmlBufferFree(buf);
2340
2341                                         pg_xml_done(xmlerrcxt, false);
2342
2343                                         return result;
2344                                 }
2345 #endif                                                  /* USE_LIBXML */
2346
2347                 }
2348
2349                 /*
2350                  * otherwise, just use the type's native text representation
2351                  */
2352                 getTypeOutputInfo(type, &typeOut, &isvarlena);
2353                 str = OidOutputFunctionCall(typeOut, value);
2354
2355                 /* ... exactly as-is for XML, and when escaping is not wanted */
2356                 if (type == XMLOID || !xml_escape_strings)
2357                         return str;
2358
2359                 /* otherwise, translate special characters as needed */
2360                 return escape_xml(str);
2361         }
2362 }
2363
2364
2365 /*
2366  * Escape characters in text that have special meanings in XML.
2367  *
2368  * Returns a palloc'd string.
2369  *
2370  * NB: this is intentionally not dependent on libxml.
2371  */
2372 char *
2373 escape_xml(const char *str)
2374 {
2375         StringInfoData buf;
2376         const char *p;
2377
2378         initStringInfo(&buf);
2379         for (p = str; *p; p++)
2380         {
2381                 switch (*p)
2382                 {
2383                         case '&':
2384                                 appendStringInfoString(&buf, "&amp;");
2385                                 break;
2386                         case '<':
2387                                 appendStringInfoString(&buf, "&lt;");
2388                                 break;
2389                         case '>':
2390                                 appendStringInfoString(&buf, "&gt;");
2391                                 break;
2392                         case '\r':
2393                                 appendStringInfoString(&buf, "&#x0d;");
2394                                 break;
2395                         default:
2396                                 appendStringInfoCharMacro(&buf, *p);
2397                                 break;
2398                 }
2399         }
2400         return buf.data;
2401 }
2402
2403
/*
 * Copy a NUL-terminated string into memory obtained from SPI_palloc().
 *
 * Callers in this file use it to keep a string usable after they call
 * SPI_finish().
 */
static char *
_SPI_strdup(const char *s)
{
	size_t		nbytes = strlen(s) + 1; /* include the terminator */
	char	   *copy;

	copy = SPI_palloc(nbytes);
	memcpy(copy, s, nbytes);

	return copy;
}
2413
2414
2415 /*
2416  * SQL to XML mapping functions
2417  *
2418  * What follows below was at one point intentionally organized so that
2419  * you can read along in the SQL/XML standard. The functions are
2420  * mostly split up the way the clauses lay out in the standards
2421  * document, and the identifiers are also aligned with the standard
2422  * text.  Unfortunately, SQL/XML:2006 reordered the clauses
2423  * differently than SQL/XML:2003, so the order below doesn't make much
2424  * sense anymore.
2425  *
2426  * There are many things going on there:
2427  *
2428  * There are two kinds of mappings: Mapping SQL data (table contents)
2429  * to XML documents, and mapping SQL structure (the "schema") to XML
2430  * Schema.  And there are functions that do both at the same time.
2431  *
2432  * Then you can map a database, a schema, or a table, each in both
2433  * ways.  This breaks down recursively: Mapping a database invokes
2434  * mapping schemas, which invokes mapping tables, which invokes
2435  * mapping rows, which invokes mapping columns, although you can't
2436  * call the last two from the outside.  Because of this, there are a
2437  * number of xyz_internal() functions which are to be called both from
2438  * the function manager wrapper and from some upper layer in a
2439  * recursive call.
2440  *
2441  * See the documentation about what the common function arguments
2442  * nulls, tableforest, and targetns mean.
2443  *
2444  * Some style guidelines for XML output: Use double quotes for quoting
2445  * XML attributes.  Indent XML elements by two spaces, but remember
2446  * that a lot of code is called recursively at different levels, so
2447  * it's better not to indent rather than create output that indents
2448  * and outdents weirdly.  Add newlines to make the output look nice.
2449  */
2450
2451
2452 /*
2453  * Visibility of objects for XML mappings; see SQL/XML:2008 section
2454  * 4.10.8.
2455  */
2456
2457 /*
2458  * Given a query, which must return type oid as first column, produce
2459  * a list of Oids with the query results.
2460  */
2461 static List *
2462 query_to_oid_list(const char *query)
2463 {
2464         uint64          i;
2465         List       *list = NIL;
2466
2467         SPI_execute(query, true, 0);
2468
2469         for (i = 0; i < SPI_processed; i++)
2470         {
2471                 Datum           oid;
2472                 bool            isnull;
2473
2474                 oid = SPI_getbinval(SPI_tuptable->vals[i],
2475                                                         SPI_tuptable->tupdesc,
2476                                                         1,
2477                                                         &isnull);
2478                 if (!isnull)
2479                         list = lappend_oid(list, DatumGetObjectId(oid));
2480         }
2481
2482         return list;
2483 }
2484
2485
2486 static List *
2487 schema_get_xml_visible_tables(Oid nspid)
2488 {
2489         StringInfoData query;
2490
2491         initStringInfo(&query);
2492         appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class"
2493                                          " WHERE relnamespace = %u AND relkind IN ("
2494                                          CppAsString2(RELKIND_RELATION) ","
2495                                          CppAsString2(RELKIND_MATVIEW) ","
2496                                          CppAsString2(RELKIND_VIEW) ")"
2497                                          " AND pg_catalog.has_table_privilege (oid, 'SELECT')"
2498                                          " ORDER BY relname;", nspid);
2499
2500         return query_to_oid_list(query.data);
2501 }
2502
2503
/*
 * Including the system schemas is probably not useful for a database
 * mapping.
 *
 * XML_VISIBLE_SCHEMAS_EXCLUDE is a SQL predicate that matches schemas whose
 * name starts with "pg_" or is "information_schema".
 *
 * XML_VISIBLE_SCHEMAS is a SELECT yielding the OIDs of all schemas on which
 * has_schema_privilege(oid, 'USAGE') holds and that are not matched by the
 * exclusion predicate.  It has no trailing semicolon or ORDER BY, so users
 * below can append one or embed it as a subquery.
 */
#define XML_VISIBLE_SCHEMAS_EXCLUDE "(nspname ~ '^pg_' OR nspname = 'information_schema')"

#define XML_VISIBLE_SCHEMAS "SELECT oid FROM pg_catalog.pg_namespace WHERE pg_catalog.has_schema_privilege (oid, 'USAGE') AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2511
2512
2513 static List *
2514 database_get_xml_visible_schemas(void)
2515 {
2516         return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2517 }
2518
2519
2520 static List *
2521 database_get_xml_visible_tables(void)
2522 {
2523         /* At the moment there is no order required here. */
2524         return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class"
2525                                                          " WHERE relkind IN ("
2526                                                          CppAsString2(RELKIND_RELATION) ","
2527                                                          CppAsString2(RELKIND_MATVIEW) ","
2528                                                          CppAsString2(RELKIND_VIEW) ")"
2529                                                          " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')"
2530                                                          " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2531 }
2532
2533
2534 /*
2535  * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2536  * section 9.11.
2537  */
2538
2539 static StringInfo
2540 table_to_xml_internal(Oid relid,
2541                                           const char *xmlschema, bool nulls, bool tableforest,
2542                                           const char *targetns, bool top_level)
2543 {
2544         StringInfoData query;
2545
2546         initStringInfo(&query);
2547         appendStringInfo(&query, "SELECT * FROM %s",
2548                                          DatumGetCString(DirectFunctionCall1(regclassout,
2549                                                                                                                  ObjectIdGetDatum(relid))));
2550         return query_to_xml_internal(query.data, get_rel_name(relid),
2551                                                                  xmlschema, nulls, tableforest,
2552                                                                  targetns, top_level);
2553 }
2554
2555
2556 Datum
2557 table_to_xml(PG_FUNCTION_ARGS)
2558 {
2559         Oid                     relid = PG_GETARG_OID(0);
2560         bool            nulls = PG_GETARG_BOOL(1);
2561         bool            tableforest = PG_GETARG_BOOL(2);
2562         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2563
2564         PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2565                                                                                                                                 nulls, tableforest,
2566                                                                                                                                 targetns, true)));
2567 }
2568
2569
2570 Datum
2571 query_to_xml(PG_FUNCTION_ARGS)
2572 {
2573         char       *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2574         bool            nulls = PG_GETARG_BOOL(1);
2575         bool            tableforest = PG_GETARG_BOOL(2);
2576         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2577
2578         PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2579                                                                                                                                 NULL, nulls, tableforest,
2580                                                                                                                                 targetns, true)));
2581 }
2582
2583
2584 Datum
2585 cursor_to_xml(PG_FUNCTION_ARGS)
2586 {
2587         char       *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2588         int32           count = PG_GETARG_INT32(1);
2589         bool            nulls = PG_GETARG_BOOL(2);
2590         bool            tableforest = PG_GETARG_BOOL(3);
2591         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2592
2593         StringInfoData result;
2594         Portal          portal;
2595         uint64          i;
2596
2597         initStringInfo(&result);
2598
2599         if (!tableforest)
2600         {
2601                 xmldata_root_element_start(&result, "table", NULL, targetns, true);
2602                 appendStringInfoChar(&result, '\n');
2603         }
2604
2605         SPI_connect();
2606         portal = SPI_cursor_find(name);
2607         if (portal == NULL)
2608                 ereport(ERROR,
2609                                 (errcode(ERRCODE_UNDEFINED_CURSOR),
2610                                  errmsg("cursor \"%s\" does not exist", name)));
2611
2612         SPI_cursor_fetch(portal, true, count);
2613         for (i = 0; i < SPI_processed; i++)
2614                 SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2615                                                                   tableforest, targetns, true);
2616
2617         SPI_finish();
2618
2619         if (!tableforest)
2620                 xmldata_root_element_end(&result, "table");
2621
2622         PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2623 }
2624
2625
2626 /*
2627  * Write the start tag of the root element of a data mapping.
2628  *
2629  * top_level means that this is the very top level of the eventual
2630  * output.  For example, when the user calls table_to_xml, then a call
2631  * with a table name to this function is the top level.  When the user
2632  * calls database_to_xml, then a call with a schema name to this
2633  * function is not the top level.  If top_level is false, then the XML
2634  * namespace declarations are omitted, because they supposedly already
2635  * appeared earlier in the output.  Repeating them is not wrong, but
2636  * it looks ugly.
2637  */
2638 static void
2639 xmldata_root_element_start(StringInfo result, const char *eltname,
2640                                                    const char *xmlschema, const char *targetns,
2641                                                    bool top_level)
2642 {
2643         /* This isn't really wrong but currently makes no sense. */
2644         Assert(top_level || !xmlschema);
2645
2646         appendStringInfo(result, "<%s", eltname);
2647         if (top_level)
2648         {
2649                 appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
2650                 if (strlen(targetns) > 0)
2651                         appendStringInfo(result, " xmlns=\"%s\"", targetns);
2652         }
2653         if (xmlschema)
2654         {
2655                 /* FIXME: better targets */
2656                 if (strlen(targetns) > 0)
2657                         appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
2658                 else
2659                         appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
2660         }
2661         appendStringInfoString(result, ">\n");
2662 }
2663
2664
2665 static void
2666 xmldata_root_element_end(StringInfo result, const char *eltname)
2667 {
2668         appendStringInfo(result, "</%s>\n", eltname);
2669 }
2670
2671
2672 static StringInfo
2673 query_to_xml_internal(const char *query, char *tablename,
2674                                           const char *xmlschema, bool nulls, bool tableforest,
2675                                           const char *targetns, bool top_level)
2676 {
2677         StringInfo      result;
2678         char       *xmltn;
2679         uint64          i;
2680
2681         if (tablename)
2682                 xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
2683         else
2684                 xmltn = "table";
2685
2686         result = makeStringInfo();
2687
2688         SPI_connect();
2689         if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
2690                 ereport(ERROR,
2691                                 (errcode(ERRCODE_DATA_EXCEPTION),
2692                                  errmsg("invalid query")));
2693
2694         if (!tableforest)
2695         {
2696                 xmldata_root_element_start(result, xmltn, xmlschema,
2697                                                                    targetns, top_level);
2698                 appendStringInfoChar(result, '\n');
2699         }
2700
2701         if (xmlschema)
2702                 appendStringInfo(result, "%s\n\n", xmlschema);
2703
2704         for (i = 0; i < SPI_processed; i++)
2705                 SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
2706                                                                   tableforest, targetns, top_level);
2707
2708         if (!tableforest)
2709                 xmldata_root_element_end(result, xmltn);
2710
2711         SPI_finish();
2712
2713         return result;
2714 }
2715
2716
2717 Datum
2718 table_to_xmlschema(PG_FUNCTION_ARGS)
2719 {
2720         Oid                     relid = PG_GETARG_OID(0);
2721         bool            nulls = PG_GETARG_BOOL(1);
2722         bool            tableforest = PG_GETARG_BOOL(2);
2723         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2724         const char *result;
2725         Relation        rel;
2726
2727         rel = table_open(relid, AccessShareLock);
2728         result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2729                                                                                 tableforest, targetns);
2730         table_close(rel, NoLock);
2731
2732         PG_RETURN_XML_P(cstring_to_xmltype(result));
2733 }
2734
2735
2736 Datum
2737 query_to_xmlschema(PG_FUNCTION_ARGS)
2738 {
2739         char       *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2740         bool            nulls = PG_GETARG_BOOL(1);
2741         bool            tableforest = PG_GETARG_BOOL(2);
2742         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2743         const char *result;
2744         SPIPlanPtr      plan;
2745         Portal          portal;
2746
2747         SPI_connect();
2748
2749         if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2750                 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2751
2752         if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2753                 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2754
2755         result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2756                                                                                                         InvalidOid, nulls,
2757                                                                                                         tableforest, targetns));
2758         SPI_cursor_close(portal);
2759         SPI_finish();
2760
2761         PG_RETURN_XML_P(cstring_to_xmltype(result));
2762 }
2763
2764
2765 Datum
2766 cursor_to_xmlschema(PG_FUNCTION_ARGS)
2767 {
2768         char       *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2769         bool            nulls = PG_GETARG_BOOL(1);
2770         bool            tableforest = PG_GETARG_BOOL(2);
2771         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2772         const char *xmlschema;
2773         Portal          portal;
2774
2775         SPI_connect();
2776         portal = SPI_cursor_find(name);
2777         if (portal == NULL)
2778                 ereport(ERROR,
2779                                 (errcode(ERRCODE_UNDEFINED_CURSOR),
2780                                  errmsg("cursor \"%s\" does not exist", name)));
2781
2782         xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2783                                                                                                            InvalidOid, nulls,
2784                                                                                                            tableforest, targetns));
2785         SPI_finish();
2786
2787         PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
2788 }
2789
2790
2791 Datum
2792 table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2793 {
2794         Oid                     relid = PG_GETARG_OID(0);
2795         bool            nulls = PG_GETARG_BOOL(1);
2796         bool            tableforest = PG_GETARG_BOOL(2);
2797         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2798         Relation        rel;
2799         const char *xmlschema;
2800
2801         rel = table_open(relid, AccessShareLock);
2802         xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2803                                                                                    tableforest, targetns);
2804         table_close(rel, NoLock);
2805
2806         PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
2807                                                                                                                                 xmlschema, nulls, tableforest,
2808                                                                                                                                 targetns, true)));
2809 }
2810
2811
2812 Datum
2813 query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2814 {
2815         char       *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2816         bool            nulls = PG_GETARG_BOOL(1);
2817         bool            tableforest = PG_GETARG_BOOL(2);
2818         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2819
2820         const char *xmlschema;
2821         SPIPlanPtr      plan;
2822         Portal          portal;
2823
2824         SPI_connect();
2825
2826         if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2827                 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2828
2829         if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2830                 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2831
2832         xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2833                                                                                                            InvalidOid, nulls, tableforest, targetns));
2834         SPI_cursor_close(portal);
2835         SPI_finish();
2836
2837         PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2838                                                                                                                                 xmlschema, nulls, tableforest,
2839                                                                                                                                 targetns, true)));
2840 }
2841
2842
2843 /*
2844  * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
2845  * sections 9.13, 9.14.
2846  */
2847
2848 static StringInfo
2849 schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
2850                                            bool tableforest, const char *targetns, bool top_level)
2851 {
2852         StringInfo      result;
2853         char       *xmlsn;
2854         List       *relid_list;
2855         ListCell   *cell;
2856
2857         xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
2858                                                                                    true, false);
2859         result = makeStringInfo();
2860
2861         xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
2862         appendStringInfoChar(result, '\n');
2863
2864         if (xmlschema)
2865                 appendStringInfo(result, "%s\n\n", xmlschema);
2866
2867         SPI_connect();
2868
2869         relid_list = schema_get_xml_visible_tables(nspid);
2870
2871         foreach(cell, relid_list)
2872         {
2873                 Oid                     relid = lfirst_oid(cell);
2874                 StringInfo      subres;
2875
2876                 subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
2877                                                                            targetns, false);
2878
2879                 appendBinaryStringInfo(result, subres->data, subres->len);
2880                 appendStringInfoChar(result, '\n');
2881         }
2882
2883         SPI_finish();
2884
2885         xmldata_root_element_end(result, xmlsn);
2886
2887         return result;
2888 }
2889
2890
2891 Datum
2892 schema_to_xml(PG_FUNCTION_ARGS)
2893 {
2894         Name            name = PG_GETARG_NAME(0);
2895         bool            nulls = PG_GETARG_BOOL(1);
2896         bool            tableforest = PG_GETARG_BOOL(2);
2897         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2898
2899         char       *schemaname;
2900         Oid                     nspid;
2901
2902         schemaname = NameStr(*name);
2903         nspid = LookupExplicitNamespace(schemaname, false);
2904
2905         PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
2906                                                                                                                                  nulls, tableforest, targetns, true)));
2907 }
2908
2909
2910 /*
2911  * Write the start element of the root element of an XML Schema mapping.
2912  */
2913 static void
2914 xsd_schema_element_start(StringInfo result, const char *targetns)
2915 {
2916         appendStringInfoString(result,
2917                                                    "<xsd:schema\n"
2918                                                    "    xmlns:xsd=\"" NAMESPACE_XSD "\"");
2919         if (strlen(targetns) > 0)
2920                 appendStringInfo(result,
2921                                                  "\n"
2922                                                  "    targetNamespace=\"%s\"\n"
2923                                                  "    elementFormDefault=\"qualified\"",
2924                                                  targetns);
2925         appendStringInfoString(result,
2926                                                    ">\n\n");
2927 }
2928
2929
2930 static void
2931 xsd_schema_element_end(StringInfo result)
2932 {
2933         appendStringInfoString(result, "</xsd:schema>");
2934 }
2935
2936
2937 static StringInfo
2938 schema_to_xmlschema_internal(const char *schemaname, bool nulls,
2939                                                          bool tableforest, const char *targetns)
2940 {
2941         Oid                     nspid;
2942         List       *relid_list;
2943         List       *tupdesc_list;
2944         ListCell   *cell;
2945         StringInfo      result;
2946
2947         result = makeStringInfo();
2948
2949         nspid = LookupExplicitNamespace(schemaname, false);
2950
2951         xsd_schema_element_start(result, targetns);
2952
2953         SPI_connect();
2954
2955         relid_list = schema_get_xml_visible_tables(nspid);
2956
2957         tupdesc_list = NIL;
2958         foreach(cell, relid_list)
2959         {
2960                 Relation        rel;
2961
2962                 rel = table_open(lfirst_oid(cell), AccessShareLock);
2963                 tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
2964                 table_close(rel, NoLock);
2965         }
2966
2967         appendStringInfoString(result,
2968                                                    map_sql_typecoll_to_xmlschema_types(tupdesc_list));
2969
2970         appendStringInfoString(result,
2971                                                    map_sql_schema_to_xmlschema_types(nspid, relid_list,
2972                                                                                                                          nulls, tableforest, targetns));
2973
2974         xsd_schema_element_end(result);
2975
2976         SPI_finish();
2977
2978         return result;
2979 }
2980
2981
2982 Datum
2983 schema_to_xmlschema(PG_FUNCTION_ARGS)
2984 {
2985         Name            name = PG_GETARG_NAME(0);
2986         bool            nulls = PG_GETARG_BOOL(1);
2987         bool            tableforest = PG_GETARG_BOOL(2);
2988         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2989
2990         PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
2991                                                                                                                                            nulls, tableforest, targetns)));
2992 }
2993
2994
2995 Datum
2996 schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2997 {
2998         Name            name = PG_GETARG_NAME(0);
2999         bool            nulls = PG_GETARG_BOOL(1);
3000         bool            tableforest = PG_GETARG_BOOL(2);
3001         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3002         char       *schemaname;
3003         Oid                     nspid;
3004         StringInfo      xmlschema;
3005
3006         schemaname = NameStr(*name);
3007         nspid = LookupExplicitNamespace(schemaname, false);
3008
3009         xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
3010                                                                                          tableforest, targetns);
3011
3012         PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
3013                                                                                                                                  xmlschema->data, nulls,
3014                                                                                                                                  tableforest, targetns, true)));
3015 }
3016
3017
3018 /*
3019  * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
3020  * sections 9.16, 9.17.
3021  */
3022
3023 static StringInfo
3024 database_to_xml_internal(const char *xmlschema, bool nulls,
3025                                                  bool tableforest, const char *targetns)
3026 {
3027         StringInfo      result;
3028         List       *nspid_list;
3029         ListCell   *cell;
3030         char       *xmlcn;
3031
3032         xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
3033                                                                                    true, false);
3034         result = makeStringInfo();
3035
3036         xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
3037         appendStringInfoChar(result, '\n');
3038
3039         if (xmlschema)
3040                 appendStringInfo(result, "%s\n\n", xmlschema);
3041
3042         SPI_connect();
3043
3044         nspid_list = database_get_xml_visible_schemas();
3045
3046         foreach(cell, nspid_list)
3047         {
3048                 Oid                     nspid = lfirst_oid(cell);
3049                 StringInfo      subres;
3050
3051                 subres = schema_to_xml_internal(nspid, NULL, nulls,
3052                                                                                 tableforest, targetns, false);
3053
3054                 appendBinaryStringInfo(result, subres->data, subres->len);
3055                 appendStringInfoChar(result, '\n');
3056         }
3057
3058         SPI_finish();
3059
3060         xmldata_root_element_end(result, xmlcn);
3061
3062         return result;
3063 }
3064
3065
3066 Datum
3067 database_to_xml(PG_FUNCTION_ARGS)
3068 {
3069         bool            nulls = PG_GETARG_BOOL(0);
3070         bool            tableforest = PG_GETARG_BOOL(1);
3071         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3072
3073         PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
3074                                                                                                                                    tableforest, targetns)));
3075 }
3076
3077
3078 static StringInfo
3079 database_to_xmlschema_internal(bool nulls, bool tableforest,
3080                                                            const char *targetns)
3081 {
3082         List       *relid_list;
3083         List       *nspid_list;
3084         List       *tupdesc_list;
3085         ListCell   *cell;
3086         StringInfo      result;
3087
3088         result = makeStringInfo();
3089
3090         xsd_schema_element_start(result, targetns);
3091
3092         SPI_connect();
3093
3094         relid_list = database_get_xml_visible_tables();
3095         nspid_list = database_get_xml_visible_schemas();
3096
3097         tupdesc_list = NIL;
3098         foreach(cell, relid_list)
3099         {
3100                 Relation        rel;
3101
3102                 rel = table_open(lfirst_oid(cell), AccessShareLock);
3103                 tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3104                 table_close(rel, NoLock);
3105         }
3106
3107         appendStringInfoString(result,
3108                                                    map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3109
3110         appendStringInfoString(result,
3111                                                    map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
3112
3113         xsd_schema_element_end(result);
3114
3115         SPI_finish();
3116
3117         return result;
3118 }
3119
3120
3121 Datum
3122 database_to_xmlschema(PG_FUNCTION_ARGS)
3123 {
3124         bool            nulls = PG_GETARG_BOOL(0);
3125         bool            tableforest = PG_GETARG_BOOL(1);
3126         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3127
3128         PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
3129                                                                                                                                                  tableforest, targetns)));
3130 }
3131
3132
3133 Datum
3134 database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3135 {
3136         bool            nulls = PG_GETARG_BOOL(0);
3137         bool            tableforest = PG_GETARG_BOOL(1);
3138         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3139         StringInfo      xmlschema;
3140
3141         xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
3142
3143         PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
3144                                                                                                                                    nulls, tableforest, targetns)));
3145 }
3146
3147
3148 /*
3149  * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
3150  * 9.2.
3151  */
3152 static char *
3153 map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d)
3154 {
3155         StringInfoData result;
3156
3157         initStringInfo(&result);
3158
3159         if (a)
3160                 appendStringInfoString(&result,
3161                                                            map_sql_identifier_to_xml_name(a, true, true));
3162         if (b)
3163                 appendStringInfo(&result, ".%s",
3164                                                  map_sql_identifier_to_xml_name(b, true, true));
3165         if (c)
3166                 appendStringInfo(&result, ".%s",
3167                                                  map_sql_identifier_to_xml_name(c, true, true));
3168         if (d)
3169                 appendStringInfo(&result, ".%s",
3170                                                  map_sql_identifier_to_xml_name(d, true, true));
3171
3172         return result.data;
3173 }
3174
3175
3176 /*
3177  * Map an SQL table to an XML Schema document; see SQL/XML:2008
3178  * section 9.11.
3179  *
3180  * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3181  * 9.9.
3182  */
3183 static const char *
3184 map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
3185                                                    bool tableforest, const char *targetns)
3186 {
3187         int                     i;
3188         char       *xmltn;
3189         char       *tabletypename;
3190         char       *rowtypename;
3191         StringInfoData result;
3192
3193         initStringInfo(&result);
3194
3195         if (OidIsValid(relid))
3196         {
3197                 HeapTuple       tuple;
3198                 Form_pg_class reltuple;
3199
3200                 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
3201                 if (!HeapTupleIsValid(tuple))
3202                         elog(ERROR, "cache lookup failed for relation %u", relid);
3203                 reltuple = (Form_pg_class) GETSTRUCT(tuple);
3204
3205                 xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
3206                                                                                            true, false);
3207
3208                 tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
3209                                                                                                                                  get_database_name(MyDatabaseId),
3210                                                                                                                                  get_namespace_name(reltuple->relnamespace),
3211                                                                                                                                  NameStr(reltuple->relname));
3212
3213                 rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
3214                                                                                                                            get_database_name(MyDatabaseId),
3215                                                                                                                            get_namespace_name(reltuple->relnamespace),
3216                                                                                                                            NameStr(reltuple->relname));
3217
3218                 ReleaseSysCache(tuple);
3219         }
3220         else
3221         {
3222                 if (tableforest)
3223                         xmltn = "row";
3224                 else
3225                         xmltn = "table";
3226
3227                 tabletypename = "TableType";
3228                 rowtypename = "RowType";
3229         }
3230
3231         xsd_schema_element_start(&result, targetns);
3232
3233         appendStringInfoString(&result,
3234                                                    map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3235
3236         appendStringInfo(&result,
3237                                          "<xsd:complexType name=\"%s\">\n"
3238                                          "  <xsd:sequence>\n",
3239                                          rowtypename);
3240
3241         for (i = 0; i < tupdesc->natts; i++)
3242         {
3243                 Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3244
3245                 if (att->attisdropped)
3246                         continue;
3247                 appendStringInfo(&result,
3248                                                  "    <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3249                                                  map_sql_identifier_to_xml_name(NameStr(att->attname),
3250                                                                                                                 true, false),
3251                                                  map_sql_type_to_xml_name(att->atttypid, -1),
3252                                                  nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3253         }
3254
3255         appendStringInfoString(&result,
3256                                                    "  </xsd:sequence>\n"
3257                                                    "</xsd:complexType>\n\n");
3258
3259         if (!tableforest)
3260         {
3261                 appendStringInfo(&result,
3262                                                  "<xsd:complexType name=\"%s\">\n"
3263                                                  "  <xsd:sequence>\n"
3264                                                  "    <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3265                                                  "  </xsd:sequence>\n"
3266                                                  "</xsd:complexType>\n\n",
3267                                                  tabletypename, rowtypename);
3268
3269                 appendStringInfo(&result,
3270                                                  "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3271                                                  xmltn, tabletypename);
3272         }
3273         else
3274                 appendStringInfo(&result,
3275                                                  "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3276                                                  xmltn, rowtypename);
3277
3278         xsd_schema_element_end(&result);
3279
3280         return result.data;
3281 }
3282
3283
3284 /*
3285  * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3286  * section 9.12.
3287  */
3288 static const char *
3289 map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3290                                                                   bool tableforest, const char *targetns)
3291 {
3292         char       *dbname;
3293         char       *nspname;
3294         char       *xmlsn;
3295         char       *schematypename;
3296         StringInfoData result;
3297         ListCell   *cell;
3298
3299         dbname = get_database_name(MyDatabaseId);
3300         nspname = get_namespace_name(nspid);
3301
3302         initStringInfo(&result);
3303
3304         xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3305
3306         schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3307                                                                                                                           dbname,
3308                                                                                                                           nspname,
3309                                                                                                                           NULL);
3310
3311         appendStringInfo(&result,
3312                                          "<xsd:complexType name=\"%s\">\n", schematypename);
3313         if (!tableforest)
3314                 appendStringInfoString(&result,
3315                                                            "  <xsd:all>\n");
3316         else
3317                 appendStringInfoString(&result,
3318                                                            "  <xsd:sequence>\n");
3319
3320         foreach(cell, relid_list)
3321         {
3322                 Oid                     relid = lfirst_oid(cell);
3323                 char       *relname = get_rel_name(relid);
3324                 char       *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3325                 char       *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3326                                                                                                                                                          dbname,
3327                                                                                                                                                          nspname,
3328                                                                                                                                                          relname);
3329
3330                 if (!tableforest)
3331                         appendStringInfo(&result,
3332                                                          "    <xsd:element name=\"%s\" type=\"%s\"/>\n",
3333                                                          xmltn, tabletypename);
3334                 else
3335                         appendStringInfo(&result,
3336                                                          "    <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3337                                                          xmltn, tabletypename);
3338         }
3339
3340         if (!tableforest)
3341                 appendStringInfoString(&result,
3342                                                            "  </xsd:all>\n");
3343         else
3344                 appendStringInfoString(&result,
3345                                                            "  </xsd:sequence>\n");
3346         appendStringInfoString(&result,
3347                                                    "</xsd:complexType>\n\n");
3348
3349         appendStringInfo(&result,
3350                                          "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3351                                          xmlsn, schematypename);
3352
3353         return result.data;
3354 }
3355
3356
3357 /*
3358  * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3359  * section 9.15.
3360  */
3361 static const char *
3362 map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3363                                                                    bool tableforest, const char *targetns)
3364 {
3365         char       *dbname;
3366         char       *xmlcn;
3367         char       *catalogtypename;
3368         StringInfoData result;
3369         ListCell   *cell;
3370
3371         dbname = get_database_name(MyDatabaseId);
3372
3373         initStringInfo(&result);
3374
3375         xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3376
3377         catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3378                                                                                                                            dbname,
3379                                                                                                                            NULL,
3380                                                                                                                            NULL);
3381
3382         appendStringInfo(&result,
3383                                          "<xsd:complexType name=\"%s\">\n", catalogtypename);
3384         appendStringInfoString(&result,
3385                                                    "  <xsd:all>\n");
3386
3387         foreach(cell, nspid_list)
3388         {
3389                 Oid                     nspid = lfirst_oid(cell);
3390                 char       *nspname = get_namespace_name(nspid);
3391                 char       *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3392                 char       *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3393                                                                                                                                                           dbname,
3394                                                                                                                                                           nspname,
3395                                                                                                                                                           NULL);
3396
3397                 appendStringInfo(&result,
3398                                                  "    <xsd:element name=\"%s\" type=\"%s\"/>\n",
3399                                                  xmlsn, schematypename);
3400         }
3401
3402         appendStringInfoString(&result,
3403                                                    "  </xsd:all>\n");
3404         appendStringInfoString(&result,
3405                                                    "</xsd:complexType>\n\n");
3406
3407         appendStringInfo(&result,
3408                                          "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3409                                          xmlcn, catalogtypename);
3410
3411         return result.data;
3412 }
3413
3414
3415 /*
3416  * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3417  */
3418 static const char *
3419 map_sql_type_to_xml_name(Oid typeoid, int typmod)
3420 {
3421         StringInfoData result;
3422
3423         initStringInfo(&result);
3424
3425         switch (typeoid)
3426         {
3427                 case BPCHAROID:
3428                         if (typmod == -1)
3429                                 appendStringInfoString(&result, "CHAR");
3430                         else
3431                                 appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3432                         break;
3433                 case VARCHAROID:
3434                         if (typmod == -1)
3435                                 appendStringInfoString(&result, "VARCHAR");
3436                         else
3437                                 appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3438                         break;
3439                 case NUMERICOID:
3440                         if (typmod == -1)
3441                                 appendStringInfoString(&result, "NUMERIC");
3442                         else
3443                                 appendStringInfo(&result, "NUMERIC_%d_%d",
3444                                                                  ((typmod - VARHDRSZ) >> 16) & 0xffff,
3445                                                                  (typmod - VARHDRSZ) & 0xffff);
3446                         break;
3447                 case INT4OID:
3448                         appendStringInfoString(&result, "INTEGER");
3449                         break;
3450                 case INT2OID:
3451                         appendStringInfoString(&result, "SMALLINT");
3452                         break;
3453                 case INT8OID:
3454                         appendStringInfoString(&result, "BIGINT");
3455                         break;
3456                 case FLOAT4OID:
3457                         appendStringInfoString(&result, "REAL");
3458                         break;
3459                 case FLOAT8OID:
3460                         appendStringInfoString(&result, "DOUBLE");
3461                         break;
3462                 case BOOLOID:
3463                         appendStringInfoString(&result, "BOOLEAN");
3464                         break;
3465                 case TIMEOID:
3466                         if (typmod == -1)
3467                                 appendStringInfoString(&result, "TIME");
3468                         else
3469                                 appendStringInfo(&result, "TIME_%d", typmod);
3470                         break;
3471                 case TIMETZOID:
3472                         if (typmod == -1)
3473                                 appendStringInfoString(&result, "TIME_WTZ");
3474                         else
3475                                 appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3476                         break;
3477                 case TIMESTAMPOID:
3478                         if (typmod == -1)
3479                                 appendStringInfoString(&result, "TIMESTAMP");
3480                         else
3481                                 appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3482                         break;
3483                 case TIMESTAMPTZOID:
3484                         if (typmod == -1)
3485                                 appendStringInfoString(&result, "TIMESTAMP_WTZ");
3486                         else
3487                                 appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3488                         break;
3489                 case DATEOID:
3490                         appendStringInfoString(&result, "DATE");
3491                         break;
3492                 case XMLOID:
3493                         appendStringInfoString(&result, "XML");
3494                         break;
3495                 default:
3496                         {
3497                                 HeapTuple       tuple;
3498                                 Form_pg_type typtuple;
3499
3500                                 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3501                                 if (!HeapTupleIsValid(tuple))
3502                                         elog(ERROR, "cache lookup failed for type %u", typeoid);
3503                                 typtuple = (Form_pg_type) GETSTRUCT(tuple);
3504
3505                                 appendStringInfoString(&result,
3506                                                                            map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3507                                                                                                                                                                 get_database_name(MyDatabaseId),
3508                                                                                                                                                                 get_namespace_name(typtuple->typnamespace),
3509                                                                                                                                                                 NameStr(typtuple->typname)));
3510
3511                                 ReleaseSysCache(tuple);
3512                         }
3513         }
3514
3515         return result.data;
3516 }
3517
3518
3519 /*
3520  * Map a collection of SQL data types to XML Schema data types; see
3521  * SQL/XML:2008 section 9.7.
3522  */
3523 static const char *
3524 map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
3525 {
3526         List       *uniquetypes = NIL;
3527         int                     i;
3528         StringInfoData result;
3529         ListCell   *cell0;
3530
3531         /* extract all column types used in the set of TupleDescs */
3532         foreach(cell0, tupdesc_list)
3533         {
3534                 TupleDesc       tupdesc = (TupleDesc) lfirst(cell0);
3535
3536                 for (i = 0; i < tupdesc->natts; i++)
3537                 {
3538                         Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3539
3540                         if (att->attisdropped)
3541                                 continue;
3542                         uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid);
3543                 }
3544         }
3545
3546         /* add base types of domains */
3547         foreach(cell0, uniquetypes)
3548         {
3549                 Oid                     typid = lfirst_oid(cell0);
3550                 Oid                     basetypid = getBaseType(typid);
3551
3552                 if (basetypid != typid)
3553                         uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3554         }
3555
3556         /* Convert to textual form */
3557         initStringInfo(&result);
3558
3559         foreach(cell0, uniquetypes)
3560         {
3561                 appendStringInfo(&result, "%s\n",
3562                                                  map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3563                                                                                                                 -1));
3564         }
3565
3566         return result.data;
3567 }
3568
3569
3570 /*
3571  * Map an SQL data type to a named XML Schema data type; see
3572  * SQL/XML:2008 sections 9.5 and 9.6.
3573  *
3574  * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3575  * a name attribute, which this function does.  The name-less version
3576  * 9.5 doesn't appear to be required anywhere.)
3577  */
3578 static const char *
3579 map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3580 {
3581         StringInfoData result;
3582         const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3583
3584         initStringInfo(&result);
3585
3586         if (typeoid == XMLOID)
3587         {
3588                 appendStringInfoString(&result,
3589                                                            "<xsd:complexType mixed=\"true\">\n"
3590                                                            "  <xsd:sequence>\n"
3591                                                            "    <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3592                                                            "  </xsd:sequence>\n"
3593                                                            "</xsd:complexType>\n");
3594         }
3595         else
3596         {
3597                 appendStringInfo(&result,
3598                                                  "<xsd:simpleType name=\"%s\">\n", typename);
3599
3600                 switch (typeoid)
3601                 {
3602                         case BPCHAROID:
3603                         case VARCHAROID:
3604                         case TEXTOID:
3605                                 appendStringInfoString(&result,
3606                                                                            "  <xsd:restriction base=\"xsd:string\">\n");
3607                                 if (typmod != -1)
3608                                         appendStringInfo(&result,
3609                                                                          "    <xsd:maxLength value=\"%d\"/>\n",
3610                                                                          typmod - VARHDRSZ);
3611                                 appendStringInfoString(&result, "  </xsd:restriction>\n");
3612                                 break;
3613
3614                         case BYTEAOID:
3615                                 appendStringInfo(&result,
3616                                                                  "  <xsd:restriction base=\"xsd:%s\">\n"
3617                                                                  "  </xsd:restriction>\n",
3618                                                                  xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3619                                 break;
3620
3621                         case NUMERICOID:
3622                                 if (typmod != -1)
3623                                         appendStringInfo(&result,
3624                                                                          "  <xsd:restriction base=\"xsd:decimal\">\n"
3625                                                                          "    <xsd:totalDigits value=\"%d\"/>\n"
3626                                                                          "    <xsd:fractionDigits value=\"%d\"/>\n"
3627                                                                          "  </xsd:restriction>\n",
3628                                                                          ((typmod - VARHDRSZ) >> 16) & 0xffff,
3629                                                                          (typmod - VARHDRSZ) & 0xffff);
3630                                 break;
3631
3632                         case INT2OID:
3633                                 appendStringInfo(&result,
3634                                                                  "  <xsd:restriction base=\"xsd:short\">\n"
3635                                                                  "    <xsd:maxInclusive value=\"%d\"/>\n"
3636                                                                  "    <xsd:minInclusive value=\"%d\"/>\n"
3637                                                                  "  </xsd:restriction>\n",
3638                                                                  SHRT_MAX, SHRT_MIN);
3639                                 break;
3640
3641                         case INT4OID:
3642                                 appendStringInfo(&result,
3643                                                                  "  <xsd:restriction base=\"xsd:int\">\n"
3644                                                                  "    <xsd:maxInclusive value=\"%d\"/>\n"
3645                                                                  "    <xsd:minInclusive value=\"%d\"/>\n"
3646                                                                  "  </xsd:restriction>\n",
3647                                                                  INT_MAX, INT_MIN);
3648                                 break;
3649
3650                         case INT8OID:
3651                                 appendStringInfo(&result,
3652                                                                  "  <xsd:restriction base=\"xsd:long\">\n"
3653                                                                  "    <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
3654                                                                  "    <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
3655                                                                  "  </xsd:restriction>\n",
3656                                                                  (((uint64) 1) << (sizeof(int64) * 8 - 1)) - 1,
3657                                                                  (((uint64) 1) << (sizeof(int64) * 8 - 1)));
3658                                 break;
3659
3660                         case FLOAT4OID:
3661                                 appendStringInfoString(&result,
3662                                                                            "  <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
3663                                 break;
3664
3665                         case FLOAT8OID:
3666                                 appendStringInfoString(&result,
3667                                                                            "  <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
3668                                 break;
3669
3670                         case BOOLOID:
3671                                 appendStringInfoString(&result,
3672                                                                            "  <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
3673                                 break;
3674
3675                         case TIMEOID:
3676                         case TIMETZOID:
3677                                 {
3678                                         const char *tz = (typeoid == TIMETZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3679
3680                                         if (typmod == -1)
3681                                                 appendStringInfo(&result,
3682                                                                                  "  <xsd:restriction base=\"xsd:time\">\n"
3683                                                                                  "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3684                                                                                  "  </xsd:restriction>\n", tz);
3685                                         else if (typmod == 0)
3686                                                 appendStringInfo(&result,
3687                                                                                  "  <xsd:restriction base=\"xsd:time\">\n"
3688                                                                                  "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3689                                                                                  "  </xsd:restriction>\n", tz);
3690                                         else
3691                                                 appendStringInfo(&result,
3692                                                                                  "  <xsd:restriction base=\"xsd:time\">\n"
3693                                                                                  "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3694                                                                                  "  </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3695                                         break;
3696                                 }
3697
3698                         case TIMESTAMPOID:
3699                         case TIMESTAMPTZOID:
3700                                 {
3701                                         const char *tz = (typeoid == TIMESTAMPTZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3702
3703                                         if (typmod == -1)
3704                                                 appendStringInfo(&result,
3705                                                                                  "  <xsd:restriction base=\"xsd:dateTime\">\n"
3706                                                                                  "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3707                                                                                  "  </xsd:restriction>\n", tz);
3708                                         else if (typmod == 0)
3709                                                 appendStringInfo(&result,
3710                                                                                  "  <xsd:restriction base=\"xsd:dateTime\">\n"
3711                                                                                  "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3712                                                                                  "  </xsd:restriction>\n", tz);
3713                                         else
3714                                                 appendStringInfo(&result,
3715                                                                                  "  <xsd:restriction base=\"xsd:dateTime\">\n"
3716                                                                                  "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3717                                                                                  "  </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3718                                         break;
3719                                 }
3720
3721                         case DATEOID:
3722                                 appendStringInfoString(&result,
3723                                                                            "  <xsd:restriction base=\"xsd:date\">\n"
3724                                                                            "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
3725                                                                            "  </xsd:restriction>\n");
3726                                 break;
3727
3728                         default:
3729                                 if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
3730                                 {
3731                                         Oid                     base_typeoid;
3732                                         int32           base_typmod = -1;
3733
3734                                         base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
3735
3736                                         appendStringInfo(&result,
3737                                                                          "  <xsd:restriction base=\"%s\"/>\n",
3738                                                                          map_sql_type_to_xml_name(base_typeoid, base_typmod));
3739                                 }
3740                                 break;
3741                 }
3742                 appendStringInfoString(&result, "</xsd:simpleType>\n");
3743         }
3744
3745         return result.data;
3746 }
3747
3748
3749 /*
3750  * Map an SQL row to an XML element, taking the row from the active
3751  * SPI cursor.  See also SQL/XML:2008 section 9.10.
3752  */
3753 static void
3754 SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
3755                                                   bool nulls, bool tableforest,
3756                                                   const char *targetns, bool top_level)
3757 {
3758         int                     i;
3759         char       *xmltn;
3760
3761         if (tablename)
3762                 xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3763         else
3764         {
3765                 if (tableforest)
3766                         xmltn = "row";
3767                 else
3768                         xmltn = "table";
3769         }
3770
3771         if (tableforest)
3772                 xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
3773         else
3774                 appendStringInfoString(result, "<row>\n");
3775
3776         for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
3777         {
3778                 char       *colname;
3779                 Datum           colval;
3780                 bool            isnull;
3781
3782                 colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
3783                                                                                                  true, false);
3784                 colval = SPI_getbinval(SPI_tuptable->vals[rownum],
3785                                                            SPI_tuptable->tupdesc,
3786                                                            i,
3787                                                            &isnull);
3788                 if (isnull)
3789                 {
3790                         if (nulls)
3791                                 appendStringInfo(result, "  <%s xsi:nil=\"true\"/>\n", colname);
3792                 }
3793                 else
3794                         appendStringInfo(result, "  <%s>%s</%s>\n",
3795                                                          colname,
3796                                                          map_sql_value_to_xml_value(colval,
3797                                                                                                                 SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
3798                                                          colname);
3799         }
3800
3801         if (tableforest)
3802         {
3803                 xmldata_root_element_end(result, xmltn);
3804                 appendStringInfoChar(result, '\n');
3805         }
3806         else
3807                 appendStringInfoString(result, "</row>\n\n");
3808 }
3809
3810
3811 /*
3812  * XPath related functions
3813  */
3814
3815 #ifdef USE_LIBXML
3816
3817 /*
3818  * Convert XML node to text.
3819  *
3820  * For attribute and text nodes, return the escaped text.  For anything else,
3821  * dump the whole subtree.
3822  */
3823 static text *
3824 xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
3825 {
3826         xmltype    *result;
3827
3828         if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE)
3829         {
3830                 void            (*volatile nodefree) (xmlNodePtr) = NULL;
3831                 volatile xmlBufferPtr buf = NULL;
3832                 volatile xmlNodePtr cur_copy = NULL;
3833
3834                 PG_TRY();
3835                 {
3836                         int                     bytes;
3837
3838                         buf = xmlBufferCreate();
3839                         if (buf == NULL || xmlerrcxt->err_occurred)
3840                                 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3841                                                         "could not allocate xmlBuffer");
3842
3843                         /*
3844                          * Produce a dump of the node that we can serialize.  xmlNodeDump
3845                          * does that, but the result of that function won't contain
3846                          * namespace definitions from ancestor nodes, so we first do a
3847                          * xmlCopyNode() which duplicates the node along with its required
3848                          * namespace definitions.
3849                          *
3850                          * Some old libxml2 versions such as 2.7.6 produce partially
3851                          * broken XML_DOCUMENT_NODE nodes (unset content field) when
3852                          * copying them.  xmlNodeDump of such a node works fine, but
3853                          * xmlFreeNode crashes; set us up to call xmlFreeDoc instead.
3854                          */
3855                         cur_copy = xmlCopyNode(cur, 1);
3856                         if (cur_copy == NULL || xmlerrcxt->err_occurred)
3857                                 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3858                                                         "could not copy node");
3859                         nodefree = (cur_copy->type == XML_DOCUMENT_NODE) ?
3860                                 (void (*) (xmlNodePtr)) xmlFreeDoc : xmlFreeNode;
3861
3862                         bytes = xmlNodeDump(buf, NULL, cur_copy, 0, 0);
3863                         if (bytes == -1 || xmlerrcxt->err_occurred)
3864                                 xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3865                                                         "could not dump node");
3866
3867                         result = xmlBuffer_to_xmltype(buf);
3868                 }
3869                 PG_CATCH();
3870                 {
3871                         if (nodefree)
3872                                 nodefree(cur_copy);
3873                         if (buf)
3874                                 xmlBufferFree(buf);
3875                         PG_RE_THROW();
3876                 }
3877                 PG_END_TRY();
3878
3879                 if (nodefree)
3880                         nodefree(cur_copy);
3881                 xmlBufferFree(buf);
3882         }
3883         else
3884         {
3885                 xmlChar    *str;
3886
3887                 str = xmlXPathCastNodeToString(cur);
3888                 PG_TRY();
3889                 {
3890                         /* Here we rely on XML having the same representation as TEXT */
3891                         char       *escaped = escape_xml((char *) str);
3892
3893                         result = (xmltype *) cstring_to_text(escaped);
3894                         pfree(escaped);
3895                 }
3896                 PG_CATCH();
3897                 {
3898                         xmlFree(str);
3899                         PG_RE_THROW();
3900                 }
3901                 PG_END_TRY();
3902                 xmlFree(str);
3903         }
3904
3905         return result;
3906 }
3907
3908 /*
3909  * Convert an XML XPath object (the result of evaluating an XPath expression)
3910  * to an array of xml values, which are appended to astate.  The function
3911  * result value is the number of elements in the array.
3912  *
3913  * If "astate" is NULL then we don't generate the array value, but we still
3914  * return the number of elements it would have had.
3915  *
3916  * Nodesets are converted to an array containing the nodes' textual
3917  * representations.  Primitive values (float, double, string) are converted
3918  * to a single-element array containing the value's string representation.
3919  */
3920 static int
3921 xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
3922                                            ArrayBuildState *astate,
3923                                            PgXmlErrorContext *xmlerrcxt)
3924 {
3925         int                     result = 0;
3926         Datum           datum;
3927         Oid                     datumtype;
3928         char       *result_str;
3929
3930         switch (xpathobj->type)
3931         {
3932                 case XPATH_NODESET:
3933                         if (xpathobj->nodesetval != NULL)
3934                         {
3935                                 result = xpathobj->nodesetval->nodeNr;
3936                                 if (astate != NULL)
3937                                 {
3938                                         int                     i;
3939
3940                                         for (i = 0; i < result; i++)
3941                                         {
3942                                                 datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
3943                                                                                                                                          xmlerrcxt));
3944                                                 (void) accumArrayResult(astate, datum, false,
3945                                                                                                 XMLOID, CurrentMemoryContext);
3946                                         }
3947                                 }
3948                         }
3949                         return result;
3950
3951                 case XPATH_BOOLEAN:
3952                         if (astate == NULL)
3953                                 return 1;
3954                         datum = BoolGetDatum(xpathobj->boolval);
3955                         datumtype = BOOLOID;
3956                         break;
3957
3958                 case XPATH_NUMBER:
3959                         if (astate == NULL)
3960                                 return 1;
3961                         datum = Float8GetDatum(xpathobj->floatval);
3962                         datumtype = FLOAT8OID;
3963                         break;
3964
3965                 case XPATH_STRING:
3966                         if (astate == NULL)
3967                                 return 1;
3968                         datum = CStringGetDatum((char *) xpathobj->stringval);
3969                         datumtype = CSTRINGOID;
3970                         break;
3971
3972                 default:
3973                         elog(ERROR, "xpath expression result type %d is unsupported",
3974                                  xpathobj->type);
3975                         return 0;                       /* keep compiler quiet */
3976         }
3977
3978         /* Common code for scalar-value cases */
3979         result_str = map_sql_value_to_xml_value(datum, datumtype, true);
3980         datum = PointerGetDatum(cstring_to_xmltype(result_str));
3981         (void) accumArrayResult(astate, datum, false,
3982                                                         XMLOID, CurrentMemoryContext);
3983         return 1;
3984 }
3985
3986
3987 /*
3988  * Common code for xpath() and xmlexists()
3989  *
3990  * Evaluate XPath expression and return number of nodes in res_nitems
3991  * and array of XML values in astate.  Either of those pointers can be
3992  * NULL if the corresponding result isn't wanted.
3993  *
3994  * It is up to the user to ensure that the XML passed is in fact
3995  * an XML document - XPath doesn't work easily on fragments without
3996  * a context node being known.
3997  */
3998 static void
3999 xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
4000                            int *res_nitems, ArrayBuildState *astate)
4001 {
4002         PgXmlErrorContext *xmlerrcxt;
4003         volatile xmlParserCtxtPtr ctxt = NULL;
4004         volatile xmlDocPtr doc = NULL;
4005         volatile xmlXPathContextPtr xpathctx = NULL;
4006         volatile xmlXPathCompExprPtr xpathcomp = NULL;
4007         volatile xmlXPathObjectPtr xpathobj = NULL;
4008         char       *datastr;
4009         int32           len;
4010         int32           xpath_len;
4011         xmlChar    *string;
4012         xmlChar    *xpath_expr;
4013         size_t          xmldecl_len = 0;
4014         int                     i;
4015         int                     ndim;
4016         Datum      *ns_names_uris;
4017         bool       *ns_names_uris_nulls;
4018         int                     ns_count;
4019
4020         /*
4021          * Namespace mappings are passed as text[].  If an empty array is passed
4022          * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
4023          * Else, a 2-dimensional array with length of the second axis being equal
4024          * to 2 should be passed, i.e., every subarray contains 2 elements, the
4025          * first element defining the name, the second one the URI.  Example:
4026          * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
4027          * 'http://example2.com']].
4028          */
4029         ndim = namespaces ? ARR_NDIM(namespaces) : 0;
4030         if (ndim != 0)
4031         {
4032                 int                *dims;
4033
4034                 dims = ARR_DIMS(namespaces);
4035
4036                 if (ndim != 2 || dims[1] != 2)
4037                         ereport(ERROR,
4038                                         (errcode(ERRCODE_DATA_EXCEPTION),
4039                                          errmsg("invalid array for XML namespace mapping"),
4040                                          errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
4041
4042                 Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
4043
4044                 deconstruct_array(namespaces, TEXTOID, -1, false, 'i',
4045                                                   &ns_names_uris, &ns_names_uris_nulls,
4046                                                   &ns_count);
4047
4048                 Assert((ns_count % 2) == 0);    /* checked above */
4049                 ns_count /= 2;                  /* count pairs only */
4050         }
4051         else
4052         {
4053                 ns_names_uris = NULL;
4054                 ns_names_uris_nulls = NULL;
4055                 ns_count = 0;
4056         }
4057
4058         datastr = VARDATA(data);
4059         len = VARSIZE(data) - VARHDRSZ;
4060         xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text);
4061         if (xpath_len == 0)
4062                 ereport(ERROR,
4063                                 (errcode(ERRCODE_DATA_EXCEPTION),
4064                                  errmsg("empty XPath expression")));
4065
4066         string = pg_xmlCharStrndup(datastr, len);
4067         xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
4068
4069         /*
4070          * In a UTF8 database, skip any xml declaration, which might assert
4071          * another encoding.  Ignore parse_xml_decl() failure, letting
4072          * xmlCtxtReadMemory() report parse errors.  Documentation disclaims
4073          * xpath() support for non-ASCII data in non-UTF8 databases, so leave
4074          * those scenarios bug-compatible with historical behavior.
4075          */
4076         if (GetDatabaseEncoding() == PG_UTF8)
4077                 parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
4078
4079         xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4080
4081         PG_TRY();
4082         {
4083                 xmlInitParser();
4084
4085                 /*
4086                  * redundant XML parsing (two parsings for the same value during one
4087                  * command execution are possible)
4088                  */
4089                 ctxt = xmlNewParserCtxt();
4090                 if (ctxt == NULL || xmlerrcxt->err_occurred)
4091                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4092                                                 "could not allocate parser context");
4093                 doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
4094                                                                 len - xmldecl_len, NULL, NULL, 0);
4095                 if (doc == NULL || xmlerrcxt->err_occurred)
4096                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4097                                                 "could not parse XML document");
4098                 xpathctx = xmlXPathNewContext(doc);
4099                 if (xpathctx == NULL || xmlerrcxt->err_occurred)
4100                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4101                                                 "could not allocate XPath context");
4102                 xpathctx->node = (xmlNodePtr) doc;
4103
4104                 /* register namespaces, if any */
4105                 if (ns_count > 0)
4106                 {
4107                         for (i = 0; i < ns_count; i++)
4108                         {
4109                                 char       *ns_name;
4110                                 char       *ns_uri;
4111
4112                                 if (ns_names_uris_nulls[i * 2] ||
4113                                         ns_names_uris_nulls[i * 2 + 1])
4114                                         ereport(ERROR,
4115                                                         (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4116                                                          errmsg("neither namespace name nor URI may be null")));
4117                                 ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
4118                                 ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
4119                                 if (xmlXPathRegisterNs(xpathctx,
4120                                                                            (xmlChar *) ns_name,
4121                                                                            (xmlChar *) ns_uri) != 0)
4122                                         ereport(ERROR,  /* is this an internal error??? */
4123                                                         (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
4124                                                                         ns_name, ns_uri)));
4125                         }
4126                 }
4127
4128                 xpathcomp = xmlXPathCompile(xpath_expr);
4129                 if (xpathcomp == NULL || xmlerrcxt->err_occurred)
4130                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4131                                                 "invalid XPath expression");
4132
4133                 /*
4134                  * Version 2.6.27 introduces a function named
4135                  * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
4136                  * but we can derive the existence by whether any nodes are returned,
4137                  * thereby preventing a library version upgrade and keeping the code
4138                  * the same.
4139                  */
4140                 xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
4141                 if (xpathobj == NULL || xmlerrcxt->err_occurred)
4142                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4143                                                 "could not create XPath object");
4144
4145                 /*
4146                  * Extract the results as requested.
4147                  */
4148                 if (res_nitems != NULL)
4149                         *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4150                 else
4151                         (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4152         }
4153         PG_CATCH();
4154         {
4155                 if (xpathobj)
4156                         xmlXPathFreeObject(xpathobj);
4157                 if (xpathcomp)
4158                         xmlXPathFreeCompExpr(xpathcomp);
4159                 if (xpathctx)
4160                         xmlXPathFreeContext(xpathctx);
4161                 if (doc)
4162                         xmlFreeDoc(doc);
4163                 if (ctxt)
4164                         xmlFreeParserCtxt(ctxt);
4165
4166                 pg_xml_done(xmlerrcxt, true);
4167
4168                 PG_RE_THROW();
4169         }
4170         PG_END_TRY();
4171
4172         xmlXPathFreeObject(xpathobj);
4173         xmlXPathFreeCompExpr(xpathcomp);
4174         xmlXPathFreeContext(xpathctx);
4175         xmlFreeDoc(doc);
4176         xmlFreeParserCtxt(ctxt);
4177
4178         pg_xml_done(xmlerrcxt, false);
4179 }
4180 #endif                                                  /* USE_LIBXML */
4181
4182 /*
4183  * Evaluate XPath expression and return array of XML values.
4184  *
4185  * As we have no support of XQuery sequences yet, this function seems
4186  * to be the most useful one (array of XML functions plays a role of
4187  * some kind of substitution for XQuery sequences).
4188  */
4189 Datum
4190 xpath(PG_FUNCTION_ARGS)
4191 {
4192 #ifdef USE_LIBXML
4193         text       *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4194         xmltype    *data = PG_GETARG_XML_P(1);
4195         ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4196         ArrayBuildState *astate;
4197
4198         astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
4199         xpath_internal(xpath_expr_text, data, namespaces,
4200                                    NULL, astate);
4201         PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
4202 #else
4203         NO_XML_SUPPORT();
4204         return 0;
4205 #endif
4206 }
4207
4208 /*
4209  * Determines if the node specified by the supplied XPath exists
4210  * in a given XML document, returning a boolean.
4211  */
4212 Datum
4213 xmlexists(PG_FUNCTION_ARGS)
4214 {
4215 #ifdef USE_LIBXML
4216         text       *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4217         xmltype    *data = PG_GETARG_XML_P(1);
4218         int                     res_nitems;
4219
4220         xpath_internal(xpath_expr_text, data, NULL,
4221                                    &res_nitems, NULL);
4222
4223         PG_RETURN_BOOL(res_nitems > 0);
4224 #else
4225         NO_XML_SUPPORT();
4226         return 0;
4227 #endif
4228 }
4229
4230 /*
4231  * Determines if the node specified by the supplied XPath exists
4232  * in a given XML document, returning a boolean. Differs from
4233  * xmlexists as it supports namespaces and is not defined in SQL/XML.
4234  */
4235 Datum
4236 xpath_exists(PG_FUNCTION_ARGS)
4237 {
4238 #ifdef USE_LIBXML
4239         text       *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4240         xmltype    *data = PG_GETARG_XML_P(1);
4241         ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4242         int                     res_nitems;
4243
4244         xpath_internal(xpath_expr_text, data, namespaces,
4245                                    &res_nitems, NULL);
4246
4247         PG_RETURN_BOOL(res_nitems > 0);
4248 #else
4249         NO_XML_SUPPORT();
4250         return 0;
4251 #endif
4252 }
4253
4254 /*
4255  * Functions for checking well-formed-ness
4256  */
4257
4258 #ifdef USE_LIBXML
4259 static bool
4260 wellformed_xml(text *data, XmlOptionType xmloption_arg)
4261 {
4262         bool            result;
4263         volatile xmlDocPtr doc = NULL;
4264
4265         /* We want to catch any exceptions and return false */
4266         PG_TRY();
4267         {
4268                 doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());
4269                 result = true;
4270         }
4271         PG_CATCH();
4272         {
4273                 FlushErrorState();
4274                 result = false;
4275         }
4276         PG_END_TRY();
4277
4278         if (doc)
4279                 xmlFreeDoc(doc);
4280
4281         return result;
4282 }
4283 #endif
4284
4285 Datum
4286 xml_is_well_formed(PG_FUNCTION_ARGS)
4287 {
4288 #ifdef USE_LIBXML
4289         text       *data = PG_GETARG_TEXT_PP(0);
4290
4291         PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4292 #else
4293         NO_XML_SUPPORT();
4294         return 0;
4295 #endif                                                  /* not USE_LIBXML */
4296 }
4297
4298 Datum
4299 xml_is_well_formed_document(PG_FUNCTION_ARGS)
4300 {
4301 #ifdef USE_LIBXML
4302         text       *data = PG_GETARG_TEXT_PP(0);
4303
4304         PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4305 #else
4306         NO_XML_SUPPORT();
4307         return 0;
4308 #endif                                                  /* not USE_LIBXML */
4309 }
4310
4311 Datum
4312 xml_is_well_formed_content(PG_FUNCTION_ARGS)
4313 {
4314 #ifdef USE_LIBXML
4315         text       *data = PG_GETARG_TEXT_PP(0);
4316
4317         PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4318 #else
4319         NO_XML_SUPPORT();
4320         return 0;
4321 #endif                                                  /* not USE_LIBXML */
4322 }
4323
4324 /*
4325  * support functions for XMLTABLE
4326  *
4327  */
4328 #ifdef USE_LIBXML
4329
4330 /*
4331  * Returns private data from executor state. Ensure validity by check with
4332  * MAGIC number.
4333  */
4334 static inline XmlTableBuilderData *
4335 GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
4336 {
4337         XmlTableBuilderData *result;
4338
4339         if (!IsA(state, TableFuncScanState))
4340                 elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4341         result = (XmlTableBuilderData *) state->opaque;
4342         if (result->magic != XMLTABLE_CONTEXT_MAGIC)
4343                 elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4344
4345         return result;
4346 }
4347 #endif
4348
4349 /*
4350  * XmlTableInitOpaque
4351  *              Fill in TableFuncScanState->opaque for XmlTable processor; initialize
4352  *              the XML parser.
4353  *
4354  * Note: Because we call pg_xml_init() here and pg_xml_done() in
4355  * XmlTableDestroyOpaque, it is critical for robustness that no other
4356  * executor nodes run until this node is processed to completion.  Caller
4357  * must execute this to completion (probably filling a tuplestore to exhaust
4358  * this node in a single pass) instead of using row-per-call mode.
4359  */
4360 static void
4361 XmlTableInitOpaque(TableFuncScanState *state, int natts)
4362 {
4363 #ifdef USE_LIBXML
4364         volatile xmlParserCtxtPtr ctxt = NULL;
4365         XmlTableBuilderData *xtCxt;
4366         PgXmlErrorContext *xmlerrcxt;
4367
4368         xtCxt = palloc0(sizeof(XmlTableBuilderData));
4369         xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
4370         xtCxt->natts = natts;
4371         xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
4372
4373         xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4374
4375         PG_TRY();
4376         {
4377                 xmlInitParser();
4378
4379                 ctxt = xmlNewParserCtxt();
4380                 if (ctxt == NULL || xmlerrcxt->err_occurred)
4381                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4382                                                 "could not allocate parser context");
4383         }
4384         PG_CATCH();
4385         {
4386                 if (ctxt != NULL)
4387                         xmlFreeParserCtxt(ctxt);
4388
4389                 pg_xml_done(xmlerrcxt, true);
4390
4391                 PG_RE_THROW();
4392         }
4393         PG_END_TRY();
4394
4395         xtCxt->xmlerrcxt = xmlerrcxt;
4396         xtCxt->ctxt = ctxt;
4397
4398         state->opaque = xtCxt;
4399 #else
4400         NO_XML_SUPPORT();
4401 #endif                                                  /* not USE_LIBXML */
4402 }
4403
4404 /*
4405  * XmlTableSetDocument
4406  *              Install the input document
4407  */
4408 static void
4409 XmlTableSetDocument(TableFuncScanState *state, Datum value)
4410 {
4411 #ifdef USE_LIBXML
4412         XmlTableBuilderData *xtCxt;
4413         xmltype    *xmlval = DatumGetXmlP(value);
4414         char       *str;
4415         xmlChar    *xstr;
4416         int                     length;
4417         volatile xmlDocPtr doc = NULL;
4418         volatile xmlXPathContextPtr xpathcxt = NULL;
4419
4420         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
4421
4422         /*
4423          * Use out function for casting to string (remove encoding property). See
4424          * comment in xml_out.
4425          */
4426         str = xml_out_internal(xmlval, 0);
4427
4428         length = strlen(str);
4429         xstr = pg_xmlCharStrndup(str, length);
4430
4431         PG_TRY();
4432         {
4433                 doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4434                 if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
4435                         xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4436                                                 "could not parse XML document");
4437                 xpathcxt = xmlXPathNewContext(doc);
4438                 if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
4439                         xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4440                                                 "could not allocate XPath context");
4441                 xpathcxt->node = (xmlNodePtr) doc;
4442         }
4443         PG_CATCH();
4444         {
4445                 if (xpathcxt != NULL)
4446                         xmlXPathFreeContext(xpathcxt);
4447                 if (doc != NULL)
4448                         xmlFreeDoc(doc);
4449
4450                 PG_RE_THROW();
4451         }
4452         PG_END_TRY();
4453
4454         xtCxt->doc = doc;
4455         xtCxt->xpathcxt = xpathcxt;
4456 #else
4457         NO_XML_SUPPORT();
4458 #endif                                                  /* not USE_LIBXML */
4459 }
4460
4461 /*
4462  * XmlTableSetNamespace
4463  *              Add a namespace declaration
4464  */
4465 static void
4466 XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri)
4467 {
4468 #ifdef USE_LIBXML
4469         XmlTableBuilderData *xtCxt;
4470
4471         if (name == NULL)
4472                 ereport(ERROR,
4473                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4474                                  errmsg("DEFAULT namespace is not supported")));
4475         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4476
4477         if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4478                                                    pg_xmlCharStrndup(name, strlen(name)),
4479                                                    pg_xmlCharStrndup(uri, strlen(uri))))
4480                 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4481                                         "could not set XML namespace");
4482 #else
4483         NO_XML_SUPPORT();
4484 #endif                                                  /* not USE_LIBXML */
4485 }
4486
4487 /*
4488  * XmlTableSetRowFilter
4489  *              Install the row-filter Xpath expression.
4490  */
4491 static void
4492 XmlTableSetRowFilter(TableFuncScanState *state, const char *path)
4493 {
4494 #ifdef USE_LIBXML
4495         XmlTableBuilderData *xtCxt;
4496         xmlChar    *xstr;
4497
4498         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4499
4500         if (*path == '\0')
4501                 ereport(ERROR,
4502                                 (errcode(ERRCODE_DATA_EXCEPTION),
4503                                  errmsg("row path filter must not be empty string")));
4504
4505         xstr = pg_xmlCharStrndup(path, strlen(path));
4506
4507         xtCxt->xpathcomp = xmlXPathCompile(xstr);
4508         if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
4509                 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_SYNTAX_ERROR,
4510                                         "invalid XPath expression");
4511 #else
4512         NO_XML_SUPPORT();
4513 #endif                                                  /* not USE_LIBXML */
4514 }
4515
4516 /*
4517  * XmlTableSetColumnFilter
4518  *              Install the column-filter Xpath expression, for the given column.
4519  */
4520 static void
4521 XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum)
4522 {
4523 #ifdef USE_LIBXML
4524         XmlTableBuilderData *xtCxt;
4525         xmlChar    *xstr;
4526
4527         AssertArg(PointerIsValid(path));
4528
4529         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4530
4531         if (*path == '\0')
4532                 ereport(ERROR,
4533                                 (errcode(ERRCODE_DATA_EXCEPTION),
4534                                  errmsg("column path filter must not be empty string")));
4535
4536         xstr = pg_xmlCharStrndup(path, strlen(path));
4537
4538         xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr);
4539         if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
4540                 xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4541                                         "invalid XPath expression");
4542 #else
4543         NO_XML_SUPPORT();
4544 #endif                                                  /* not USE_LIBXML */
4545 }
4546
4547 /*
4548  * XmlTableFetchRow
4549  *              Prepare the next "current" tuple for upcoming GetValue calls.
4550  *              Returns false if the row-filter expression returned no more rows.
4551  */
4552 static bool
4553 XmlTableFetchRow(TableFuncScanState *state)
4554 {
4555 #ifdef USE_LIBXML
4556         XmlTableBuilderData *xtCxt;
4557
4558         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4559
4560         /*
4561          * XmlTable returns table - set of composite values. The error context, is
4562          * used for producement more values, between two calls, there can be
4563          * created and used another libxml2 error context. It is libxml2 global
4564          * value, so it should be refreshed any time before any libxml2 usage,
4565          * that is finished by returning some value.
4566          */
4567         xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4568
4569         if (xtCxt->xpathobj == NULL)
4570         {
4571                 xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4572                 if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4573                         xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4574                                                 "could not create XPath object");
4575
4576                 xtCxt->row_count = 0;
4577         }
4578
4579         if (xtCxt->xpathobj->type == XPATH_NODESET)
4580         {
4581                 if (xtCxt->xpathobj->nodesetval != NULL)
4582                 {
4583                         if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
4584                                 return true;
4585                 }
4586         }
4587
4588         return false;
4589 #else
4590         NO_XML_SUPPORT();
4591         return false;
4592 #endif                                                  /* not USE_LIBXML */
4593 }
4594
4595 /*
4596  * XmlTableGetValue
4597  *              Return the value for column number 'colnum' for the current row.  If
4598  *              column -1 is requested, return representation of the whole row.
4599  *
4600  * This leaks memory, so be sure to reset often the context in which it's
4601  * called.
4602  */
4603 static Datum
4604 XmlTableGetValue(TableFuncScanState *state, int colnum,
4605                                  Oid typid, int32 typmod, bool *isnull)
4606 {
4607 #ifdef USE_LIBXML
4608         XmlTableBuilderData *xtCxt;
4609         Datum           result = (Datum) 0;
4610         xmlNodePtr      cur;
4611         char       *cstr = NULL;
4612         volatile xmlXPathObjectPtr xpathobj = NULL;
4613
4614         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
4615
4616         Assert(xtCxt->xpathobj &&
4617                    xtCxt->xpathobj->type == XPATH_NODESET &&
4618                    xtCxt->xpathobj->nodesetval != NULL);
4619
4620         /* Propagate context related error context to libxml2 */
4621         xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4622
4623         *isnull = false;
4624
4625         cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
4626
4627         Assert(xtCxt->xpathscomp[colnum] != NULL);
4628
4629         PG_TRY();
4630         {
4631                 /* Set current node as entry point for XPath evaluation */
4632                 xtCxt->xpathcxt->node = cur;
4633
4634                 /* Evaluate column path */
4635                 xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
4636                 if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4637                         xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4638                                                 "could not create XPath object");
4639
4640                 /*
4641                  * There are four possible cases, depending on the number of nodes
4642                  * returned by the XPath expression and the type of the target column:
4643                  * a) XPath returns no nodes.  b) The target type is XML (return all
4644                  * as XML).  For non-XML return types:  c) One node (return content).
4645                  * d) Multiple nodes (error).
4646                  */
4647                 if (xpathobj->type == XPATH_NODESET)
4648                 {
4649                         int                     count = 0;
4650
4651                         if (xpathobj->nodesetval != NULL)
4652                                 count = xpathobj->nodesetval->nodeNr;
4653
4654                         if (xpathobj->nodesetval == NULL || count == 0)
4655                         {
4656                                 *isnull = true;
4657                         }
4658                         else
4659                         {
4660                                 if (typid == XMLOID)
4661                                 {
4662                                         text       *textstr;
4663                                         StringInfoData str;
4664
4665                                         /* Concatenate serialized values */
4666                                         initStringInfo(&str);
4667                                         for (int i = 0; i < count; i++)
4668                                         {
4669                                                 textstr =
4670                                                         xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4671                                                                                                  xtCxt->xmlerrcxt);
4672
4673                                                 appendStringInfoText(&str, textstr);
4674                                         }
4675                                         cstr = str.data;
4676                                 }
4677                                 else
4678                                 {
4679                                         xmlChar    *str;
4680
4681                                         if (count > 1)
4682                                                 ereport(ERROR,
4683                                                                 (errcode(ERRCODE_CARDINALITY_VIOLATION),
4684                                                                  errmsg("more than one value returned by column XPath expression")));
4685
4686                                         str = xmlXPathCastNodeSetToString(xpathobj->nodesetval);
4687                                         cstr = str ? xml_pstrdup_and_free(str) : "";
4688                                 }
4689                         }
4690                 }
4691                 else if (xpathobj->type == XPATH_STRING)
4692                 {
4693                         /* Content should be escaped when target will be XML */
4694                         if (typid == XMLOID)
4695                                 cstr = escape_xml((char *) xpathobj->stringval);
4696                         else
4697                                 cstr = (char *) xpathobj->stringval;
4698                 }
4699                 else if (xpathobj->type == XPATH_BOOLEAN)
4700                 {
4701                         char            typcategory;
4702                         bool            typispreferred;
4703                         xmlChar    *str;
4704
4705                         /* Allow implicit casting from boolean to numbers */
4706                         get_type_category_preferred(typid, &typcategory, &typispreferred);
4707
4708                         if (typcategory != TYPCATEGORY_NUMERIC)
4709                                 str = xmlXPathCastBooleanToString(xpathobj->boolval);
4710                         else
4711                                 str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval));
4712
4713                         cstr = xml_pstrdup_and_free(str);
4714                 }
4715                 else if (xpathobj->type == XPATH_NUMBER)
4716                 {
4717                         xmlChar    *str;
4718
4719                         str = xmlXPathCastNumberToString(xpathobj->floatval);
4720                         cstr = xml_pstrdup_and_free(str);
4721                 }
4722                 else
4723                         elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
4724
4725                 /*
4726                  * By here, either cstr contains the result value, or the isnull flag
4727                  * has been set.
4728                  */
4729                 Assert(cstr || *isnull);
4730
4731                 if (!*isnull)
4732                         result = InputFunctionCall(&state->in_functions[colnum],
4733                                                                            cstr,
4734                                                                            state->typioparams[colnum],
4735                                                                            typmod);
4736         }
4737         PG_CATCH();
4738         {
4739                 if (xpathobj != NULL)
4740                         xmlXPathFreeObject(xpathobj);
4741                 PG_RE_THROW();
4742         }
4743         PG_END_TRY();
4744
4745         xmlXPathFreeObject(xpathobj);
4746
4747         return result;
4748 #else
4749         NO_XML_SUPPORT();
4750         return 0;
4751 #endif                                                  /* not USE_LIBXML */
4752 }
4753
4754 /*
4755  * XmlTableDestroyOpaque
4756  *              Release all libxml2 resources
4757  */
4758 static void
4759 XmlTableDestroyOpaque(TableFuncScanState *state)
4760 {
4761 #ifdef USE_LIBXML
4762         XmlTableBuilderData *xtCxt;
4763
4764         xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
4765
4766         /* Propagate context related error context to libxml2 */
4767         xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4768
4769         if (xtCxt->xpathscomp != NULL)
4770         {
4771                 int                     i;
4772
4773                 for (i = 0; i < xtCxt->natts; i++)
4774                         if (xtCxt->xpathscomp[i] != NULL)
4775                                 xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
4776         }
4777
4778         if (xtCxt->xpathobj != NULL)
4779                 xmlXPathFreeObject(xtCxt->xpathobj);
4780         if (xtCxt->xpathcomp != NULL)
4781                 xmlXPathFreeCompExpr(xtCxt->xpathcomp);
4782         if (xtCxt->xpathcxt != NULL)
4783                 xmlXPathFreeContext(xtCxt->xpathcxt);
4784         if (xtCxt->doc != NULL)
4785                 xmlFreeDoc(xtCxt->doc);
4786         if (xtCxt->ctxt != NULL)
4787                 xmlFreeParserCtxt(xtCxt->ctxt);
4788
4789         pg_xml_done(xtCxt->xmlerrcxt, true);
4790
4791         /* not valid anymore */
4792         xtCxt->magic = 0;
4793         state->opaque = NULL;
4794
4795 #else
4796         NO_XML_SUPPORT();
4797 #endif                                                  /* not USE_LIBXML */
4798 }