]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/xml.c
Check to see whether libxml2 handles error context the way we expect.
[postgresql] / src / backend / utils / adt / xml.c
1 /*-------------------------------------------------------------------------
2  *
3  * xml.c
4  *        XML data type support.
5  *
6  *
7  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/backend/utils/adt/xml.c
11  *
12  *-------------------------------------------------------------------------
13  */
14
15 /*
16  * Generally, XML type support is only available when libxml use was
17  * configured during the build.  But even if that is not done, the
18  * type and all the functions are available, but most of them will
19  * fail.  For one thing, this avoids having to manage variant catalog
20  * installations.  But it also has nice effects such as that you can
21  * dump a database containing XML type data even if the server is not
22  * linked with libxml.  Thus, make sure xml_out() works even if nothing
23  * else does.
24  */
25
26 /*
27  * Notes on memory management:
28  *
29  * Sometimes libxml allocates global structures in the hope that it can reuse
30  * them later on.  This makes it impractical to change the xmlMemSetup
31  * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32  * allocated with malloc() or vice versa.  Since libxml might be used by
33  * loadable modules, eg libperl, our only safe choices are to change the
34  * functions at postmaster/backend launch or not at all.  Since we'd rather
35  * not activate libxml in sessions that might never use it, the latter choice
36  * is the preferred one.  However, for debugging purposes it can be awfully
37  * handy to constrain libxml's allocations to be done in a specific palloc
38  * context, where they're easy to track.  Therefore there is code here that
39  * can be enabled in debug builds to redirect libxml's allocations into a
40  * special context LibxmlContext.  It's not recommended to turn this on in
41  * a production build because of the possibility of bad interactions with
42  * external modules.
43  */
44 /* #define USE_LIBXMLCONTEXT */
45
46 #include "postgres.h"
47
48 #ifdef USE_LIBXML
49 #include <libxml/chvalid.h>
50 #include <libxml/parser.h>
51 #include <libxml/tree.h>
52 #include <libxml/uri.h>
53 #include <libxml/xmlerror.h>
54 #include <libxml/xmlwriter.h>
55 #include <libxml/xpath.h>
56 #include <libxml/xpathInternals.h>
57 #endif   /* USE_LIBXML */
58
59 #include "catalog/namespace.h"
60 #include "catalog/pg_type.h"
61 #include "commands/dbcommands.h"
62 #include "executor/executor.h"
63 #include "executor/spi.h"
64 #include "fmgr.h"
65 #include "lib/stringinfo.h"
66 #include "libpq/pqformat.h"
67 #include "mb/pg_wchar.h"
68 #include "miscadmin.h"
69 #include "nodes/execnodes.h"
70 #include "nodes/nodeFuncs.h"
71 #include "utils/array.h"
72 #include "utils/builtins.h"
73 #include "utils/date.h"
74 #include "utils/datetime.h"
75 #include "utils/lsyscache.h"
76 #include "utils/memutils.h"
77 #include "utils/rel.h"
78 #include "utils/syscache.h"
79 #include "utils/xml.h"
80
81
82 /* GUC variables */
83 int                     xmlbinary;
84 int                     xmloption;
85
86 #ifdef USE_LIBXML
87
88 /* random number to identify PgXmlErrorContext */
89 #define ERRCXT_MAGIC    68275028
90
91 struct PgXmlErrorContext
92 {
93         int                     magic;
94         /* strictness argument passed to pg_xml_init */
95         PgXmlStrictness strictness;
96         /* current error status and accumulated message, if any */
97         bool            err_occurred;
98         StringInfoData err_buf;
99         /* previous libxml error handling state (saved by pg_xml_init) */
100         xmlStructuredErrorFunc saved_errfunc;
101         void       *saved_errcxt;
102 };
103
104 static void xml_errorHandler(void *data, xmlErrorPtr error);
105 static void xml_ereport_by_code(int level, int sqlcode,
106                                         const char *msg, int errcode);
107 static void chopStringInfoNewlines(StringInfo str);
108 static void appendStringInfoLineSeparator(StringInfo str);
109
110 #ifdef USE_LIBXMLCONTEXT
111
112 static MemoryContext LibxmlContext = NULL;
113
114 static void xml_memory_init(void);
115 static void *xml_palloc(size_t size);
116 static void *xml_repalloc(void *ptr, size_t size);
117 static void xml_pfree(void *ptr);
118 static char *xml_pstrdup(const char *string);
119 #endif   /* USE_LIBXMLCONTEXT */
120
121 static xmlChar *xml_text2xmlChar(text *in);
122 static int parse_xml_decl(const xmlChar *str, size_t *lenp,
123                            xmlChar **version, xmlChar **encoding, int *standalone);
124 static bool print_xml_decl(StringInfo buf, const xmlChar *version,
125                            pg_enc encoding, int standalone);
126 static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
127                   bool preserve_whitespace, int encoding);
128 static text *xml_xmlnodetoxmltype(xmlNodePtr cur);
129 static int      xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
130                                                                    ArrayBuildState **astate);
131 #endif   /* USE_LIBXML */
132
133 static StringInfo query_to_xml_internal(const char *query, char *tablename,
134                                           const char *xmlschema, bool nulls, bool tableforest,
135                                           const char *targetns, bool top_level);
136 static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
137                                                  bool nulls, bool tableforest, const char *targetns);
138 static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
139                                                                   List *relid_list, bool nulls,
140                                                                   bool tableforest, const char *targetns);
141 static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
142                                                                    bool nulls, bool tableforest,
143                                                                    const char *targetns);
144 static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
145 static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
146 static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
147 static void SPI_sql_row_to_xmlelement(int rownum, StringInfo result,
148                                                   char *tablename, bool nulls, bool tableforest,
149                                                   const char *targetns, bool top_level);
150
/*
 * Report, at ERROR level, that the requested functionality needs a server
 * built with libxml support.
 */
#define NO_XML_SUPPORT() \
	ereport(ERROR, \
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
			 errmsg("unsupported XML feature"), \
			 errdetail("This functionality requires the server to be built with libxml support."), \
			 errhint("You need to rebuild PostgreSQL using --with-libxml.")))
157
158
159 /* from SQL/XML:2008 section 4.9 */
160 #define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
161 #define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
162 #define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
163
164
165 #ifdef USE_LIBXML
166
167 static int
168 xmlChar_to_encoding(const xmlChar *encoding_name)
169 {
170         int                     encoding = pg_char_to_encoding((const char *) encoding_name);
171
172         if (encoding < 0)
173                 ereport(ERROR,
174                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
175                                  errmsg("invalid encoding name \"%s\"",
176                                                 (const char *) encoding_name)));
177         return encoding;
178 }
179 #endif
180
181
182 /*
183  * xml_in uses a plain C string to VARDATA conversion, so for the time being
184  * we use the conversion function for the text datatype.
185  *
186  * This is only acceptable so long as xmltype and text use the same
187  * representation.
188  */
189 Datum
190 xml_in(PG_FUNCTION_ARGS)
191 {
192 #ifdef USE_LIBXML
193         char       *s = PG_GETARG_CSTRING(0);
194         xmltype    *vardata;
195         xmlDocPtr       doc;
196
197         vardata = (xmltype *) cstring_to_text(s);
198
199         /*
200          * Parse the data to check if it is well-formed XML data.  Assume that
201          * ERROR occurred if parsing failed.
202          */
203         doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding());
204         xmlFreeDoc(doc);
205
206         PG_RETURN_XML_P(vardata);
207 #else
208         NO_XML_SUPPORT();
209         return 0;
210 #endif
211 }
212
213
214 #define PG_XML_DEFAULT_VERSION "1.0"
215
216
217 /*
218  * xml_out_internal uses a plain VARDATA to C string conversion, so for the
219  * time being we use the conversion function for the text datatype.
220  *
221  * This is only acceptable so long as xmltype and text use the same
222  * representation.
223  */
224 static char *
225 xml_out_internal(xmltype *x, pg_enc target_encoding)
226 {
227         char       *str = text_to_cstring((text *) x);
228
229 #ifdef USE_LIBXML
230         size_t          len = strlen(str);
231         xmlChar    *version;
232         int                     standalone;
233         int                     res_code;
234
235         if ((res_code = parse_xml_decl((xmlChar *) str,
236                                                                    &len, &version, NULL, &standalone)) == 0)
237         {
238                 StringInfoData buf;
239
240                 initStringInfo(&buf);
241
242                 if (!print_xml_decl(&buf, version, target_encoding, standalone))
243                 {
244                         /*
245                          * If we are not going to produce an XML declaration, eat a single
246                          * newline in the original string to prevent empty first lines in
247                          * the output.
248                          */
249                         if (*(str + len) == '\n')
250                                 len += 1;
251                 }
252                 appendStringInfoString(&buf, str + len);
253
254                 pfree(str);
255
256                 return buf.data;
257         }
258
259         xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
260                                                 "could not parse XML declaration in stored value",
261                                                 res_code);
262 #endif
263         return str;
264 }
265
266
267 Datum
268 xml_out(PG_FUNCTION_ARGS)
269 {
270         xmltype    *x = PG_GETARG_XML_P(0);
271
272         /*
273          * xml_out removes the encoding property in all cases.  This is because we
274          * cannot control from here whether the datum will be converted to a
275          * different client encoding, so we'd do more harm than good by including
276          * it.
277          */
278         PG_RETURN_CSTRING(xml_out_internal(x, 0));
279 }
280
281
282 Datum
283 xml_recv(PG_FUNCTION_ARGS)
284 {
285 #ifdef USE_LIBXML
286         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
287         xmltype    *result;
288         char       *str;
289         char       *newstr;
290         int                     nbytes;
291         xmlDocPtr       doc;
292         xmlChar    *encodingStr = NULL;
293         int                     encoding;
294
295         /*
296          * Read the data in raw format. We don't know yet what the encoding is, as
297          * that information is embedded in the xml declaration; so we have to
298          * parse that before converting to server encoding.
299          */
300         nbytes = buf->len - buf->cursor;
301         str = (char *) pq_getmsgbytes(buf, nbytes);
302
303         /*
304          * We need a null-terminated string to pass to parse_xml_decl().  Rather
305          * than make a separate copy, make the temporary result one byte bigger
306          * than it needs to be.
307          */
308         result = palloc(nbytes + 1 + VARHDRSZ);
309         SET_VARSIZE(result, nbytes + VARHDRSZ);
310         memcpy(VARDATA(result), str, nbytes);
311         str = VARDATA(result);
312         str[nbytes] = '\0';
313
314         parse_xml_decl((xmlChar *) str, NULL, NULL, &encodingStr, NULL);
315
316         /*
317          * If encoding wasn't explicitly specified in the XML header, treat it as
318          * UTF-8, as that's the default in XML. This is different from xml_in(),
319          * where the input has to go through the normal client to server encoding
320          * conversion.
321          */
322         encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
323
324         /*
325          * Parse the data to check if it is well-formed XML data.  Assume that
326          * xml_parse will throw ERROR if not.
327          */
328         doc = xml_parse(result, xmloption, true, encoding);
329         xmlFreeDoc(doc);
330
331         /* Now that we know what we're dealing with, convert to server encoding */
332         newstr = (char *) pg_do_encoding_conversion((unsigned char *) str,
333                                                                                                 nbytes,
334                                                                                                 encoding,
335                                                                                                 GetDatabaseEncoding());
336
337         if (newstr != str)
338         {
339                 pfree(result);
340                 result = (xmltype *) cstring_to_text(newstr);
341                 pfree(newstr);
342         }
343
344         PG_RETURN_XML_P(result);
345 #else
346         NO_XML_SUPPORT();
347         return 0;
348 #endif
349 }
350
351
352 Datum
353 xml_send(PG_FUNCTION_ARGS)
354 {
355         xmltype    *x = PG_GETARG_XML_P(0);
356         char       *outval;
357         StringInfoData buf;
358
359         /*
360          * xml_out_internal doesn't convert the encoding, it just prints the right
361          * declaration. pq_sendtext will do the conversion.
362          */
363         outval = xml_out_internal(x, pg_get_client_encoding());
364
365         pq_begintypsend(&buf);
366         pq_sendtext(&buf, outval, strlen(outval));
367         pfree(outval);
368         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
369 }
370
371
372 #ifdef USE_LIBXML
373 static void
374 appendStringInfoText(StringInfo str, const text *t)
375 {
376         appendBinaryStringInfo(str, VARDATA(t), VARSIZE(t) - VARHDRSZ);
377 }
378 #endif
379
380
381 static xmltype *
382 stringinfo_to_xmltype(StringInfo buf)
383 {
384         return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
385 }
386
387
388 static xmltype *
389 cstring_to_xmltype(const char *string)
390 {
391         return (xmltype *) cstring_to_text(string);
392 }
393
394
395 #ifdef USE_LIBXML
396 static xmltype *
397 xmlBuffer_to_xmltype(xmlBufferPtr buf)
398 {
399         return (xmltype *) cstring_to_text_with_len((char *) xmlBufferContent(buf),
400                                                                                                 xmlBufferLength(buf));
401 }
402 #endif
403
404
405 Datum
406 xmlcomment(PG_FUNCTION_ARGS)
407 {
408 #ifdef USE_LIBXML
409         text       *arg = PG_GETARG_TEXT_P(0);
410         char       *argdata = VARDATA(arg);
411         int                     len = VARSIZE(arg) - VARHDRSZ;
412         StringInfoData buf;
413         int                     i;
414
415         /* check for "--" in string or "-" at the end */
416         for (i = 1; i < len; i++)
417         {
418                 if (argdata[i] == '-' && argdata[i - 1] == '-')
419                         ereport(ERROR,
420                                         (errcode(ERRCODE_INVALID_XML_COMMENT),
421                                          errmsg("invalid XML comment")));
422         }
423         if (len > 0 && argdata[len - 1] == '-')
424                 ereport(ERROR,
425                                 (errcode(ERRCODE_INVALID_XML_COMMENT),
426                                  errmsg("invalid XML comment")));
427
428         initStringInfo(&buf);
429         appendStringInfo(&buf, "<!--");
430         appendStringInfoText(&buf, arg);
431         appendStringInfo(&buf, "-->");
432
433         PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
434 #else
435         NO_XML_SUPPORT();
436         return 0;
437 #endif
438 }
439
440
441
442 /*
443  * TODO: xmlconcat needs to merge the notations and unparsed entities
444  * of the argument values.      Not very important in practice, though.
445  */
446 xmltype *
447 xmlconcat(List *args)
448 {
449 #ifdef USE_LIBXML
450         int                     global_standalone = 1;
451         xmlChar    *global_version = NULL;
452         bool            global_version_no_value = false;
453         StringInfoData buf;
454         ListCell   *v;
455
456         initStringInfo(&buf);
457         foreach(v, args)
458         {
459                 xmltype    *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
460                 size_t          len;
461                 xmlChar    *version;
462                 int                     standalone;
463                 char       *str;
464
465                 len = VARSIZE(x) - VARHDRSZ;
466                 str = text_to_cstring((text *) x);
467
468                 parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
469
470                 if (standalone == 0 && global_standalone == 1)
471                         global_standalone = 0;
472                 if (standalone < 0)
473                         global_standalone = -1;
474
475                 if (!version)
476                         global_version_no_value = true;
477                 else if (!global_version)
478                         global_version = version;
479                 else if (xmlStrcmp(version, global_version) != 0)
480                         global_version_no_value = true;
481
482                 appendStringInfoString(&buf, str + len);
483                 pfree(str);
484         }
485
486         if (!global_version_no_value || global_standalone >= 0)
487         {
488                 StringInfoData buf2;
489
490                 initStringInfo(&buf2);
491
492                 print_xml_decl(&buf2,
493                                            (!global_version_no_value) ? global_version : NULL,
494                                            0,
495                                            global_standalone);
496
497                 appendStringInfoString(&buf2, buf.data);
498                 buf = buf2;
499         }
500
501         return stringinfo_to_xmltype(&buf);
502 #else
503         NO_XML_SUPPORT();
504         return NULL;
505 #endif
506 }
507
508
509 /*
510  * XMLAGG support
511  */
512 Datum
513 xmlconcat2(PG_FUNCTION_ARGS)
514 {
515         if (PG_ARGISNULL(0))
516         {
517                 if (PG_ARGISNULL(1))
518                         PG_RETURN_NULL();
519                 else
520                         PG_RETURN_XML_P(PG_GETARG_XML_P(1));
521         }
522         else if (PG_ARGISNULL(1))
523                 PG_RETURN_XML_P(PG_GETARG_XML_P(0));
524         else
525                 PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
526                                                                                          PG_GETARG_XML_P(1))));
527 }
528
529
530 Datum
531 texttoxml(PG_FUNCTION_ARGS)
532 {
533         text       *data = PG_GETARG_TEXT_P(0);
534
535         PG_RETURN_XML_P(xmlparse(data, xmloption, true));
536 }
537
538
539 Datum
540 xmltotext(PG_FUNCTION_ARGS)
541 {
542         xmltype    *data = PG_GETARG_XML_P(0);
543
544         /* It's actually binary compatible. */
545         PG_RETURN_TEXT_P((text *) data);
546 }
547
548
549 text *
550 xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg)
551 {
552         if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data))
553                 ereport(ERROR,
554                                 (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
555                                  errmsg("not an XML document")));
556
557         /* It's actually binary compatible, save for the above check. */
558         return (text *) data;
559 }
560
561
562 xmltype *
563 xmlelement(XmlExprState *xmlExpr, ExprContext *econtext)
564 {
565 #ifdef USE_LIBXML
566         XmlExpr    *xexpr = (XmlExpr *) xmlExpr->xprstate.expr;
567         xmltype    *result;
568         List       *named_arg_strings;
569         List       *arg_strings;
570         int                     i;
571         ListCell   *arg;
572         ListCell   *narg;
573         PgXmlErrorContext *xmlerrcxt;
574         volatile xmlBufferPtr buf = NULL;
575         volatile xmlTextWriterPtr writer = NULL;
576
577         /*
578          * We first evaluate all the arguments, then start up libxml and create
579          * the result.  This avoids issues if one of the arguments involves a call
580          * to some other function or subsystem that wants to use libxml on its own
581          * terms.
582          */
583         named_arg_strings = NIL;
584         i = 0;
585         foreach(arg, xmlExpr->named_args)
586         {
587                 ExprState  *e = (ExprState *) lfirst(arg);
588                 Datum           value;
589                 bool            isnull;
590                 char       *str;
591
592                 value = ExecEvalExpr(e, econtext, &isnull, NULL);
593                 if (isnull)
594                         str = NULL;
595                 else
596                         str = map_sql_value_to_xml_value(value, exprType((Node *) e->expr), false);
597                 named_arg_strings = lappend(named_arg_strings, str);
598                 i++;
599         }
600
601         arg_strings = NIL;
602         foreach(arg, xmlExpr->args)
603         {
604                 ExprState  *e = (ExprState *) lfirst(arg);
605                 Datum           value;
606                 bool            isnull;
607                 char       *str;
608
609                 value = ExecEvalExpr(e, econtext, &isnull, NULL);
610                 /* here we can just forget NULL elements immediately */
611                 if (!isnull)
612                 {
613                         str = map_sql_value_to_xml_value(value,
614                                                                                    exprType((Node *) e->expr), true);
615                         arg_strings = lappend(arg_strings, str);
616                 }
617         }
618
619         /* now safe to run libxml */
620         xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
621
622         PG_TRY();
623         {
624                 buf = xmlBufferCreate();
625                 if (buf == NULL || xmlerrcxt->err_occurred)
626                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
627                                                 "could not allocate xmlBuffer");
628                 writer = xmlNewTextWriterMemory(buf, 0);
629                 if (writer == NULL || xmlerrcxt->err_occurred)
630                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
631                                                 "could not allocate xmlTextWriter");
632
633                 xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
634
635                 forboth(arg, named_arg_strings, narg, xexpr->arg_names)
636                 {
637                         char       *str = (char *) lfirst(arg);
638                         char       *argname = strVal(lfirst(narg));
639
640                         if (str)
641                                 xmlTextWriterWriteAttribute(writer,
642                                                                                         (xmlChar *) argname,
643                                                                                         (xmlChar *) str);
644                 }
645
646                 foreach(arg, arg_strings)
647                 {
648                         char       *str = (char *) lfirst(arg);
649
650                         xmlTextWriterWriteRaw(writer, (xmlChar *) str);
651                 }
652
653                 xmlTextWriterEndElement(writer);
654
655                 /* we MUST do this now to flush data out to the buffer ... */
656                 xmlFreeTextWriter(writer);
657                 writer = NULL;
658
659                 result = xmlBuffer_to_xmltype(buf);
660         }
661         PG_CATCH();
662         {
663                 if (writer)
664                         xmlFreeTextWriter(writer);
665                 if (buf)
666                         xmlBufferFree(buf);
667
668                 pg_xml_done(xmlerrcxt, true);
669
670                 PG_RE_THROW();
671         }
672         PG_END_TRY();
673
674         xmlBufferFree(buf);
675
676         pg_xml_done(xmlerrcxt, false);
677
678         return result;
679 #else
680         NO_XML_SUPPORT();
681         return NULL;
682 #endif
683 }
684
685
686 xmltype *
687 xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
688 {
689 #ifdef USE_LIBXML
690         xmlDocPtr       doc;
691
692         doc = xml_parse(data, xmloption_arg, preserve_whitespace,
693                                         GetDatabaseEncoding());
694         xmlFreeDoc(doc);
695
696         return (xmltype *) data;
697 #else
698         NO_XML_SUPPORT();
699         return NULL;
700 #endif
701 }
702
703
704 xmltype *
705 xmlpi(char *target, text *arg, bool arg_is_null, bool *result_is_null)
706 {
707 #ifdef USE_LIBXML
708         xmltype    *result;
709         StringInfoData buf;
710
711         if (pg_strcasecmp(target, "xml") == 0)
712                 ereport(ERROR,
713                                 (errcode(ERRCODE_SYNTAX_ERROR), /* really */
714                                  errmsg("invalid XML processing instruction"),
715                                  errdetail("XML processing instruction target name cannot be \"%s\".", target)));
716
717         /*
718          * Following the SQL standard, the null check comes after the syntax check
719          * above.
720          */
721         *result_is_null = arg_is_null;
722         if (*result_is_null)
723                 return NULL;
724
725         initStringInfo(&buf);
726
727         appendStringInfo(&buf, "<?%s", target);
728
729         if (arg != NULL)
730         {
731                 char       *string;
732
733                 string = text_to_cstring(arg);
734                 if (strstr(string, "?>") != NULL)
735                         ereport(ERROR,
736                                         (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
737                                          errmsg("invalid XML processing instruction"),
738                         errdetail("XML processing instruction cannot contain \"?>\".")));
739
740                 appendStringInfoChar(&buf, ' ');
741                 appendStringInfoString(&buf, string + strspn(string, " "));
742                 pfree(string);
743         }
744         appendStringInfoString(&buf, "?>");
745
746         result = stringinfo_to_xmltype(&buf);
747         pfree(buf.data);
748         return result;
749 #else
750         NO_XML_SUPPORT();
751         return NULL;
752 #endif
753 }
754
755
756 xmltype *
757 xmlroot(xmltype *data, text *version, int standalone)
758 {
759 #ifdef USE_LIBXML
760         char       *str;
761         size_t          len;
762         xmlChar    *orig_version;
763         int                     orig_standalone;
764         StringInfoData buf;
765
766         len = VARSIZE(data) - VARHDRSZ;
767         str = text_to_cstring((text *) data);
768
769         parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
770
771         if (version)
772                 orig_version = xml_text2xmlChar(version);
773         else
774                 orig_version = NULL;
775
776         switch (standalone)
777         {
778                 case XML_STANDALONE_YES:
779                         orig_standalone = 1;
780                         break;
781                 case XML_STANDALONE_NO:
782                         orig_standalone = 0;
783                         break;
784                 case XML_STANDALONE_NO_VALUE:
785                         orig_standalone = -1;
786                         break;
787                 case XML_STANDALONE_OMITTED:
788                         /* leave original value */
789                         break;
790         }
791
792         initStringInfo(&buf);
793         print_xml_decl(&buf, orig_version, 0, orig_standalone);
794         appendStringInfoString(&buf, str + len);
795
796         return stringinfo_to_xmltype(&buf);
797 #else
798         NO_XML_SUPPORT();
799         return NULL;
800 #endif
801 }
802
803
804 /*
805  * Validate document (given as string) against DTD (given as external link)
806  *
807  * This has been removed because it is a security hole: unprivileged users
808  * should not be able to use Postgres to fetch arbitrary external files,
809  * which unfortunately is exactly what libxml is willing to do with the DTD
810  * parameter.
811  */
812 Datum
813 xmlvalidate(PG_FUNCTION_ARGS)
814 {
815         ereport(ERROR,
816                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
817                          errmsg("xmlvalidate is not implemented")));
818         return 0;
819 }
820
821
822 bool
823 xml_is_document(xmltype *arg)
824 {
825 #ifdef USE_LIBXML
826         bool            result;
827         volatile xmlDocPtr doc = NULL;
828         MemoryContext ccxt = CurrentMemoryContext;
829
830         /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
831         PG_TRY();
832         {
833                 doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
834                                                 GetDatabaseEncoding());
835                 result = true;
836         }
837         PG_CATCH();
838         {
839                 ErrorData  *errdata;
840                 MemoryContext ecxt;
841
842                 ecxt = MemoryContextSwitchTo(ccxt);
843                 errdata = CopyErrorData();
844                 if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT)
845                 {
846                         FlushErrorState();
847                         result = false;
848                 }
849                 else
850                 {
851                         MemoryContextSwitchTo(ecxt);
852                         PG_RE_THROW();
853                 }
854         }
855         PG_END_TRY();
856
857         if (doc)
858                 xmlFreeDoc(doc);
859
860         return result;
861 #else                                                   /* not USE_LIBXML */
862         NO_XML_SUPPORT();
863         return false;
864 #endif   /* not USE_LIBXML */
865 }
866
867
868 #ifdef USE_LIBXML
869
870 /*
871  * pg_xml_init_library --- set up for use of libxml
872  *
873  * This should be called by each function that is about to use libxml
874  * facilities but doesn't require error handling.  It initializes libxml
875  * and verifies compatibility with the loaded libxml version.  These are
876  * once-per-session activities.
877  *
878  * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
879  * check)
880  */
881 void
882 pg_xml_init_library(void)
883 {
884         static bool first_time = true;
885
886         if (first_time)
887         {
888                 /* Stuff we need do only once per session */
889
890                 /*
891                  * Currently, we have no pure UTF-8 support for internals -- check if
892                  * we can work.
893                  */
894                 if (sizeof(char) != sizeof(xmlChar))
895                         ereport(ERROR,
896                                         (errmsg("could not initialize XML library"),
897                                          errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
898                                                            (int) sizeof(char), (int) sizeof(xmlChar))));
899
900 #ifdef USE_LIBXMLCONTEXT
901                 /* Set up libxml's memory allocation our way */
902                 xml_memory_init();
903 #endif
904
905                 /* Check library compatibility */
906                 LIBXML_TEST_VERSION;
907
908                 first_time = false;
909         }
910 }
911
912 /*
913  * pg_xml_init --- set up for use of libxml and register an error handler
914  *
915  * This should be called by each function that is about to use libxml
916  * facilities and requires error handling.  It initializes libxml with
917  * pg_xml_init_library() and establishes our libxml error handler.
918  *
919  * strictness determines which errors are reported and which are ignored.
920  *
921  * Calls to this function MUST be followed by a PG_TRY block that guarantees
922  * that pg_xml_done() is called during either normal or error exit.
923  *
924  * This is exported for use by contrib/xml2, as well as other code that might
925  * wish to share use of this module's libxml error handler.
926  */
927 PgXmlErrorContext *
928 pg_xml_init(PgXmlStrictness strictness)
929 {
930         PgXmlErrorContext *errcxt;
931         void       *new_errcxt;
932
933         /* Do one-time setup if needed */
934         pg_xml_init_library();
935
936         /* Create error handling context structure */
937         errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
938         errcxt->magic = ERRCXT_MAGIC;
939         errcxt->strictness = strictness;
940         errcxt->err_occurred = false;
941         initStringInfo(&errcxt->err_buf);
942
943         /*
944          * Save original error handler and install ours. libxml originally didn't
945          * distinguish between the contexts for generic and for structured error
946          * handlers.  If we're using an old libxml version, we must thus save
947          * the generic error context, even though we're using a structured
948          * error handler.
949          */
950         errcxt->saved_errfunc = xmlStructuredError;
951
952 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
953         errcxt->saved_errcxt = xmlStructuredErrorContext;
954 #else
955         errcxt->saved_errcxt = xmlGenericErrorContext;
956 #endif
957
958         xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler);
959
960         /*
961          * Verify that xmlSetStructuredErrorFunc set the context variable we
962          * expected it to.  If not, the error context pointer we just saved is not
963          * the correct thing to restore, and since that leaves us without a way to
964          * restore the context in pg_xml_done, we must fail.
965          *
966          * The only known situation in which this test fails is if we compile with
967          * headers from a libxml2 that doesn't track the structured error context
968          * separately (<= 2.7.3), but at runtime use a version that does, or vice
969          * versa.  The libxml2 authors did not treat that change as constituting
970          * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
971          * fails to protect us from this.
972          */
973
974 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
975         new_errcxt = xmlStructuredErrorContext;
976 #else
977         new_errcxt = xmlGenericErrorContext;
978 #endif
979
980         if (new_errcxt != (void *) errcxt)
981                 ereport(ERROR,
982                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
983                                  errmsg("could not set up XML error handler"),
984                                  errhint("This probably indicates that the version of libxml2"
985                                                  " being used is not compatible with the libxml2"
986                                                  " header files that PostgreSQL was built with.")));
987
988         return errcxt;
989 }
990
991
992 /*
993  * pg_xml_done --- restore previous libxml error handling
994  *
995  * Resets libxml's global error-handling state to what it was before
996  * pg_xml_init() was called.
997  *
998  * This routine verifies that all pending errors have been dealt with
999  * (in assert-enabled builds, anyway).
1000  */
1001 void
1002 pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1003 {
1004         void       *cur_errcxt;
1005
1006         /* An assert seems like enough protection here */
1007         Assert(errcxt->magic == ERRCXT_MAGIC);
1008
1009         /*
1010          * In a normal exit, there should be no un-handled libxml errors.  But we
1011          * shouldn't try to enforce this during error recovery, since the longjmp
1012          * could have been thrown before xml_ereport had a chance to run.
1013          */
1014         Assert(!errcxt->err_occurred || isError);
1015
1016         /*
1017          * Check that libxml's global state is correct, warn if not.  This is
1018          * a real test and not an Assert because it has a higher probability
1019          * of happening.
1020          */
1021 #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1022         cur_errcxt = xmlStructuredErrorContext;
1023 #else
1024         cur_errcxt = xmlGenericErrorContext;
1025 #endif
1026
1027         if (cur_errcxt != (void *) errcxt)
1028                 elog(WARNING, "libxml error handling state is out of sync with xml.c");
1029
1030         /* Restore the saved handler */
1031         xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1032
1033         /*
1034          * Mark the struct as invalid, just in case somebody somehow manages to
1035          * call xml_errorHandler or xml_ereport with it.
1036          */
1037         errcxt->magic = 0;
1038
1039         /* Release memory */
1040         pfree(errcxt->err_buf.data);
1041         pfree(errcxt);
1042 }
1043
1044
1045 /*
1046  * pg_xml_error_occurred() --- test the error flag
1047  */
1048 bool
1049 pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1050 {
1051         return errcxt->err_occurred;
1052 }
1053
1054
1055 /*
1056  * SQL/XML allows storing "XML documents" or "XML content".  "XML
1057  * documents" are specified by the XML specification and are parsed
1058  * easily by libxml.  "XML content" is specified by SQL/XML as the
1059  * production "XMLDecl? content".  But libxml can only parse the
1060  * "content" part, so we have to parse the XML declaration ourselves
1061  * to complete this.
1062  */
1063
/*
 * Require XML whitespace at *p.  Note the hidden "return": this can only be
 * used inside a function that returns a libxml error code as an int.
 */
#define CHECK_XML_SPACE(p) \
        do { \
                if (!xmlIsBlank_ch(*(p))) \
                        return XML_ERR_SPACE_REQUIRED; \
        } while (0)

/* Advance p past any run of XML whitespace (p must be a modifiable lvalue) */
#define SKIP_XML_SPACE(p) \
        do { \
                while (xmlIsBlank_ch(*(p))) \
                        (p)++; \
        } while (0)

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/* Beware of multiple evaluations of argument! */
#define PG_XMLISNAMECHAR(c) \
        (xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
                        || xmlIsDigit_ch(c) \
                        || (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
                        || xmlIsCombiningQ(c) \
                        || xmlIsExtender_ch(c))
1081
1082 /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1083 static xmlChar *
1084 xml_pnstrdup(const xmlChar *str, size_t len)
1085 {
1086         xmlChar    *result;
1087
1088         result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1089         memcpy(result, str, len * sizeof(xmlChar));
1090         result[len] = 0;
1091         return result;
1092 }
1093
1094 /*
1095  * str is the null-terminated input string.  Remaining arguments are
1096  * output arguments; each can be NULL if value is not wanted.
1097  * version and encoding are returned as locally-palloc'd strings.
1098  * Result is 0 if OK, an error code if not.
1099  */
1100 static int
1101 parse_xml_decl(const xmlChar *str, size_t *lenp,
1102                            xmlChar **version, xmlChar **encoding, int *standalone)
1103 {
1104         const xmlChar *p;
1105         const xmlChar *save_p;
1106         size_t          len;
1107         int                     utf8char;
1108         int                     utf8len;
1109
1110         /*
1111          * Only initialize libxml.  We don't need error handling here, but we do
1112          * need to make sure libxml is initialized before calling any of its
1113          * functions.  Note that this is safe (and a no-op) if caller has already
1114          * done pg_xml_init().
1115          */
1116         pg_xml_init_library();
1117
1118         /* Initialize output arguments to "not present" */
1119         if (version)
1120                 *version = NULL;
1121         if (encoding)
1122                 *encoding = NULL;
1123         if (standalone)
1124                 *standalone = -1;
1125
1126         p = str;
1127
1128         if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1129                 goto finished;
1130
1131         /* if next char is name char, it's a PI like <?xml-stylesheet ...?> */
1132         utf8len = strlen((const char *) (p + 5));
1133         utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1134         if (PG_XMLISNAMECHAR(utf8char))
1135                 goto finished;
1136
1137         p += 5;
1138
1139         /* version */
1140         CHECK_XML_SPACE(p);
1141         SKIP_XML_SPACE(p);
1142         if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1143                 return XML_ERR_VERSION_MISSING;
1144         p += 7;
1145         SKIP_XML_SPACE(p);
1146         if (*p != '=')
1147                 return XML_ERR_VERSION_MISSING;
1148         p += 1;
1149         SKIP_XML_SPACE(p);
1150
1151         if (*p == '\'' || *p == '"')
1152         {
1153                 const xmlChar *q;
1154
1155                 q = xmlStrchr(p + 1, *p);
1156                 if (!q)
1157                         return XML_ERR_VERSION_MISSING;
1158
1159                 if (version)
1160                         *version = xml_pnstrdup(p + 1, q - p - 1);
1161                 p = q + 1;
1162         }
1163         else
1164                 return XML_ERR_VERSION_MISSING;
1165
1166         /* encoding */
1167         save_p = p;
1168         SKIP_XML_SPACE(p);
1169         if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1170         {
1171                 CHECK_XML_SPACE(save_p);
1172                 p += 8;
1173                 SKIP_XML_SPACE(p);
1174                 if (*p != '=')
1175                         return XML_ERR_MISSING_ENCODING;
1176                 p += 1;
1177                 SKIP_XML_SPACE(p);
1178
1179                 if (*p == '\'' || *p == '"')
1180                 {
1181                         const xmlChar *q;
1182
1183                         q = xmlStrchr(p + 1, *p);
1184                         if (!q)
1185                                 return XML_ERR_MISSING_ENCODING;
1186
1187                         if (encoding)
1188                                 *encoding = xml_pnstrdup(p + 1, q - p - 1);
1189                         p = q + 1;
1190                 }
1191                 else
1192                         return XML_ERR_MISSING_ENCODING;
1193         }
1194         else
1195         {
1196                 p = save_p;
1197         }
1198
1199         /* standalone */
1200         save_p = p;
1201         SKIP_XML_SPACE(p);
1202         if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1203         {
1204                 CHECK_XML_SPACE(save_p);
1205                 p += 10;
1206                 SKIP_XML_SPACE(p);
1207                 if (*p != '=')
1208                         return XML_ERR_STANDALONE_VALUE;
1209                 p += 1;
1210                 SKIP_XML_SPACE(p);
1211                 if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1212                         xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1213                 {
1214                         if (standalone)
1215                                 *standalone = 1;
1216                         p += 5;
1217                 }
1218                 else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1219                                  xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1220                 {
1221                         if (standalone)
1222                                 *standalone = 0;
1223                         p += 4;
1224                 }
1225                 else
1226                         return XML_ERR_STANDALONE_VALUE;
1227         }
1228         else
1229         {
1230                 p = save_p;
1231         }
1232
1233         SKIP_XML_SPACE(p);
1234         if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1235                 return XML_ERR_XMLDECL_NOT_FINISHED;
1236         p += 2;
1237
1238 finished:
1239         len = p - str;
1240
1241         for (p = str; p < str + len; p++)
1242                 if (*p > 127)
1243                         return XML_ERR_INVALID_CHAR;
1244
1245         if (lenp)
1246                 *lenp = len;
1247
1248         return XML_ERR_OK;
1249 }
1250
1251
1252 /*
1253  * Write an XML declaration.  On output, we adjust the XML declaration
1254  * as follows.  (These rules are the moral equivalent of the clause
1255  * "Serialization of an XML value" in the SQL standard.)
1256  *
1257  * We try to avoid generating an XML declaration if possible.  This is
1258  * so that you don't get trivial things like xml '<foo/>' resulting in
1259  * '<?xml version="1.0"?><foo/>', which would surely be annoying.  We
1260  * must provide a declaration if the standalone property is specified
1261  * or if we include an encoding declaration.  If we have a
1262  * declaration, we must specify a version (XML requires this).
1263  * Otherwise we only make a declaration if the version is not "1.0",
1264  * which is the default version specified in SQL:2003.
1265  */
1266 static bool
1267 print_xml_decl(StringInfo buf, const xmlChar *version,
1268                            pg_enc encoding, int standalone)
1269 {
1270         if ((version && strcmp((char *) version, PG_XML_DEFAULT_VERSION) != 0)
1271                 || (encoding && encoding != PG_UTF8)
1272                 || standalone != -1)
1273         {
1274                 appendStringInfoString(buf, "<?xml");
1275
1276                 if (version)
1277                         appendStringInfo(buf, " version=\"%s\"", version);
1278                 else
1279                         appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1280
1281                 if (encoding && encoding != PG_UTF8)
1282                 {
1283                         /*
1284                          * XXX might be useful to convert this to IANA names (ISO-8859-1
1285                          * instead of LATIN1 etc.); needs field experience
1286                          */
1287                         appendStringInfo(buf, " encoding=\"%s\"",
1288                                                          pg_encoding_to_char(encoding));
1289                 }
1290
1291                 if (standalone == 1)
1292                         appendStringInfoString(buf, " standalone=\"yes\"");
1293                 else if (standalone == 0)
1294                         appendStringInfoString(buf, " standalone=\"no\"");
1295                 appendStringInfoString(buf, "?>");
1296
1297                 return true;
1298         }
1299         else
1300                 return false;
1301 }
1302
1303
1304 /*
1305  * Convert a C string to XML internal representation
1306  *
1307  * Note: it is caller's responsibility to xmlFreeDoc() the result,
1308  * else a permanent memory leak will ensue!
1309  *
1310  * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1311  * yet do not use SAX - see xmlreader.c)
1312  */
1313 static xmlDocPtr
1314 xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1315                   int encoding)
1316 {
1317         int32           len;
1318         xmlChar    *string;
1319         xmlChar    *utf8string;
1320         PgXmlErrorContext *xmlerrcxt;
1321         volatile xmlParserCtxtPtr ctxt = NULL;
1322         volatile xmlDocPtr doc = NULL;
1323
1324         len = VARSIZE(data) - VARHDRSZ;         /* will be useful later */
1325         string = xml_text2xmlChar(data);
1326
1327         utf8string = pg_do_encoding_conversion(string,
1328                                                                                    len,
1329                                                                                    encoding,
1330                                                                                    PG_UTF8);
1331
1332         /* Start up libxml and its parser */
1333         xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1334
1335         /* Use a TRY block to ensure we clean up correctly */
1336         PG_TRY();
1337         {
1338                 xmlInitParser();
1339
1340                 ctxt = xmlNewParserCtxt();
1341                 if (ctxt == NULL || xmlerrcxt->err_occurred)
1342                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1343                                                 "could not allocate parser context");
1344
1345                 if (xmloption_arg == XMLOPTION_DOCUMENT)
1346                 {
1347                         /*
1348                          * Note, that here we try to apply DTD defaults
1349                          * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
1350                          * 'Default values defined by internal DTD are applied'. As for
1351                          * external DTDs, we try to support them too, (see SQL/XML:2008 GR
1352                          * 10.16.7.e)
1353                          */
1354                         doc = xmlCtxtReadDoc(ctxt, utf8string,
1355                                                                  NULL,
1356                                                                  "UTF-8",
1357                                                                  XML_PARSE_NOENT | XML_PARSE_DTDATTR
1358                                                    | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
1359                         if (doc == NULL || xmlerrcxt->err_occurred)
1360                                 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
1361                                                         "invalid XML document");
1362                 }
1363                 else
1364                 {
1365                         int                     res_code;
1366                         size_t          count;
1367                         xmlChar    *version;
1368                         int                     standalone;
1369
1370                         res_code = parse_xml_decl(utf8string,
1371                                                                           &count, &version, NULL, &standalone);
1372                         if (res_code != 0)
1373                                 xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
1374                                                           "invalid XML content: invalid XML declaration",
1375                                                                         res_code);
1376
1377                         doc = xmlNewDoc(version);
1378                         Assert(doc->encoding == NULL);
1379                         doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1380                         doc->standalone = standalone;
1381
1382                         res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1383                                                                                                    utf8string + count, NULL);
1384                         if (res_code != 0 || xmlerrcxt->err_occurred)
1385                                 xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1386                                                         "invalid XML content");
1387                 }
1388         }
1389         PG_CATCH();
1390         {
1391                 if (doc != NULL)
1392                         xmlFreeDoc(doc);
1393                 if (ctxt != NULL)
1394                         xmlFreeParserCtxt(ctxt);
1395
1396                 pg_xml_done(xmlerrcxt, true);
1397
1398                 PG_RE_THROW();
1399         }
1400         PG_END_TRY();
1401
1402         xmlFreeParserCtxt(ctxt);
1403
1404         pg_xml_done(xmlerrcxt, false);
1405
1406         return doc;
1407 }
1408
1409
1410 /*
1411  * xmlChar<->text conversions
1412  */
1413 static xmlChar *
1414 xml_text2xmlChar(text *in)
1415 {
1416         return (xmlChar *) text_to_cstring(in);
1417 }
1418
1419
1420 #ifdef USE_LIBXMLCONTEXT
1421
1422 /*
1423  * Manage the special context used for all libxml allocations (but only
1424  * in special debug builds; see notes at top of file)
1425  */
1426 static void
1427 xml_memory_init(void)
1428 {
1429         /* Create memory context if not there already */
1430         if (LibxmlContext == NULL)
1431                 LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1432                                                                                           "LibxmlContext",
1433                                                                                           ALLOCSET_DEFAULT_MINSIZE,
1434                                                                                           ALLOCSET_DEFAULT_INITSIZE,
1435                                                                                           ALLOCSET_DEFAULT_MAXSIZE);
1436
1437         /* Re-establish the callbacks even if already set */
1438         xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1439 }
1440
1441 /*
1442  * Wrappers for memory management functions
1443  */
1444 static void *
1445 xml_palloc(size_t size)
1446 {
1447         return MemoryContextAlloc(LibxmlContext, size);
1448 }
1449
1450
1451 static void *
1452 xml_repalloc(void *ptr, size_t size)
1453 {
1454         return repalloc(ptr, size);
1455 }
1456
1457
/*
 * xml_pfree: deallocation callback for libxml.
 */
static void
xml_pfree(void *ptr)
{
        /* At least some parts of libxml assume xmlFree(NULL) is allowed */
        if (ptr == NULL)
                return;

        pfree(ptr);
}
1465
1466
1467 static char *
1468 xml_pstrdup(const char *string)
1469 {
1470         return MemoryContextStrdup(LibxmlContext, string);
1471 }
1472 #endif   /* USE_LIBXMLCONTEXT */
1473
1474
1475 /*
1476  * xml_ereport --- report an XML-related error
1477  *
1478  * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
1479  * standard.  This function adds libxml's native error message, if any, as
1480  * detail.
1481  *
1482  * This is exported for modules that want to share the core libxml error
1483  * handler.  Note that pg_xml_init() *must* have been called previously.
1484  */
1485 void
1486 xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
1487 {
1488         char       *detail;
1489
1490         /* Defend against someone passing us a bogus context struct */
1491         if (errcxt->magic != ERRCXT_MAGIC)
1492                 elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
1493
1494         /* Flag that the current libxml error has been reported */
1495         errcxt->err_occurred = false;
1496
1497         /* Include detail only if we have some text from libxml */
1498         if (errcxt->err_buf.len > 0)
1499                 detail = errcxt->err_buf.data;
1500         else
1501                 detail = NULL;
1502
1503         ereport(level,
1504                         (errcode(sqlcode),
1505                          errmsg_internal("%s", msg),
1506                          detail ? errdetail_internal("%s", detail) : 0));
1507 }
1508
1509
1510 /*
1511  * Error handler for libxml errors and warnings
1512  */
1513 static void
1514 xml_errorHandler(void *data, xmlErrorPtr error)
1515 {
1516         PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
1517         xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
1518         xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
1519         xmlNodePtr node = error->node;
1520         const xmlChar *name = (node != NULL &&
1521                                                    node->type == XML_ELEMENT_NODE) ? node->name : NULL;
1522         int                     domain = error->domain;
1523         int                     level = error->level;
1524         StringInfo      errorBuf;
1525
1526         /*
1527          * Defend against someone passing us a bogus context struct.
1528          *
1529          * We force a backend exit if this check fails because longjmp'ing out of
1530          * libxml would likely render it unsafe to use further.
1531          */
1532         if (xmlerrcxt->magic != ERRCXT_MAGIC)
1533                 elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
1534
1535         /*----------
1536          * Older libxml versions report some errors differently.
1537          * First, some errors were previously reported as coming from the parser
1538          * domain but are now reported as coming from the namespace domain.
1539          * Second, some warnings were upgraded to errors.
1540          * We attempt to compensate for that here.
1541          *----------
1542          */
1543         switch (error->code)
1544         {
1545                 case XML_WAR_NS_URI:
1546                         level = XML_ERR_ERROR;
1547                         domain = XML_FROM_NAMESPACE;
1548                         break;
1549
1550                 case XML_ERR_NS_DECL_ERROR:
1551                 case XML_WAR_NS_URI_RELATIVE:
1552                 case XML_WAR_NS_COLUMN:
1553                 case XML_NS_ERR_XML_NAMESPACE:
1554                 case XML_NS_ERR_UNDEFINED_NAMESPACE:
1555                 case XML_NS_ERR_QNAME:
1556                 case XML_NS_ERR_ATTRIBUTE_REDEFINED:
1557                 case XML_NS_ERR_EMPTY:
1558                         domain = XML_FROM_NAMESPACE;
1559                         break;
1560         }
1561
1562         /* Decide whether to act on the error or not */
1563         switch (domain)
1564         {
1565                 case XML_FROM_PARSER:
1566                 case XML_FROM_NONE:
1567                 case XML_FROM_MEMORY:
1568                 case XML_FROM_IO:
1569                         /* Accept error regardless of the parsing purpose */
1570                         break;
1571
1572                 default:
1573                         /* Ignore error if only doing well-formedness check */
1574                         if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
1575                                 return;
1576                         break;
1577         }
1578
1579         /* Prepare error message in errorBuf */
1580         errorBuf = makeStringInfo();
1581
1582         if (error->line > 0)
1583                 appendStringInfo(errorBuf, "line %d: ", error->line);
1584         if (name != NULL)
1585                 appendStringInfo(errorBuf, "element %s: ", name);
1586         appendStringInfoString(errorBuf, error->message);
1587
1588         /*
1589          * Append context information to errorBuf.
1590          *
1591          * xmlParserPrintFileContext() uses libxml's "generic" error handler to
1592          * write the context.  Since we don't want to duplicate libxml
1593          * functionality here, we set up a generic error handler temporarily.
1594          *
1595          * We use appendStringInfo() directly as libxml's generic error handler.
1596          * This should work because it has essentially the same signature as
1597          * libxml expects, namely (void *ptr, const char *msg, ...).
1598          */
1599         if (input != NULL)
1600         {
1601                 xmlGenericErrorFunc errFuncSaved = xmlGenericError;
1602                 void   *errCtxSaved = xmlGenericErrorContext;
1603
1604                 xmlSetGenericErrorFunc((void *) errorBuf,
1605                                                            (xmlGenericErrorFunc) appendStringInfo);
1606
1607                 /* Add context information to errorBuf */
1608                 appendStringInfoLineSeparator(errorBuf);
1609
1610                 xmlParserPrintFileContext(input);
1611
1612                 /* Restore generic error func */
1613                 xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
1614         }
1615
1616         /* Get rid of any trailing newlines in errorBuf */
1617         chopStringInfoNewlines(errorBuf);
1618
1619         /*
1620          * Legacy error handling mode.  err_occurred is never set, we just add the
1621          * message to err_buf.  This mode exists because the xml2 contrib module
1622          * uses our error-handling infrastructure, but we don't want to change its
1623          * behaviour since it's deprecated anyway.  This is also why we don't
1624          * distinguish between notices, warnings and errors here --- the old-style
1625          * generic error handler wouldn't have done that either.
1626          */
1627         if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
1628         {
1629                 appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1630                 appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
1631
1632                 pfree(errorBuf->data);
1633                 pfree(errorBuf);
1634                 return;
1635         }
1636
1637         /*
1638          * We don't want to ereport() here because that'd probably leave libxml in
1639          * an inconsistent state.  Instead, we remember the error and ereport()
1640          * from xml_ereport().
1641          *
1642          * Warnings and notices can be reported immediately since they won't cause
1643          * a longjmp() out of libxml.
1644          */
1645         if (level >= XML_ERR_ERROR)
1646         {
1647                 appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1648                 appendStringInfoString(&xmlerrcxt->err_buf, errorBuf->data);
1649
1650                 xmlerrcxt->err_occurred = true;
1651         }
1652         else if (level >= XML_ERR_WARNING)
1653         {
1654                 ereport(WARNING,
1655                                 (errmsg_internal("%s", errorBuf->data)));
1656         }
1657         else
1658         {
1659                 ereport(NOTICE,
1660                                 (errmsg_internal("%s", errorBuf->data)));
1661         }
1662
1663         pfree(errorBuf->data);
1664         pfree(errorBuf);
1665 }
1666
1667
1668 /*
1669  * Wrapper for "ereport" function for XML-related errors.  The "msg"
1670  * is the SQL-level message; some can be adopted from the SQL/XML
1671  * standard.  This function uses "code" to create a textual detail
1672  * message.  At the moment, we only need to cover those codes that we
1673  * may raise in this file.
1674  */
1675 static void
1676 xml_ereport_by_code(int level, int sqlcode,
1677                                         const char *msg, int code)
1678 {
1679         const char *det;
1680
1681         switch (code)
1682         {
1683                 case XML_ERR_INVALID_CHAR:
1684                         det = gettext_noop("Invalid character value.");
1685                         break;
1686                 case XML_ERR_SPACE_REQUIRED:
1687                         det = gettext_noop("Space required.");
1688                         break;
1689                 case XML_ERR_STANDALONE_VALUE:
1690                         det = gettext_noop("standalone accepts only 'yes' or 'no'.");
1691                         break;
1692                 case XML_ERR_VERSION_MISSING:
1693                         det = gettext_noop("Malformed declaration: missing version.");
1694                         break;
1695                 case XML_ERR_MISSING_ENCODING:
1696                         det = gettext_noop("Missing encoding in text declaration.");
1697                         break;
1698                 case XML_ERR_XMLDECL_NOT_FINISHED:
1699                         det = gettext_noop("Parsing XML declaration: '?>' expected.");
1700                         break;
1701                 default:
1702                         det = gettext_noop("Unrecognized libxml error code: %d.");
1703                         break;
1704         }
1705
1706         ereport(level,
1707                         (errcode(sqlcode),
1708                          errmsg_internal("%s", msg),
1709                          errdetail(det, code)));
1710 }
1711
1712
1713 /*
1714  * Remove all trailing newlines from a StringInfo string
1715  */
1716 static void
1717 chopStringInfoNewlines(StringInfo str)
1718 {
1719         while (str->len > 0 && str->data[str->len - 1] == '\n')
1720                 str->data[--str->len] = '\0';
1721 }
1722
1723
1724 /*
1725  * Append a newline after removing any existing trailing newlines
1726  */
1727 static void
1728 appendStringInfoLineSeparator(StringInfo str)
1729 {
1730         chopStringInfoNewlines(str);
1731         if (str->len > 0)
1732                 appendStringInfoChar(str, '\n');
1733 }
1734
1735
1736 /*
1737  * Convert one char in the current server encoding to a Unicode codepoint.
1738  */
1739 static pg_wchar
1740 sqlchar_to_unicode(char *s)
1741 {
1742         char       *utf8string;
1743         pg_wchar        ret[2];                 /* need space for trailing zero */
1744
1745         utf8string = (char *) pg_do_encoding_conversion((unsigned char *) s,
1746                                                                                                         pg_mblen(s),
1747                                                                                                         GetDatabaseEncoding(),
1748                                                                                                         PG_UTF8);
1749
1750         pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
1751                                                                   pg_encoding_mblen(PG_UTF8, utf8string));
1752
1753         if (utf8string != s)
1754                 pfree(utf8string);
1755
1756         return ret[0];
1757 }
1758
1759
1760 static bool
1761 is_valid_xml_namefirst(pg_wchar c)
1762 {
1763         /* (Letter | '_' | ':') */
1764         return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1765                         || c == '_' || c == ':');
1766 }
1767
1768
1769 static bool
1770 is_valid_xml_namechar(pg_wchar c)
1771 {
1772         /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
1773         return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
1774                         || xmlIsDigitQ(c)
1775                         || c == '.' || c == '-' || c == '_' || c == ':'
1776                         || xmlIsCombiningQ(c)
1777                         || xmlIsExtenderQ(c));
1778 }
1779 #endif   /* USE_LIBXML */
1780
1781
1782 /*
1783  * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
1784  */
1785 char *
1786 map_sql_identifier_to_xml_name(char *ident, bool fully_escaped,
1787                                                            bool escape_period)
1788 {
1789 #ifdef USE_LIBXML
1790         StringInfoData buf;
1791         char       *p;
1792
1793         /*
1794          * SQL/XML doesn't make use of this case anywhere, so it's probably a
1795          * mistake.
1796          */
1797         Assert(fully_escaped || !escape_period);
1798
1799         initStringInfo(&buf);
1800
1801         for (p = ident; *p; p += pg_mblen(p))
1802         {
1803                 if (*p == ':' && (p == ident || fully_escaped))
1804                         appendStringInfo(&buf, "_x003A_");
1805                 else if (*p == '_' && *(p + 1) == 'x')
1806                         appendStringInfo(&buf, "_x005F_");
1807                 else if (fully_escaped && p == ident &&
1808                                  pg_strncasecmp(p, "xml", 3) == 0)
1809                 {
1810                         if (*p == 'x')
1811                                 appendStringInfo(&buf, "_x0078_");
1812                         else
1813                                 appendStringInfo(&buf, "_x0058_");
1814                 }
1815                 else if (escape_period && *p == '.')
1816                         appendStringInfo(&buf, "_x002E_");
1817                 else
1818                 {
1819                         pg_wchar        u = sqlchar_to_unicode(p);
1820
1821                         if ((p == ident)
1822                                 ? !is_valid_xml_namefirst(u)
1823                                 : !is_valid_xml_namechar(u))
1824                                 appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
1825                         else
1826                                 appendBinaryStringInfo(&buf, p, pg_mblen(p));
1827                 }
1828         }
1829
1830         return buf.data;
1831 #else                                                   /* not USE_LIBXML */
1832         NO_XML_SUPPORT();
1833         return NULL;
1834 #endif   /* not USE_LIBXML */
1835 }
1836
1837
1838 /*
1839  * Map a Unicode codepoint into the current server encoding.
1840  */
1841 static char *
1842 unicode_to_sqlchar(pg_wchar c)
1843 {
1844         unsigned char utf8string[5];    /* need room for trailing zero */
1845         char       *result;
1846
1847         memset(utf8string, 0, sizeof(utf8string));
1848         unicode_to_utf8(c, utf8string);
1849
1850         result = (char *) pg_do_encoding_conversion(utf8string,
1851                                                                                                 pg_encoding_mblen(PG_UTF8,
1852                                                                                                                 (char *) utf8string),
1853                                                                                                 PG_UTF8,
1854                                                                                                 GetDatabaseEncoding());
1855         /* if pg_do_encoding_conversion didn't strdup, we must */
1856         if (result == (char *) utf8string)
1857                 result = pstrdup(result);
1858         return result;
1859 }
1860
1861
1862 /*
1863  * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
1864  */
1865 char *
1866 map_xml_name_to_sql_identifier(char *name)
1867 {
1868         StringInfoData buf;
1869         char       *p;
1870
1871         initStringInfo(&buf);
1872
1873         for (p = name; *p; p += pg_mblen(p))
1874         {
1875                 if (*p == '_' && *(p + 1) == 'x'
1876                         && isxdigit((unsigned char) *(p + 2))
1877                         && isxdigit((unsigned char) *(p + 3))
1878                         && isxdigit((unsigned char) *(p + 4))
1879                         && isxdigit((unsigned char) *(p + 5))
1880                         && *(p + 6) == '_')
1881                 {
1882                         unsigned int u;
1883
1884                         sscanf(p + 2, "%X", &u);
1885                         appendStringInfoString(&buf, unicode_to_sqlchar(u));
1886                         p += 6;
1887                 }
1888                 else
1889                         appendBinaryStringInfo(&buf, p, pg_mblen(p));
1890         }
1891
1892         return buf.data;
1893 }
1894
1895 /*
1896  * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
1897  *
1898  * When xml_escape_strings is true, then certain characters in string
1899  * values are replaced by entity references (&lt; etc.), as specified
1900  * in SQL/XML:2008 section 9.8 GR 9) a) iii).   This is normally what is
1901  * wanted.      The false case is mainly useful when the resulting value
1902  * is used with xmlTextWriterWriteAttribute() to write out an
1903  * attribute, because that function does the escaping itself.
1904  */
1905 char *
1906 map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
1907 {
1908         if (type_is_array_domain(type))
1909         {
1910                 ArrayType  *array;
1911                 Oid                     elmtype;
1912                 int16           elmlen;
1913                 bool            elmbyval;
1914                 char            elmalign;
1915                 int                     num_elems;
1916                 Datum      *elem_values;
1917                 bool       *elem_nulls;
1918                 StringInfoData buf;
1919                 int                     i;
1920
1921                 array = DatumGetArrayTypeP(value);
1922                 elmtype = ARR_ELEMTYPE(array);
1923                 get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
1924
1925                 deconstruct_array(array, elmtype,
1926                                                   elmlen, elmbyval, elmalign,
1927                                                   &elem_values, &elem_nulls,
1928                                                   &num_elems);
1929
1930                 initStringInfo(&buf);
1931
1932                 for (i = 0; i < num_elems; i++)
1933                 {
1934                         if (elem_nulls[i])
1935                                 continue;
1936                         appendStringInfoString(&buf, "<element>");
1937                         appendStringInfoString(&buf,
1938                                                                    map_sql_value_to_xml_value(elem_values[i],
1939                                                                                                                           elmtype, true));
1940                         appendStringInfoString(&buf, "</element>");
1941                 }
1942
1943                 pfree(elem_values);
1944                 pfree(elem_nulls);
1945
1946                 return buf.data;
1947         }
1948         else
1949         {
1950                 Oid                     typeOut;
1951                 bool            isvarlena;
1952                 char       *str;
1953
1954                 /*
1955                  * Special XSD formatting for some data types
1956                  */
1957                 switch (type)
1958                 {
1959                         case BOOLOID:
1960                                 if (DatumGetBool(value))
1961                                         return "true";
1962                                 else
1963                                         return "false";
1964
1965                         case DATEOID:
1966                                 {
1967                                         DateADT         date;
1968                                         struct pg_tm tm;
1969                                         char            buf[MAXDATELEN + 1];
1970
1971                                         date = DatumGetDateADT(value);
1972                                         /* XSD doesn't support infinite values */
1973                                         if (DATE_NOT_FINITE(date))
1974                                                 ereport(ERROR,
1975                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
1976                                                                  errmsg("date out of range"),
1977                                                                  errdetail("XML does not support infinite date values.")));
1978                                         j2date(date + POSTGRES_EPOCH_JDATE,
1979                                                    &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
1980                                         EncodeDateOnly(&tm, USE_XSD_DATES, buf);
1981
1982                                         return pstrdup(buf);
1983                                 }
1984
1985                         case TIMESTAMPOID:
1986                                 {
1987                                         Timestamp       timestamp;
1988                                         struct pg_tm tm;
1989                                         fsec_t          fsec;
1990                                         char       *tzn = NULL;
1991                                         char            buf[MAXDATELEN + 1];
1992
1993                                         timestamp = DatumGetTimestamp(value);
1994
1995                                         /* XSD doesn't support infinite values */
1996                                         if (TIMESTAMP_NOT_FINITE(timestamp))
1997                                                 ereport(ERROR,
1998                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
1999                                                                  errmsg("timestamp out of range"),
2000                                                                  errdetail("XML does not support infinite timestamp values.")));
2001                                         else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2002                                                 EncodeDateTime(&tm, fsec, NULL, &tzn, USE_XSD_DATES, buf);
2003                                         else
2004                                                 ereport(ERROR,
2005                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2006                                                                  errmsg("timestamp out of range")));
2007
2008                                         return pstrdup(buf);
2009                                 }
2010
2011                         case TIMESTAMPTZOID:
2012                                 {
2013                                         TimestampTz timestamp;
2014                                         struct pg_tm tm;
2015                                         int                     tz;
2016                                         fsec_t          fsec;
2017                                         char       *tzn = NULL;
2018                                         char            buf[MAXDATELEN + 1];
2019
2020                                         timestamp = DatumGetTimestamp(value);
2021
2022                                         /* XSD doesn't support infinite values */
2023                                         if (TIMESTAMP_NOT_FINITE(timestamp))
2024                                                 ereport(ERROR,
2025                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2026                                                                  errmsg("timestamp out of range"),
2027                                                                  errdetail("XML does not support infinite timestamp values.")));
2028                                         else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2029                                                 EncodeDateTime(&tm, fsec, &tz, &tzn, USE_XSD_DATES, buf);
2030                                         else
2031                                                 ereport(ERROR,
2032                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2033                                                                  errmsg("timestamp out of range")));
2034
2035                                         return pstrdup(buf);
2036                                 }
2037
2038 #ifdef USE_LIBXML
2039                         case BYTEAOID:
2040                                 {
2041                                         bytea      *bstr = DatumGetByteaPP(value);
2042                                         PgXmlErrorContext *xmlerrcxt;
2043                                         volatile xmlBufferPtr buf = NULL;
2044                                         volatile xmlTextWriterPtr writer = NULL;
2045                                         char       *result;
2046
2047                                         xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2048
2049                                         PG_TRY();
2050                                         {
2051                                                 buf = xmlBufferCreate();
2052                                                 if (buf == NULL || xmlerrcxt->err_occurred)
2053                                                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2054                                                                                 "could not allocate xmlBuffer");
2055                                                 writer = xmlNewTextWriterMemory(buf, 0);
2056                                                 if (writer == NULL || xmlerrcxt->err_occurred)
2057                                                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2058                                                                                 "could not allocate xmlTextWriter");
2059
2060                                                 if (xmlbinary == XMLBINARY_BASE64)
2061                                                         xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2062                                                                                                  0, VARSIZE_ANY_EXHDR(bstr));
2063                                                 else
2064                                                         xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2065                                                                                                  0, VARSIZE_ANY_EXHDR(bstr));
2066
2067                                                 /* we MUST do this now to flush data out to the buffer */
2068                                                 xmlFreeTextWriter(writer);
2069                                                 writer = NULL;
2070
2071                                                 result = pstrdup((const char *) xmlBufferContent(buf));
2072                                         }
2073                                         PG_CATCH();
2074                                         {
2075                                                 if (writer)
2076                                                         xmlFreeTextWriter(writer);
2077                                                 if (buf)
2078                                                         xmlBufferFree(buf);
2079
2080                                                 pg_xml_done(xmlerrcxt, true);
2081
2082                                                 PG_RE_THROW();
2083                                         }
2084                                         PG_END_TRY();
2085
2086                                         xmlBufferFree(buf);
2087
2088                                         pg_xml_done(xmlerrcxt, false);
2089
2090                                         return result;
2091                                 }
2092 #endif   /* USE_LIBXML */
2093
2094                 }
2095
2096                 /*
2097                  * otherwise, just use the type's native text representation
2098                  */
2099                 getTypeOutputInfo(type, &typeOut, &isvarlena);
2100                 str = OidOutputFunctionCall(typeOut, value);
2101
2102                 /* ... exactly as-is for XML, and when escaping is not wanted */
2103                 if (type == XMLOID || !xml_escape_strings)
2104                         return str;
2105
2106                 /* otherwise, translate special characters as needed */
2107                 return escape_xml(str);
2108         }
2109 }
2110
2111
2112 /*
2113  * Escape characters in text that have special meanings in XML.
2114  *
2115  * Returns a palloc'd string.
2116  *
2117  * NB: this is intentionally not dependent on libxml.
2118  */
2119 char *
2120 escape_xml(const char *str)
2121 {
2122         StringInfoData buf;
2123         const char *p;
2124
2125         initStringInfo(&buf);
2126         for (p = str; *p; p++)
2127         {
2128                 switch (*p)
2129                 {
2130                         case '&':
2131                                 appendStringInfoString(&buf, "&amp;");
2132                                 break;
2133                         case '<':
2134                                 appendStringInfoString(&buf, "&lt;");
2135                                 break;
2136                         case '>':
2137                                 appendStringInfoString(&buf, "&gt;");
2138                                 break;
2139                         case '\r':
2140                                 appendStringInfoString(&buf, "&#x0d;");
2141                                 break;
2142                         default:
2143                                 appendStringInfoCharMacro(&buf, *p);
2144                                 break;
2145                 }
2146         }
2147         return buf.data;
2148 }
2149
2150
/*
 * Duplicate a C string, including its terminator, into memory obtained
 * from SPI_palloc().
 */
static char *
_SPI_strdup(const char *s)
{
	size_t		nbytes;
	char	   *copy;

	nbytes = strlen(s) + 1;		/* count the terminating NUL too */
	copy = SPI_palloc(nbytes);
	memcpy(copy, s, nbytes);

	return copy;
}
2160
2161
2162 /*
2163  * SQL to XML mapping functions
2164  *
2165  * What follows below was at one point intentionally organized so that
2166  * you can read along in the SQL/XML standard. The functions are
2167  * mostly split up the way the clauses lay out in the standards
2168  * document, and the identifiers are also aligned with the standard
2169  * text.  Unfortunately, SQL/XML:2006 reordered the clauses
2170  * differently than SQL/XML:2003, so the order below doesn't make much
2171  * sense anymore.
2172  *
2173  * There are many things going on there:
2174  *
2175  * There are two kinds of mappings: Mapping SQL data (table contents)
2176  * to XML documents, and mapping SQL structure (the "schema") to XML
2177  * Schema.      And there are functions that do both at the same time.
2178  *
2179  * Then you can map a database, a schema, or a table, each in both
2180  * ways.  This breaks down recursively: Mapping a database invokes
2181  * mapping schemas, which invokes mapping tables, which invokes
2182  * mapping rows, which invokes mapping columns, although you can't
2183  * call the last two from the outside.  Because of this, there are a
2184  * number of xyz_internal() functions which are to be called both from
2185  * the function manager wrapper and from some upper layer in a
2186  * recursive call.
2187  *
2188  * See the documentation about what the common function arguments
2189  * nulls, tableforest, and targetns mean.
2190  *
2191  * Some style guidelines for XML output: Use double quotes for quoting
2192  * XML attributes.      Indent XML elements by two spaces, but remember
2193  * that a lot of code is called recursively at different levels, so
2194  * it's better not to indent rather than create output that indents
2195  * and outdents weirdly.  Add newlines to make the output look nice.
2196  */
2197
2198
2199 /*
2200  * Visibility of objects for XML mappings; see SQL/XML:2008 section
2201  * 4.10.8.
2202  */
2203
2204 /*
2205  * Given a query, which must return type oid as first column, produce
2206  * a list of Oids with the query results.
2207  */
2208 static List *
2209 query_to_oid_list(const char *query)
2210 {
2211         int                     i;
2212         List       *list = NIL;
2213
2214         SPI_execute(query, true, 0);
2215
2216         for (i = 0; i < SPI_processed; i++)
2217         {
2218                 Datum           oid;
2219                 bool            isnull;
2220
2221                 oid = SPI_getbinval(SPI_tuptable->vals[i],
2222                                                         SPI_tuptable->tupdesc,
2223                                                         1,
2224                                                         &isnull);
2225                 if (!isnull)
2226                         list = lappend_oid(list, DatumGetObjectId(oid));
2227         }
2228
2229         return list;
2230 }
2231
2232
2233 static List *
2234 schema_get_xml_visible_tables(Oid nspid)
2235 {
2236         StringInfoData query;
2237
2238         initStringInfo(&query);
2239         appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class WHERE relnamespace = %u AND relkind IN ('r', 'v') AND pg_catalog.has_table_privilege (oid, 'SELECT') ORDER BY relname;", nspid);
2240
2241         return query_to_oid_list(query.data);
2242 }
2243
2244
/*
 * Including the system schemas is probably not useful for a database
 * mapping.
 *
 * XML_VISIBLE_SCHEMAS_EXCLUDE is the SQL condition that matches such
 * schemas; XML_VISIBLE_SCHEMAS is a query (without trailing semicolon, so
 * callers can append to it or embed it) selecting the OIDs of all other
 * schemas on which the current user has USAGE privilege.
 */
#define XML_VISIBLE_SCHEMAS_EXCLUDE \
	"(nspname ~ '^pg_' OR nspname = 'information_schema')"

#define XML_VISIBLE_SCHEMAS \
	"SELECT oid FROM pg_catalog.pg_namespace " \
	"WHERE pg_catalog.has_schema_privilege (oid, 'USAGE') " \
	"AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2252
2253
2254 static List *
2255 database_get_xml_visible_schemas(void)
2256 {
2257         return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2258 }
2259
2260
2261 static List *
2262 database_get_xml_visible_tables(void)
2263 {
2264         /* At the moment there is no order required here. */
2265         return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class WHERE relkind IN ('r', 'v') AND pg_catalog.has_table_privilege (pg_class.oid, 'SELECT') AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2266 }
2267
2268
2269 /*
2270  * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2271  * section 9.11.
2272  */
2273
2274 static StringInfo
2275 table_to_xml_internal(Oid relid,
2276                                           const char *xmlschema, bool nulls, bool tableforest,
2277                                           const char *targetns, bool top_level)
2278 {
2279         StringInfoData query;
2280
2281         initStringInfo(&query);
2282         appendStringInfo(&query, "SELECT * FROM %s",
2283                                          DatumGetCString(DirectFunctionCall1(regclassout,
2284                                                                                                   ObjectIdGetDatum(relid))));
2285         return query_to_xml_internal(query.data, get_rel_name(relid),
2286                                                                  xmlschema, nulls, tableforest,
2287                                                                  targetns, top_level);
2288 }
2289
2290
2291 Datum
2292 table_to_xml(PG_FUNCTION_ARGS)
2293 {
2294         Oid                     relid = PG_GETARG_OID(0);
2295         bool            nulls = PG_GETARG_BOOL(1);
2296         bool            tableforest = PG_GETARG_BOOL(2);
2297         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2298
2299         PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2300                                                                                                                   nulls, tableforest,
2301                                                                                                                    targetns, true)));
2302 }
2303
2304
2305 Datum
2306 query_to_xml(PG_FUNCTION_ARGS)
2307 {
2308         char       *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2309         bool            nulls = PG_GETARG_BOOL(1);
2310         bool            tableforest = PG_GETARG_BOOL(2);
2311         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2312
2313         PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2314                                                                                                         NULL, nulls, tableforest,
2315                                                                                                                    targetns, true)));
2316 }
2317
2318
2319 Datum
2320 cursor_to_xml(PG_FUNCTION_ARGS)
2321 {
2322         char       *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2323         int32           count = PG_GETARG_INT32(1);
2324         bool            nulls = PG_GETARG_BOOL(2);
2325         bool            tableforest = PG_GETARG_BOOL(3);
2326         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2327
2328         StringInfoData result;
2329         Portal          portal;
2330         int                     i;
2331
2332         initStringInfo(&result);
2333
2334         SPI_connect();
2335         portal = SPI_cursor_find(name);
2336         if (portal == NULL)
2337                 ereport(ERROR,
2338                                 (errcode(ERRCODE_UNDEFINED_CURSOR),
2339                                  errmsg("cursor \"%s\" does not exist", name)));
2340
2341         SPI_cursor_fetch(portal, true, count);
2342         for (i = 0; i < SPI_processed; i++)
2343                 SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2344                                                                   tableforest, targetns, true);
2345
2346         SPI_finish();
2347
2348         PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2349 }
2350
2351
2352 /*
2353  * Write the start tag of the root element of a data mapping.
2354  *
2355  * top_level means that this is the very top level of the eventual
2356  * output.      For example, when the user calls table_to_xml, then a call
2357  * with a table name to this function is the top level.  When the user
2358  * calls database_to_xml, then a call with a schema name to this
2359  * function is not the top level.  If top_level is false, then the XML
2360  * namespace declarations are omitted, because they supposedly already
2361  * appeared earlier in the output.      Repeating them is not wrong, but
2362  * it looks ugly.
2363  */
2364 static void
2365 xmldata_root_element_start(StringInfo result, const char *eltname,
2366                                                    const char *xmlschema, const char *targetns,
2367                                                    bool top_level)
2368 {
2369         /* This isn't really wrong but currently makes no sense. */
2370         Assert(top_level || !xmlschema);
2371
2372         appendStringInfo(result, "<%s", eltname);
2373         if (top_level)
2374         {
2375                 appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
2376                 if (strlen(targetns) > 0)
2377                         appendStringInfo(result, " xmlns=\"%s\"", targetns);
2378         }
2379         if (xmlschema)
2380         {
2381                 /* FIXME: better targets */
2382                 if (strlen(targetns) > 0)
2383                         appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
2384                 else
2385                         appendStringInfo(result, " xsi:noNamespaceSchemaLocation=\"#\"");
2386         }
2387         appendStringInfo(result, ">\n\n");
2388 }
2389
2390
2391 static void
2392 xmldata_root_element_end(StringInfo result, const char *eltname)
2393 {
2394         appendStringInfo(result, "</%s>\n", eltname);
2395 }
2396
2397
2398 static StringInfo
2399 query_to_xml_internal(const char *query, char *tablename,
2400                                           const char *xmlschema, bool nulls, bool tableforest,
2401                                           const char *targetns, bool top_level)
2402 {
2403         StringInfo      result;
2404         char       *xmltn;
2405         int                     i;
2406
2407         if (tablename)
2408                 xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
2409         else
2410                 xmltn = "table";
2411
2412         result = makeStringInfo();
2413
2414         SPI_connect();
2415         if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
2416                 ereport(ERROR,
2417                                 (errcode(ERRCODE_DATA_EXCEPTION),
2418                                  errmsg("invalid query")));
2419
2420         if (!tableforest)
2421                 xmldata_root_element_start(result, xmltn, xmlschema,
2422                                                                    targetns, top_level);
2423
2424         if (xmlschema)
2425                 appendStringInfo(result, "%s\n\n", xmlschema);
2426
2427         for (i = 0; i < SPI_processed; i++)
2428                 SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
2429                                                                   tableforest, targetns, top_level);
2430
2431         if (!tableforest)
2432                 xmldata_root_element_end(result, xmltn);
2433
2434         SPI_finish();
2435
2436         return result;
2437 }
2438
2439
2440 Datum
2441 table_to_xmlschema(PG_FUNCTION_ARGS)
2442 {
2443         Oid                     relid = PG_GETARG_OID(0);
2444         bool            nulls = PG_GETARG_BOOL(1);
2445         bool            tableforest = PG_GETARG_BOOL(2);
2446         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2447         const char *result;
2448         Relation        rel;
2449
2450         rel = heap_open(relid, AccessShareLock);
2451         result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2452                                                                                 tableforest, targetns);
2453         heap_close(rel, NoLock);
2454
2455         PG_RETURN_XML_P(cstring_to_xmltype(result));
2456 }
2457
2458
2459 Datum
2460 query_to_xmlschema(PG_FUNCTION_ARGS)
2461 {
2462         char       *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2463         bool            nulls = PG_GETARG_BOOL(1);
2464         bool            tableforest = PG_GETARG_BOOL(2);
2465         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2466         const char *result;
2467         SPIPlanPtr      plan;
2468         Portal          portal;
2469
2470         SPI_connect();
2471
2472         if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2473                 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2474
2475         if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2476                 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2477
2478         result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2479                                                                                                         InvalidOid, nulls,
2480                                                                                                         tableforest, targetns));
2481         SPI_cursor_close(portal);
2482         SPI_finish();
2483
2484         PG_RETURN_XML_P(cstring_to_xmltype(result));
2485 }
2486
2487
2488 Datum
2489 cursor_to_xmlschema(PG_FUNCTION_ARGS)
2490 {
2491         char       *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2492         bool            nulls = PG_GETARG_BOOL(1);
2493         bool            tableforest = PG_GETARG_BOOL(2);
2494         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2495         const char *xmlschema;
2496         Portal          portal;
2497
2498         SPI_connect();
2499         portal = SPI_cursor_find(name);
2500         if (portal == NULL)
2501                 ereport(ERROR,
2502                                 (errcode(ERRCODE_UNDEFINED_CURSOR),
2503                                  errmsg("cursor \"%s\" does not exist", name)));
2504
2505         xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2506                                                                                                            InvalidOid, nulls,
2507                                                                                                          tableforest, targetns));
2508         SPI_finish();
2509
2510         PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
2511 }
2512
2513
2514 Datum
2515 table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2516 {
2517         Oid                     relid = PG_GETARG_OID(0);
2518         bool            nulls = PG_GETARG_BOOL(1);
2519         bool            tableforest = PG_GETARG_BOOL(2);
2520         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2521         Relation        rel;
2522         const char *xmlschema;
2523
2524         rel = heap_open(relid, AccessShareLock);
2525         xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2526                                                                                    tableforest, targetns);
2527         heap_close(rel, NoLock);
2528
2529         PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
2530                                                                                            xmlschema, nulls, tableforest,
2531                                                                                                                    targetns, true)));
2532 }
2533
2534
2535 Datum
2536 query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2537 {
2538         char       *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2539         bool            nulls = PG_GETARG_BOOL(1);
2540         bool            tableforest = PG_GETARG_BOOL(2);
2541         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2542
2543         const char *xmlschema;
2544         SPIPlanPtr      plan;
2545         Portal          portal;
2546
2547         SPI_connect();
2548
2549         if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2550                 elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2551
2552         if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2553                 elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2554
2555         xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2556                                                                   InvalidOid, nulls, tableforest, targetns));
2557         SPI_cursor_close(portal);
2558         SPI_finish();
2559
2560         PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2561                                                                                            xmlschema, nulls, tableforest,
2562                                                                                                                    targetns, true)));
2563 }
2564
2565
2566 /*
2567  * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
2568  * sections 9.13, 9.14.
2569  */
2570
2571 static StringInfo
2572 schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
2573                                            bool tableforest, const char *targetns, bool top_level)
2574 {
2575         StringInfo      result;
2576         char       *xmlsn;
2577         List       *relid_list;
2578         ListCell   *cell;
2579
2580         xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
2581                                                                                    true, false);
2582         result = makeStringInfo();
2583
2584         xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
2585
2586         if (xmlschema)
2587                 appendStringInfo(result, "%s\n\n", xmlschema);
2588
2589         SPI_connect();
2590
2591         relid_list = schema_get_xml_visible_tables(nspid);
2592
2593         SPI_push();
2594
2595         foreach(cell, relid_list)
2596         {
2597                 Oid                     relid = lfirst_oid(cell);
2598                 StringInfo      subres;
2599
2600                 subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
2601                                                                            targetns, false);
2602
2603                 appendStringInfoString(result, subres->data);
2604                 appendStringInfoChar(result, '\n');
2605         }
2606
2607         SPI_pop();
2608         SPI_finish();
2609
2610         xmldata_root_element_end(result, xmlsn);
2611
2612         return result;
2613 }
2614
2615
2616 Datum
2617 schema_to_xml(PG_FUNCTION_ARGS)
2618 {
2619         Name            name = PG_GETARG_NAME(0);
2620         bool            nulls = PG_GETARG_BOOL(1);
2621         bool            tableforest = PG_GETARG_BOOL(2);
2622         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2623
2624         char       *schemaname;
2625         Oid                     nspid;
2626
2627         schemaname = NameStr(*name);
2628         nspid = LookupExplicitNamespace(schemaname);
2629
2630         PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
2631                                                                            nulls, tableforest, targetns, true)));
2632 }
2633
2634
2635 /*
2636  * Write the start element of the root element of an XML Schema mapping.
2637  */
2638 static void
2639 xsd_schema_element_start(StringInfo result, const char *targetns)
2640 {
2641         appendStringInfoString(result,
2642                                                    "<xsd:schema\n"
2643                                                    "    xmlns:xsd=\"" NAMESPACE_XSD "\"");
2644         if (strlen(targetns) > 0)
2645                 appendStringInfo(result,
2646                                                  "\n"
2647                                                  "    targetNamespace=\"%s\"\n"
2648                                                  "    elementFormDefault=\"qualified\"",
2649                                                  targetns);
2650         appendStringInfoString(result,
2651                                                    ">\n\n");
2652 }
2653
2654
2655 static void
2656 xsd_schema_element_end(StringInfo result)
2657 {
2658         appendStringInfoString(result, "</xsd:schema>");
2659 }
2660
2661
2662 static StringInfo
2663 schema_to_xmlschema_internal(const char *schemaname, bool nulls,
2664                                                          bool tableforest, const char *targetns)
2665 {
2666         Oid                     nspid;
2667         List       *relid_list;
2668         List       *tupdesc_list;
2669         ListCell   *cell;
2670         StringInfo      result;
2671
2672         result = makeStringInfo();
2673
2674         nspid = LookupExplicitNamespace(schemaname);
2675
2676         xsd_schema_element_start(result, targetns);
2677
2678         SPI_connect();
2679
2680         relid_list = schema_get_xml_visible_tables(nspid);
2681
2682         tupdesc_list = NIL;
2683         foreach(cell, relid_list)
2684         {
2685                 Relation        rel;
2686
2687                 rel = heap_open(lfirst_oid(cell), AccessShareLock);
2688                 tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
2689                 heap_close(rel, NoLock);
2690         }
2691
2692         appendStringInfoString(result,
2693                                                    map_sql_typecoll_to_xmlschema_types(tupdesc_list));
2694
2695         appendStringInfoString(result,
2696                                                  map_sql_schema_to_xmlschema_types(nspid, relid_list,
2697                                                                                           nulls, tableforest, targetns));
2698
2699         xsd_schema_element_end(result);
2700
2701         SPI_finish();
2702
2703         return result;
2704 }
2705
2706
2707 Datum
2708 schema_to_xmlschema(PG_FUNCTION_ARGS)
2709 {
2710         Name            name = PG_GETARG_NAME(0);
2711         bool            nulls = PG_GETARG_BOOL(1);
2712         bool            tableforest = PG_GETARG_BOOL(2);
2713         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2714
2715         PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
2716                                                                                          nulls, tableforest, targetns)));
2717 }
2718
2719
2720 Datum
2721 schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2722 {
2723         Name            name = PG_GETARG_NAME(0);
2724         bool            nulls = PG_GETARG_BOOL(1);
2725         bool            tableforest = PG_GETARG_BOOL(2);
2726         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2727         char       *schemaname;
2728         Oid                     nspid;
2729         StringInfo      xmlschema;
2730
2731         schemaname = NameStr(*name);
2732         nspid = LookupExplicitNamespace(schemaname);
2733
2734         xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
2735                                                                                          tableforest, targetns);
2736
2737         PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
2738                                                                                                           xmlschema->data, nulls,
2739                                                                                           tableforest, targetns, true)));
2740 }
2741
2742
2743 /*
2744  * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
2745  * sections 9.16, 9.17.
2746  */
2747
2748 static StringInfo
2749 database_to_xml_internal(const char *xmlschema, bool nulls,
2750                                                  bool tableforest, const char *targetns)
2751 {
2752         StringInfo      result;
2753         List       *nspid_list;
2754         ListCell   *cell;
2755         char       *xmlcn;
2756
2757         xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
2758                                                                                    true, false);
2759         result = makeStringInfo();
2760
2761         xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
2762
2763         if (xmlschema)
2764                 appendStringInfo(result, "%s\n\n", xmlschema);
2765
2766         SPI_connect();
2767
2768         nspid_list = database_get_xml_visible_schemas();
2769
2770         SPI_push();
2771
2772         foreach(cell, nspid_list)
2773         {
2774                 Oid                     nspid = lfirst_oid(cell);
2775                 StringInfo      subres;
2776
2777                 subres = schema_to_xml_internal(nspid, NULL, nulls,
2778                                                                                 tableforest, targetns, false);
2779
2780                 appendStringInfoString(result, subres->data);
2781                 appendStringInfoChar(result, '\n');
2782         }
2783
2784         SPI_pop();
2785         SPI_finish();
2786
2787         xmldata_root_element_end(result, xmlcn);
2788
2789         return result;
2790 }
2791
2792
2793 Datum
2794 database_to_xml(PG_FUNCTION_ARGS)
2795 {
2796         bool            nulls = PG_GETARG_BOOL(0);
2797         bool            tableforest = PG_GETARG_BOOL(1);
2798         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
2799
2800         PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
2801                                                                                                         tableforest, targetns)));
2802 }
2803
2804
2805 static StringInfo
2806 database_to_xmlschema_internal(bool nulls, bool tableforest,
2807                                                            const char *targetns)
2808 {
2809         List       *relid_list;
2810         List       *nspid_list;
2811         List       *tupdesc_list;
2812         ListCell   *cell;
2813         StringInfo      result;
2814
2815         result = makeStringInfo();
2816
2817         xsd_schema_element_start(result, targetns);
2818
2819         SPI_connect();
2820
2821         relid_list = database_get_xml_visible_tables();
2822         nspid_list = database_get_xml_visible_schemas();
2823
2824         tupdesc_list = NIL;
2825         foreach(cell, relid_list)
2826         {
2827                 Relation        rel;
2828
2829                 rel = heap_open(lfirst_oid(cell), AccessShareLock);
2830                 tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
2831                 heap_close(rel, NoLock);
2832         }
2833
2834         appendStringInfoString(result,
2835                                                    map_sql_typecoll_to_xmlschema_types(tupdesc_list));
2836
2837         appendStringInfoString(result,
2838                                                    map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
2839
2840         xsd_schema_element_end(result);
2841
2842         SPI_finish();
2843
2844         return result;
2845 }
2846
2847
2848 Datum
2849 database_to_xmlschema(PG_FUNCTION_ARGS)
2850 {
2851         bool            nulls = PG_GETARG_BOOL(0);
2852         bool            tableforest = PG_GETARG_BOOL(1);
2853         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
2854
2855         PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
2856                                                                                                         tableforest, targetns)));
2857 }
2858
2859
2860 Datum
2861 database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2862 {
2863         bool            nulls = PG_GETARG_BOOL(0);
2864         bool            tableforest = PG_GETARG_BOOL(1);
2865         const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
2866         StringInfo      xmlschema;
2867
2868         xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
2869
2870         PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
2871                                                                                          nulls, tableforest, targetns)));
2872 }
2873
2874
2875 /*
2876  * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
2877  * 9.2.
2878  */
2879 static char *
2880 map_multipart_sql_identifier_to_xml_name(char *a, char *b, char *c, char *d)
2881 {
2882         StringInfoData result;
2883
2884         initStringInfo(&result);
2885
2886         if (a)
2887                 appendStringInfo(&result, "%s",
2888                                                  map_sql_identifier_to_xml_name(a, true, true));
2889         if (b)
2890                 appendStringInfo(&result, ".%s",
2891                                                  map_sql_identifier_to_xml_name(b, true, true));
2892         if (c)
2893                 appendStringInfo(&result, ".%s",
2894                                                  map_sql_identifier_to_xml_name(c, true, true));
2895         if (d)
2896                 appendStringInfo(&result, ".%s",
2897                                                  map_sql_identifier_to_xml_name(d, true, true));
2898
2899         return result.data;
2900 }
2901
2902
2903 /*
2904  * Map an SQL table to an XML Schema document; see SQL/XML:2008
2905  * section 9.11.
2906  *
2907  * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
2908  * 9.9.
2909  */
2910 static const char *
2911 map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
2912                                                    bool tableforest, const char *targetns)
2913 {
2914         int                     i;
2915         char       *xmltn;
2916         char       *tabletypename;
2917         char       *rowtypename;
2918         StringInfoData result;
2919
2920         initStringInfo(&result);
2921
2922         if (OidIsValid(relid))
2923         {
2924                 HeapTuple       tuple;
2925                 Form_pg_class reltuple;
2926
2927                 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
2928                 if (!HeapTupleIsValid(tuple))
2929                         elog(ERROR, "cache lookup failed for relation %u", relid);
2930                 reltuple = (Form_pg_class) GETSTRUCT(tuple);
2931
2932                 xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
2933                                                                                            true, false);
2934
2935                 tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
2936                                                                                          get_database_name(MyDatabaseId),
2937                                                                   get_namespace_name(reltuple->relnamespace),
2938                                                                                                  NameStr(reltuple->relname));
2939
2940                 rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
2941                                                                                          get_database_name(MyDatabaseId),
2942                                                                   get_namespace_name(reltuple->relnamespace),
2943                                                                                                  NameStr(reltuple->relname));
2944
2945                 ReleaseSysCache(tuple);
2946         }
2947         else
2948         {
2949                 if (tableforest)
2950                         xmltn = "row";
2951                 else
2952                         xmltn = "table";
2953
2954                 tabletypename = "TableType";
2955                 rowtypename = "RowType";
2956         }
2957
2958         xsd_schema_element_start(&result, targetns);
2959
2960         appendStringInfoString(&result,
2961                                    map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
2962
2963         appendStringInfo(&result,
2964                                          "<xsd:complexType name=\"%s\">\n"
2965                                          "  <xsd:sequence>\n",
2966                                          rowtypename);
2967
2968         for (i = 0; i < tupdesc->natts; i++)
2969         {
2970                 if (tupdesc->attrs[i]->attisdropped)
2971                         continue;
2972                 appendStringInfo(&result,
2973                            "    <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
2974                   map_sql_identifier_to_xml_name(NameStr(tupdesc->attrs[i]->attname),
2975                                                                                  true, false),
2976                                    map_sql_type_to_xml_name(tupdesc->attrs[i]->atttypid, -1),
2977                                                  nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
2978         }
2979
2980         appendStringInfoString(&result,
2981                                                    "  </xsd:sequence>\n"
2982                                                    "</xsd:complexType>\n\n");
2983
2984         if (!tableforest)
2985         {
2986                 appendStringInfo(&result,
2987                                                  "<xsd:complexType name=\"%s\">\n"
2988                                                  "  <xsd:sequence>\n"
2989                                                  "    <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
2990                                                  "  </xsd:sequence>\n"
2991                                                  "</xsd:complexType>\n\n",
2992                                                  tabletypename, rowtypename);
2993
2994                 appendStringInfo(&result,
2995                                                  "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
2996                                                  xmltn, tabletypename);
2997         }
2998         else
2999                 appendStringInfo(&result,
3000                                                  "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3001                                                  xmltn, rowtypename);
3002
3003         xsd_schema_element_end(&result);
3004
3005         return result.data;
3006 }
3007
3008
3009 /*
3010  * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3011  * section 9.12.
3012  */
3013 static const char *
3014 map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3015                                                                   bool tableforest, const char *targetns)
3016 {
3017         char       *dbname;
3018         char       *nspname;
3019         char       *xmlsn;
3020         char       *schematypename;
3021         StringInfoData result;
3022         ListCell   *cell;
3023
3024         dbname = get_database_name(MyDatabaseId);
3025         nspname = get_namespace_name(nspid);
3026
3027         initStringInfo(&result);
3028
3029         xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3030
3031         schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3032                                                                                                                           dbname,
3033                                                                                                                           nspname,
3034                                                                                                                           NULL);
3035
3036         appendStringInfo(&result,
3037                                          "<xsd:complexType name=\"%s\">\n", schematypename);
3038         if (!tableforest)
3039                 appendStringInfoString(&result,
3040                                                            "  <xsd:all>\n");
3041         else
3042                 appendStringInfoString(&result,
3043                                                            "  <xsd:sequence>\n");
3044
3045         foreach(cell, relid_list)
3046         {
3047                 Oid                     relid = lfirst_oid(cell);
3048                 char       *relname = get_rel_name(relid);
3049                 char       *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3050                 char       *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3051                                                                                                                                           dbname,
3052                                                                                                                                          nspname,
3053                                                                                                                                         relname);
3054
3055                 if (!tableforest)
3056                         appendStringInfo(&result,
3057                                                          "    <xsd:element name=\"%s\" type=\"%s\"/>\n",
3058                                                          xmltn, tabletypename);
3059                 else
3060                         appendStringInfo(&result,
3061                                                          "    <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3062                                                          xmltn, tabletypename);
3063         }
3064
3065         if (!tableforest)
3066                 appendStringInfoString(&result,
3067                                                            "  </xsd:all>\n");
3068         else
3069                 appendStringInfoString(&result,
3070                                                            "  </xsd:sequence>\n");
3071         appendStringInfoString(&result,
3072                                                    "</xsd:complexType>\n\n");
3073
3074         appendStringInfo(&result,
3075                                          "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3076                                          xmlsn, schematypename);
3077
3078         return result.data;
3079 }
3080
3081
3082 /*
3083  * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3084  * section 9.15.
3085  */
3086 static const char *
3087 map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3088                                                                    bool tableforest, const char *targetns)
3089 {
3090         char       *dbname;
3091         char       *xmlcn;
3092         char       *catalogtypename;
3093         StringInfoData result;
3094         ListCell   *cell;
3095
3096         dbname = get_database_name(MyDatabaseId);
3097
3098         initStringInfo(&result);
3099
3100         xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3101
3102         catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3103                                                                                                                            dbname,
3104                                                                                                                            NULL,
3105                                                                                                                            NULL);
3106
3107         appendStringInfo(&result,
3108                                          "<xsd:complexType name=\"%s\">\n", catalogtypename);
3109         appendStringInfoString(&result,
3110                                                    "  <xsd:all>\n");
3111
3112         foreach(cell, nspid_list)
3113         {
3114                 Oid                     nspid = lfirst_oid(cell);
3115                 char       *nspname = get_namespace_name(nspid);
3116                 char       *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3117                 char       *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3118                                                                                                                                           dbname,
3119                                                                                                                                          nspname,
3120                                                                                                                                            NULL);
3121
3122                 appendStringInfo(&result,
3123                                                  "    <xsd:element name=\"%s\" type=\"%s\"/>\n",
3124                                                  xmlsn, schematypename);
3125         }
3126
3127         appendStringInfoString(&result,
3128                                                    "  </xsd:all>\n");
3129         appendStringInfoString(&result,
3130                                                    "</xsd:complexType>\n\n");
3131
3132         appendStringInfo(&result,
3133                                          "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3134                                          xmlcn, catalogtypename);
3135
3136         return result.data;
3137 }
3138
3139
3140 /*
3141  * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3142  */
3143 static const char *
3144 map_sql_type_to_xml_name(Oid typeoid, int typmod)
3145 {
3146         StringInfoData result;
3147
3148         initStringInfo(&result);
3149
3150         switch (typeoid)
3151         {
3152                 case BPCHAROID:
3153                         if (typmod == -1)
3154                                 appendStringInfo(&result, "CHAR");
3155                         else
3156                                 appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3157                         break;
3158                 case VARCHAROID:
3159                         if (typmod == -1)
3160                                 appendStringInfo(&result, "VARCHAR");
3161                         else
3162                                 appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3163                         break;
3164                 case NUMERICOID:
3165                         if (typmod == -1)
3166                                 appendStringInfo(&result, "NUMERIC");
3167                         else
3168                                 appendStringInfo(&result, "NUMERIC_%d_%d",
3169                                                                  ((typmod - VARHDRSZ) >> 16) & 0xffff,
3170                                                                  (typmod - VARHDRSZ) & 0xffff);
3171                         break;
3172                 case INT4OID:
3173                         appendStringInfo(&result, "INTEGER");
3174                         break;
3175                 case INT2OID:
3176                         appendStringInfo(&result, "SMALLINT");
3177                         break;
3178                 case INT8OID:
3179                         appendStringInfo(&result, "BIGINT");
3180                         break;
3181                 case FLOAT4OID:
3182                         appendStringInfo(&result, "REAL");
3183                         break;
3184                 case FLOAT8OID:
3185                         appendStringInfo(&result, "DOUBLE");
3186                         break;
3187                 case BOOLOID:
3188                         appendStringInfo(&result, "BOOLEAN");
3189                         break;
3190                 case TIMEOID:
3191                         if (typmod == -1)
3192                                 appendStringInfo(&result, "TIME");
3193                         else
3194                                 appendStringInfo(&result, "TIME_%d", typmod);
3195                         break;
3196                 case TIMETZOID:
3197                         if (typmod == -1)
3198                                 appendStringInfo(&result, "TIME_WTZ");
3199                         else
3200                                 appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3201                         break;
3202                 case TIMESTAMPOID:
3203                         if (typmod == -1)
3204                                 appendStringInfo(&result, "TIMESTAMP");
3205                         else
3206                                 appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3207                         break;
3208                 case TIMESTAMPTZOID:
3209                         if (typmod == -1)
3210                                 appendStringInfo(&result, "TIMESTAMP_WTZ");
3211                         else
3212                                 appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3213                         break;
3214                 case DATEOID:
3215                         appendStringInfo(&result, "DATE");
3216                         break;
3217                 case XMLOID:
3218                         appendStringInfo(&result, "XML");
3219                         break;
3220                 default:
3221                         {
3222                                 HeapTuple       tuple;
3223                                 Form_pg_type typtuple;
3224
3225                                 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3226                                 if (!HeapTupleIsValid(tuple))
3227                                         elog(ERROR, "cache lookup failed for type %u", typeoid);
3228                                 typtuple = (Form_pg_type) GETSTRUCT(tuple);
3229
3230                                 appendStringInfoString(&result,
3231                                                                            map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3232                                                                                          get_database_name(MyDatabaseId),
3233                                                                   get_namespace_name(typtuple->typnamespace),
3234                                                                                                 NameStr(typtuple->typname)));
3235
3236                                 ReleaseSysCache(tuple);
3237                         }
3238         }
3239
3240         return result.data;
3241 }
3242
3243
3244 /*
3245  * Map a collection of SQL data types to XML Schema data types; see
3246  * SQL/XML:2008 section 9.7.
3247  */
3248 static const char *
3249 map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
3250 {
3251         List       *uniquetypes = NIL;
3252         int                     i;
3253         StringInfoData result;
3254         ListCell   *cell0;
3255
3256         /* extract all column types used in the set of TupleDescs */
3257         foreach(cell0, tupdesc_list)
3258         {
3259                 TupleDesc       tupdesc = (TupleDesc) lfirst(cell0);
3260
3261                 for (i = 0; i < tupdesc->natts; i++)
3262                 {
3263                         if (tupdesc->attrs[i]->attisdropped)
3264                                 continue;
3265                         uniquetypes = list_append_unique_oid(uniquetypes,
3266                                                                                                  tupdesc->attrs[i]->atttypid);
3267                 }
3268         }
3269
3270         /* add base types of domains */
3271         foreach(cell0, uniquetypes)
3272         {
3273                 Oid                     typid = lfirst_oid(cell0);
3274                 Oid                     basetypid = getBaseType(typid);
3275
3276                 if (basetypid != typid)
3277                         uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3278         }
3279
3280         /* Convert to textual form */
3281         initStringInfo(&result);
3282
3283         foreach(cell0, uniquetypes)
3284         {
3285                 appendStringInfo(&result, "%s\n",
3286                                                  map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3287                                                                                                                 -1));
3288         }
3289
3290         return result.data;
3291 }
3292
3293
3294 /*
3295  * Map an SQL data type to a named XML Schema data type; see
3296  * SQL/XML:2008 sections 9.5 and 9.6.
3297  *
3298  * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3299  * a name attribute, which this function does.  The name-less version
3300  * 9.5 doesn't appear to be required anywhere.)
3301  */
3302 static const char *
3303 map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3304 {
3305         StringInfoData result;
3306         const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3307
3308         initStringInfo(&result);
3309
3310         if (typeoid == XMLOID)
3311         {
3312                 appendStringInfo(&result,
3313                                                  "<xsd:complexType mixed=\"true\">\n"
3314                                                  "  <xsd:sequence>\n"
3315                                                  "    <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3316                                                  "  </xsd:sequence>\n"
3317                                                  "</xsd:complexType>\n");
3318         }
3319         else
3320         {
3321                 appendStringInfo(&result,
3322                                                  "<xsd:simpleType name=\"%s\">\n", typename);
3323
3324                 switch (typeoid)
3325                 {
3326                         case BPCHAROID:
3327                         case VARCHAROID:
3328                         case TEXTOID:
3329                                 appendStringInfo(&result,
3330                                                                  "  <xsd:restriction base=\"xsd:string\">\n");
3331                                 if (typmod != -1)
3332                                         appendStringInfo(&result,
3333                                                                          "    <xsd:maxLength value=\"%d\"/>\n",
3334                                                                          typmod - VARHDRSZ);
3335                                 appendStringInfo(&result,
3336                                                                  "  </xsd:restriction>\n");
3337                                 break;
3338
3339                         case BYTEAOID:
3340                                 appendStringInfo(&result,
3341                                                                  "  <xsd:restriction base=\"xsd:%s\">\n"
3342                                                                  "  </xsd:restriction>\n",
3343                                 xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3344                                 break;
3345
3346                         case NUMERICOID:
3347                                 if (typmod != -1)
3348                                         appendStringInfo(&result,
3349                                                                  "  <xsd:restriction base=\"xsd:decimal\">\n"
3350                                                                          "    <xsd:totalDigits value=\"%d\"/>\n"
3351                                                                    "    <xsd:fractionDigits value=\"%d\"/>\n"
3352                                                                          "  </xsd:restriction>\n",
3353                                                                          ((typmod - VARHDRSZ) >> 16) & 0xffff,
3354                                                                          (typmod - VARHDRSZ) & 0xffff);
3355                                 break;
3356
3357                         case INT2OID:
3358                                 appendStringInfo(&result,
3359                                                                  "  <xsd:restriction base=\"xsd:short\">\n"
3360                                                                  "    <xsd:maxInclusive value=\"%d\"/>\n"
3361                                                                  "    <xsd:minInclusive value=\"%d\"/>\n"
3362                                                                  "  </xsd:restriction>\n",
3363                                                                  SHRT_MAX, SHRT_MIN);
3364                                 break;
3365
3366                         case INT4OID:
3367                                 appendStringInfo(&result,
3368                                                                  "  <xsd:restriction base=\"xsd:int\">\n"
3369                                                                  "    <xsd:maxInclusive value=\"%d\"/>\n"
3370                                                                  "    <xsd:minInclusive value=\"%d\"/>\n"
3371                                                                  "  </xsd:restriction>\n",
3372                                                                  INT_MAX, INT_MIN);
3373                                 break;
3374
3375                         case INT8OID:
3376                                 appendStringInfo(&result,
3377                                                                  "  <xsd:restriction base=\"xsd:long\">\n"
3378                                            "    <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
3379                                            "    <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
3380                                                                  "  </xsd:restriction>\n",
3381                                                            (((uint64) 1) << (sizeof(int64) * 8 - 1)) - 1,
3382                                                                  (((uint64) 1) << (sizeof(int64) * 8 - 1)));
3383                                 break;
3384
3385                         case FLOAT4OID:
3386                                 appendStringInfo(&result,
3387                                 "  <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
3388                                 break;
3389
3390                         case FLOAT8OID:
3391                                 appendStringInfo(&result,
3392                                                                  "  <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
3393                                 break;
3394
3395                         case BOOLOID:
3396                                 appendStringInfo(&result,
3397                                                                  "  <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
3398                                 break;
3399
3400                         case TIMEOID:
3401                         case TIMETZOID:
3402                                 {
3403                                         const char *tz = (typeoid == TIMETZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3404
3405                                         if (typmod == -1)
3406                                                 appendStringInfo(&result,
3407                                                                         "  <xsd:restriction base=\"xsd:time\">\n"
3408                                                                                  "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3409                                                                                  "  </xsd:restriction>\n", tz);
3410                                         else if (typmod == 0)
3411                                                 appendStringInfo(&result,
3412                                                                         "  <xsd:restriction base=\"xsd:time\">\n"
3413                                                                                  "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3414                                                                                  "  </xsd:restriction>\n", tz);
3415                                         else
3416                                                 appendStringInfo(&result,
3417                                                                         "  <xsd:restriction base=\"xsd:time\">\n"
3418                                                                                  "    <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3419                                                         "  </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3420                                         break;
3421                                 }
3422
3423                         case TIMESTAMPOID:
3424                         case TIMESTAMPTZOID:
3425                                 {
3426                                         const char *tz = (typeoid == TIMESTAMPTZOID ? "(+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3427
3428                                         if (typmod == -1)
3429                                                 appendStringInfo(&result,
3430                                                                 "  <xsd:restriction base=\"xsd:dateTime\">\n"
3431                                                                                  "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3432                                                                                  "  </xsd:restriction>\n", tz);
3433                                         else if (typmod == 0)
3434                                                 appendStringInfo(&result,
3435                                                                 "  <xsd:restriction base=\"xsd:dateTime\">\n"
3436                                                                                  "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3437                                                                                  "  </xsd:restriction>\n", tz);
3438                                         else
3439                                                 appendStringInfo(&result,
3440                                                                 "  <xsd:restriction base=\"xsd:dateTime\">\n"
3441                                                                                  "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3442                                                         "  </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3443                                         break;
3444                                 }
3445
3446                         case DATEOID:
3447                                 appendStringInfo(&result,
3448                                                                  "  <xsd:restriction base=\"xsd:date\">\n"
3449                                                                  "    <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
3450                                                                  "  </xsd:restriction>\n");
3451                                 break;
3452
3453                         default:
3454                                 if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
3455                                 {
3456                                         Oid                     base_typeoid;
3457                                         int32           base_typmod = -1;
3458
3459                                         base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
3460
3461                                         appendStringInfo(&result,
3462                                                                          "  <xsd:restriction base=\"%s\"/>\n",
3463                                                 map_sql_type_to_xml_name(base_typeoid, base_typmod));
3464                                 }
3465                                 break;
3466                 }
3467                 appendStringInfo(&result,
3468                                                  "</xsd:simpleType>\n");
3469         }
3470
3471         return result.data;
3472 }
3473
3474
3475 /*
3476  * Map an SQL row to an XML element, taking the row from the active
3477  * SPI cursor.  See also SQL/XML:2008 section 9.10.
3478  */
3479 static void
3480 SPI_sql_row_to_xmlelement(int rownum, StringInfo result, char *tablename,
3481                                                   bool nulls, bool tableforest,
3482                                                   const char *targetns, bool top_level)
3483 {
3484         int                     i;
3485         char       *xmltn;
3486
3487         if (tablename)
3488                 xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3489         else
3490         {
3491                 if (tableforest)
3492                         xmltn = "row";
3493                 else
3494                         xmltn = "table";
3495         }
3496
3497         if (tableforest)
3498                 xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
3499         else
3500                 appendStringInfoString(result, "<row>\n");
3501
3502         for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
3503         {
3504                 char       *colname;
3505                 Datum           colval;
3506                 bool            isnull;
3507
3508                 colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
3509                                                                                                  true, false);
3510                 colval = SPI_getbinval(SPI_tuptable->vals[rownum],
3511                                                            SPI_tuptable->tupdesc,
3512                                                            i,
3513                                                            &isnull);
3514                 if (isnull)
3515                 {
3516                         if (nulls)
3517                                 appendStringInfo(result, "  <%s xsi:nil=\"true\"/>\n", colname);
3518                 }
3519                 else
3520                         appendStringInfo(result, "  <%s>%s</%s>\n",
3521                                                          colname,
3522                                                          map_sql_value_to_xml_value(colval,
3523                                                           SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
3524                                                          colname);
3525         }
3526
3527         if (tableforest)
3528         {
3529                 xmldata_root_element_end(result, xmltn);
3530                 appendStringInfoChar(result, '\n');
3531         }
3532         else
3533                 appendStringInfoString(result, "</row>\n\n");
3534 }
3535
3536
3537 /*
3538  * XPath related functions
3539  */
3540
3541 #ifdef USE_LIBXML
3542
3543 /*
3544  * Convert XML node to text (dump subtree in case of element,
3545  * return value otherwise)
3546  */
3547 static text *
3548 xml_xmlnodetoxmltype(xmlNodePtr cur)
3549 {
3550         xmltype    *result;
3551
3552         if (cur->type == XML_ELEMENT_NODE)
3553         {
3554                 xmlBufferPtr buf;
3555
3556                 buf = xmlBufferCreate();
3557                 PG_TRY();
3558                 {
3559                         xmlNodeDump(buf, NULL, cur, 0, 1);
3560                         result = xmlBuffer_to_xmltype(buf);
3561                 }
3562                 PG_CATCH();
3563                 {
3564                         xmlBufferFree(buf);
3565                         PG_RE_THROW();
3566                 }
3567                 PG_END_TRY();
3568                 xmlBufferFree(buf);
3569         }
3570         else
3571         {
3572                 xmlChar    *str;
3573
3574                 str = xmlXPathCastNodeToString(cur);
3575                 PG_TRY();
3576                 {
3577                         /* Here we rely on XML having the same representation as TEXT */
3578                         char   *escaped = escape_xml((char *) str);
3579
3580                         result = (xmltype *) cstring_to_text(escaped);
3581                         pfree(escaped);
3582                 }
3583                 PG_CATCH();
3584                 {
3585                         xmlFree(str);
3586                         PG_RE_THROW();
3587                 }
3588                 PG_END_TRY();
3589                 xmlFree(str);
3590         }
3591
3592         return result;
3593 }
3594
3595 /*
3596  * Convert an XML XPath object (the result of evaluating an XPath expression)
3597  * to an array of xml values, which is returned at *astate.  The function
3598  * result value is the number of elements in the array.
3599  *
3600  * If "astate" is NULL then we don't generate the array value, but we still
3601  * return the number of elements it would have had.
3602  *
3603  * Nodesets are converted to an array containing the nodes' textual
3604  * representations.  Primitive values (float, double, string) are converted
3605  * to a single-element array containing the value's string representation.
3606  */
3607 static int
3608 xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
3609                                            ArrayBuildState **astate)
3610 {
3611         int                     result = 0;
3612         Datum           datum;
3613         Oid                     datumtype;
3614         char       *result_str;
3615
3616         if (astate != NULL)
3617                 *astate = NULL;
3618
3619         switch (xpathobj->type)
3620         {
3621                 case XPATH_NODESET:
3622                         if (xpathobj->nodesetval != NULL)
3623                         {
3624                                 result = xpathobj->nodesetval->nodeNr;
3625                                 if (astate != NULL)
3626                                 {
3627                                         int             i;
3628
3629                                         for (i = 0; i < result; i++)
3630                                         {
3631                                                 datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i]));
3632                                                 *astate = accumArrayResult(*astate, datum,
3633                                                                                                    false, XMLOID,
3634                                                                                                    CurrentMemoryContext);
3635                                         }
3636                                 }
3637                         }
3638                         return result;
3639
3640                 case XPATH_BOOLEAN:
3641                         if (astate == NULL)
3642                                 return 1;
3643                         datum = BoolGetDatum(xpathobj->boolval);
3644                         datumtype = BOOLOID;
3645                         break;
3646
3647                 case XPATH_NUMBER:
3648                         if (astate == NULL)
3649                                 return 1;
3650                         datum = Float8GetDatum(xpathobj->floatval);
3651                         datumtype = FLOAT8OID;
3652                         break;
3653
3654                 case XPATH_STRING:
3655                         if (astate == NULL)
3656                                 return 1;
3657                         datum = CStringGetDatum((char *) xpathobj->stringval);
3658                         datumtype = CSTRINGOID;
3659                         break;
3660
3661                 default:
3662                         elog(ERROR, "xpath expression result type %d is unsupported",
3663                                  xpathobj->type);
3664                         return 0;                       /* keep compiler quiet */
3665         }
3666
3667         /* Common code for scalar-value cases */
3668         result_str = map_sql_value_to_xml_value(datum, datumtype, true);
3669         datum = PointerGetDatum(cstring_to_xmltype(result_str));
3670         *astate = accumArrayResult(*astate, datum,
3671                                                            false, XMLOID,
3672                                                            CurrentMemoryContext);
3673         return 1;
3674 }
3675
3676
3677 /*
3678  * Common code for xpath() and xmlexists()
3679  *
3680  * Evaluate XPath expression and return number of nodes in res_items
3681  * and array of XML values in astate.  Either of those pointers can be
3682  * NULL if the corresponding result isn't wanted.
3683  *
3684  * It is up to the user to ensure that the XML passed is in fact
3685  * an XML document - XPath doesn't work easily on fragments without
3686  * a context node being known.
3687  */
3688 static void
3689 xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
3690                            int *res_nitems, ArrayBuildState **astate)
3691 {
3692         PgXmlErrorContext *xmlerrcxt;
3693         volatile xmlParserCtxtPtr ctxt = NULL;
3694         volatile xmlDocPtr doc = NULL;
3695         volatile xmlXPathContextPtr xpathctx = NULL;
3696         volatile xmlXPathCompExprPtr xpathcomp = NULL;
3697         volatile xmlXPathObjectPtr xpathobj = NULL;
3698         char       *datastr;
3699         int32           len;
3700         int32           xpath_len;
3701         xmlChar    *string;
3702         xmlChar    *xpath_expr;
3703         int                     i;
3704         int                     ndim;
3705         Datum      *ns_names_uris;
3706         bool       *ns_names_uris_nulls;
3707         int                     ns_count;
3708
3709         /*
3710          * Namespace mappings are passed as text[].  If an empty array is passed
3711          * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
3712          * Else, a 2-dimensional array with length of the second axis being equal
3713          * to 2 should be passed, i.e., every subarray contains 2 elements, the
3714          * first element defining the name, the second one the URI.  Example:
3715          * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
3716          * 'http://example2.com']].
3717          */
3718         ndim = namespaces ? ARR_NDIM(namespaces) : 0;
3719         if (ndim != 0)
3720         {
3721                 int                *dims;
3722
3723                 dims = ARR_DIMS(namespaces);
3724
3725                 if (ndim != 2 || dims[1] != 2)
3726                         ereport(ERROR,
3727                                         (errcode(ERRCODE_DATA_EXCEPTION),
3728                                          errmsg("invalid array for XML namespace mapping"),
3729                                          errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
3730
3731                 Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
3732
3733                 deconstruct_array(namespaces, TEXTOID, -1, false, 'i',
3734                                                   &ns_names_uris, &ns_names_uris_nulls,
3735                                                   &ns_count);
3736
3737                 Assert((ns_count % 2) == 0);    /* checked above */
3738                 ns_count /= 2;                  /* count pairs only */
3739         }
3740         else
3741         {
3742                 ns_names_uris = NULL;
3743                 ns_names_uris_nulls = NULL;
3744                 ns_count = 0;
3745         }
3746
3747         datastr = VARDATA(data);
3748         len = VARSIZE(data) - VARHDRSZ;
3749         xpath_len = VARSIZE(xpath_expr_text) - VARHDRSZ;
3750         if (xpath_len == 0)
3751                 ereport(ERROR,
3752                                 (errcode(ERRCODE_DATA_EXCEPTION),
3753                                  errmsg("empty XPath expression")));
3754
3755         string = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
3756         memcpy(string, datastr, len);
3757         string[len] = '\0';
3758
3759         xpath_expr = (xmlChar *) palloc((xpath_len + 1) * sizeof(xmlChar));
3760         memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
3761         xpath_expr[xpath_len] = '\0';
3762
3763         xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
3764
3765         PG_TRY();
3766         {
3767                 xmlInitParser();
3768
3769                 /*
3770                  * redundant XML parsing (two parsings for the same value during one
3771                  * command execution are possible)
3772                  */
3773                 ctxt = xmlNewParserCtxt();
3774                 if (ctxt == NULL || xmlerrcxt->err_occurred)
3775                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3776                                                 "could not allocate parser context");
3777                 doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
3778                 if (doc == NULL || xmlerrcxt->err_occurred)
3779                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
3780                                                 "could not parse XML document");
3781                 xpathctx = xmlXPathNewContext(doc);
3782                 if (xpathctx == NULL || xmlerrcxt->err_occurred)
3783                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3784                                                 "could not allocate XPath context");
3785                 xpathctx->node = xmlDocGetRootElement(doc);
3786                 if (xpathctx->node == NULL || xmlerrcxt->err_occurred)
3787                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
3788                                                 "could not find root XML element");
3789
3790                 /* register namespaces, if any */
3791                 if (ns_count > 0)
3792                 {
3793                         for (i = 0; i < ns_count; i++)
3794                         {
3795                                 char       *ns_name;
3796                                 char       *ns_uri;
3797
3798                                 if (ns_names_uris_nulls[i * 2] ||
3799                                         ns_names_uris_nulls[i * 2 + 1])
3800                                         ereport(ERROR,
3801                                                         (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
3802                                           errmsg("neither namespace name nor URI may be null")));
3803                                 ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
3804                                 ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
3805                                 if (xmlXPathRegisterNs(xpathctx,
3806                                                                            (xmlChar *) ns_name,
3807                                                                            (xmlChar *) ns_uri) != 0)
3808                                         ereport(ERROR,          /* is this an internal error??? */
3809                                                         (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
3810                                                                         ns_name, ns_uri)));
3811                         }
3812                 }
3813
3814                 xpathcomp = xmlXPathCompile(xpath_expr);
3815                 if (xpathcomp == NULL || xmlerrcxt->err_occurred)
3816                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
3817                                                 "invalid XPath expression");
3818
3819                 /*
3820                  * Version 2.6.27 introduces a function named
3821                  * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
3822                  * but we can derive the existence by whether any nodes are returned,
3823                  * thereby preventing a library version upgrade and keeping the code
3824                  * the same.
3825                  */
3826                 xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
3827                 if (xpathobj == NULL || xmlerrcxt->err_occurred)
3828                         xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
3829                                                 "could not create XPath object");
3830
3831                 /*
3832                  * Extract the results as requested.
3833                  */
3834                 if (res_nitems != NULL)
3835                         *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate);
3836                 else
3837                         (void) xml_xpathobjtoxmlarray(xpathobj, astate);
3838         }
3839         PG_CATCH();
3840         {
3841                 if (xpathobj)
3842                         xmlXPathFreeObject(xpathobj);
3843                 if (xpathcomp)
3844                         xmlXPathFreeCompExpr(xpathcomp);
3845                 if (xpathctx)
3846                         xmlXPathFreeContext(xpathctx);
3847                 if (doc)
3848                         xmlFreeDoc(doc);
3849                 if (ctxt)
3850                         xmlFreeParserCtxt(ctxt);
3851
3852                 pg_xml_done(xmlerrcxt, true);
3853
3854                 PG_RE_THROW();
3855         }
3856         PG_END_TRY();
3857
3858         xmlXPathFreeObject(xpathobj);
3859         xmlXPathFreeCompExpr(xpathcomp);
3860         xmlXPathFreeContext(xpathctx);
3861         xmlFreeDoc(doc);
3862         xmlFreeParserCtxt(ctxt);
3863
3864         pg_xml_done(xmlerrcxt, false);
3865 }
3866 #endif   /* USE_LIBXML */
3867
3868 /*
3869  * Evaluate XPath expression and return array of XML values.
3870  *
3871  * As we have no support of XQuery sequences yet, this function seems
3872  * to be the most useful one (array of XML functions plays a role of
3873  * some kind of substitution for XQuery sequences).
3874  */
3875 Datum
3876 xpath(PG_FUNCTION_ARGS)
3877 {
3878 #ifdef USE_LIBXML
3879         text       *xpath_expr_text = PG_GETARG_TEXT_P(0);
3880         xmltype    *data = PG_GETARG_XML_P(1);
3881         ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
3882         int                     res_nitems;
3883         ArrayBuildState *astate;
3884
3885         xpath_internal(xpath_expr_text, data, namespaces,
3886                                    &res_nitems, &astate);
3887
3888         if (res_nitems == 0)
3889                 PG_RETURN_ARRAYTYPE_P(construct_empty_array(XMLOID));
3890         else
3891                 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
3892 #else
3893         NO_XML_SUPPORT();
3894         return 0;
3895 #endif
3896 }
3897
3898 /*
3899  * Determines if the node specified by the supplied XPath exists
3900  * in a given XML document, returning a boolean.
3901  */
3902 Datum
3903 xmlexists(PG_FUNCTION_ARGS)
3904 {
3905 #ifdef USE_LIBXML
3906         text       *xpath_expr_text = PG_GETARG_TEXT_P(0);
3907         xmltype    *data = PG_GETARG_XML_P(1);
3908         int                     res_nitems;
3909
3910         xpath_internal(xpath_expr_text, data, NULL,
3911                                    &res_nitems, NULL);
3912
3913         PG_RETURN_BOOL(res_nitems > 0);
3914 #else
3915         NO_XML_SUPPORT();
3916         return 0;
3917 #endif
3918 }
3919
3920 /*
3921  * Determines if the node specified by the supplied XPath exists
3922  * in a given XML document, returning a boolean. Differs from
3923  * xmlexists as it supports namespaces and is not defined in SQL/XML.
3924  */
3925 Datum
3926 xpath_exists(PG_FUNCTION_ARGS)
3927 {
3928 #ifdef USE_LIBXML
3929         text       *xpath_expr_text = PG_GETARG_TEXT_P(0);
3930         xmltype    *data = PG_GETARG_XML_P(1);
3931         ArrayType  *namespaces = PG_GETARG_ARRAYTYPE_P(2);
3932         int                     res_nitems;
3933
3934         xpath_internal(xpath_expr_text, data, namespaces,
3935                                    &res_nitems, NULL);
3936
3937         PG_RETURN_BOOL(res_nitems > 0);
3938 #else
3939         NO_XML_SUPPORT();
3940         return 0;
3941 #endif
3942 }
3943
3944 /*
3945  * Functions for checking well-formed-ness
3946  */
3947
#ifdef USE_LIBXML
/*
 * wellformed_xml
 *		Shared worker for the xml_is_well_formed* functions.
 *
 * Attempts to parse "data" with xml_parse() under the given XML option,
 * using the database encoding.  Returns true if the parse completes and
 * false if it raises an error; the error is caught and flushed here rather
 * than propagated to the caller.  Any document produced by the parse is
 * freed before returning.
 */
static bool
wellformed_xml(text *data, XmlOptionType xmloption_arg)
{
	/* volatile: assigned inside PG_TRY and examined after it */
	volatile xmlDocPtr parsed = NULL;

	/* assume success; only the catch block ever changes this */
	bool		ok = true;

	PG_TRY();
	{
		parsed = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());
	}
	PG_CATCH();
	{
		/* swallow the error: a failed parse just means "not well-formed" */
		FlushErrorState();
		ok = false;
	}
	PG_END_TRY();

	if (parsed != NULL)
		xmlFreeDoc(parsed);

	return ok;
}
#endif
3974
3975 Datum
3976 xml_is_well_formed(PG_FUNCTION_ARGS)
3977 {
3978 #ifdef USE_LIBXML
3979         text       *data = PG_GETARG_TEXT_P(0);
3980
3981         PG_RETURN_BOOL(wellformed_xml(data, xmloption));
3982 #else
3983         NO_XML_SUPPORT();
3984         return 0;
3985 #endif   /* not USE_LIBXML */
3986 }
3987
3988 Datum
3989 xml_is_well_formed_document(PG_FUNCTION_ARGS)
3990 {
3991 #ifdef USE_LIBXML
3992         text       *data = PG_GETARG_TEXT_P(0);
3993
3994         PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
3995 #else
3996         NO_XML_SUPPORT();
3997         return 0;
3998 #endif   /* not USE_LIBXML */
3999 }
4000
4001 Datum
4002 xml_is_well_formed_content(PG_FUNCTION_ARGS)
4003 {
4004 #ifdef USE_LIBXML
4005         text       *data = PG_GETARG_TEXT_P(0);
4006
4007         PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4008 #else
4009         NO_XML_SUPPORT();
4010         return 0;
4011 #endif   /* not USE_LIBXML */
4012 }