<para>
The <type>xml</type> type can store well-formed
<quote>documents</quote>, as defined by the XML standard, as well
- as <quote>content</quote> fragments, which are defined by the
- production <literal>XMLDecl? content</literal> in the XML
- standard. Roughly, this means that content fragments can have
+ as <quote>content</quote> fragments, which are defined by reference
+ to the more permissive
+ <ulink url="https://www.w3.org/TR/2010/REC-xpath-datamodel-20101214/#DocumentNode"><quote>document node</quote></ulink>
+ of the XQuery and XPath data model.
+ Roughly, this means that content fragments can have
more than one top-level element or character node. The expression
<literal><replaceable>xmlvalue</replaceable> IS DOCUMENT</literal>
can be used to evaluate whether a particular <type>xml</type>
data are allowed.
</para>
- <note>
- <para>
- With the default XML option setting, you cannot directly cast
- character strings to type <type>xml</type> if they contain a
- document type declaration, because the definition of XML content
- fragment does not accept them. If you need to do that, either
- use <literal>XMLPARSE</literal> or change the XML option.
- </para>
- </note>
-
</sect2>
<sect2>
xmlChar **version, xmlChar **encoding, int *standalone);
static bool print_xml_decl(StringInfo buf, const xmlChar *version,
pg_enc encoding, int standalone);
+static bool xml_doctype_in_content(const xmlChar *str);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
bool preserve_whitespace, int encoding);
static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
goto finished;
- /* if next char is name char, it's a PI like <?xml-stylesheet ...?> */
- utf8len = strlen((const char *) (p + 5));
+ /*
+ * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
+ * rather than an XMLDecl, so we have done what we came to do and found no
+ * XMLDecl.
+ *
+ * We need an input length value for xmlGetUTF8Char, but there's no need
+ * to count the whole document size, so use strnlen not strlen.
+ */
+ utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
utf8char = xmlGetUTF8Char(p + 5, &utf8len);
if (PG_XMLISNAMECHAR(utf8char))
goto finished;
return false;
}
+/*
+ * Test whether an input that is to be parsed as CONTENT contains a DTD.
+ *
+ * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
+ * satisfied by a document with a DTD, which is a bit of a wart, as it means
+ * the CONTENT type is not a proper superset of DOCUMENT.  SQL/XML:2006 and
+ * later fix that, by redefining content with reference to the "more
+ * permissive" Document Node of the XQuery/XPath Data Model, such that any
+ * DOCUMENT value is indeed also a CONTENT value.  That definition is more
+ * useful, as CONTENT becomes usable for parsing input of unknown form (think
+ * pg_restore).
+ *
+ * As used below in xml_parse when parsing for CONTENT, libxml does not give
+ * us the 2006+ behavior, but only the 2003; it will choke if the input has
+ * a DTD.  But we can provide the 2006+ definition of CONTENT easily enough,
+ * by detecting this case first and simply doing the parse as DOCUMENT.
+ *
+ * A DTD can be found arbitrarily far in, but that would be a contrived case;
+ * it will ordinarily start within a few dozen characters.  The only things
+ * that can precede it are an XMLDecl (here, the caller will have called
+ * parse_xml_decl already), whitespace, comments, and processing instructions.
+ * This function need only return true if it sees a valid sequence of such
+ * things leading to <!DOCTYPE.  It can simply return false in any other
+ * cases, including malformed input; that will mean the input gets parsed as
+ * CONTENT as originally planned, with libxml reporting any errors.
+ *
+ * str is the NUL-terminated input positioned just past any XMLDecl.
+ * Returns true if only whitespace, comments, and processing instructions
+ * precede a "<!DOCTYPE"; returns false otherwise.
+ *
+ * This is only to be called from xml_parse, when pg_xml_init has already
+ * been called.  The input is already in UTF8 encoding.
+ */
+static bool
+xml_doctype_in_content(const xmlChar *str)
+{
+	const xmlChar *p = str;
+
+	for (;;)
+	{
+		const xmlChar *e;
+
+		SKIP_XML_SPACE(p);
+		if (*p != '<')
+			return false;
+		p++;
+
+		if (*p == '!')
+		{
+			p++;
+
+			/* if we see <!DOCTYPE, we can return true */
+			if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0)
+				return true;
+
+			/* otherwise, if it's not a comment, fail */
+			if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0)
+				return false;
+			/* find end of comment: find -- and a > must follow */
+			p = xmlStrstr(p + 2, (xmlChar *) "--");
+			if (!p || p[2] != '>')
+				return false;
+			/* advance over comment, and keep scanning */
+			p += 3;
+			continue;
+		}
+
+		/* otherwise, if it's not a PI <?target something?>, fail */
+		if (*p != '?')
+			return false;
+		p++;
+
+		/* find end of PI (the string ?> is forbidden within a PI) */
+		e = xmlStrstr(p, (xmlChar *) "?>");
+		if (!e)
+			return false;
+
+		/*
+		 * We don't check PIs carefully, but do reject a target that is
+		 * exactly "xml" (in any letter case), since that would be a misplaced
+		 * XMLDecl.  A target that merely begins with those letters, such as
+		 * xml-stylesheet, is an ordinary PI and must not stop the scan.  The
+		 * target is exactly "xml" if the PI ends right after it or whitespace
+		 * follows; p[3] is safe to read because at worst it is the '?' at e.
+		 */
+		if (e - p >= 3 && xmlStrncasecmp(p, (xmlChar *) "xml", 3) == 0 &&
+			(p + 3 == e ||
+			 p[3] == ' ' || p[3] == '\t' || p[3] == '\r' || p[3] == '\n'))
+			return false;
+
+		/* advance over PI, keep scanning */
+		p = e + 2;
+	}
+}
+
/*
* Convert a C string to XML internal representation
/* Use a TRY block to ensure we clean up correctly */
PG_TRY();
{
+ bool parse_as_document = false;
+ int res_code;
+ size_t count = 0;
+ xmlChar *version = NULL;
+ int standalone = 0;
+
xmlInitParser();
ctxt = xmlNewParserCtxt();
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate parser context");
+ /* Decide whether to parse as document or content */
if (xmloption_arg == XMLOPTION_DOCUMENT)
+ parse_as_document = true;
+ else
+ {
+ /* Parse and skip over the XML declaration, if any */
+ res_code = parse_xml_decl(utf8string,
+ &count, &version, NULL, &standalone);
+ if (res_code != 0)
+ xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
+ "invalid XML content: invalid XML declaration",
+ res_code);
+
+ /* Is there a DOCTYPE element? */
+ if (xml_doctype_in_content(utf8string + count))
+ parse_as_document = true;
+ }
+
+ if (parse_as_document)
{
/*
* Note, that here we try to apply DTD defaults
XML_PARSE_NOENT | XML_PARSE_DTDATTR
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
if (doc == NULL || xmlerrcxt->err_occurred)
- xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
- "invalid XML document");
+ {
+ /* Use original option to decide which error code to throw */
+ if (xmloption_arg == XMLOPTION_DOCUMENT)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+ "invalid XML document");
+ else
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
+ "invalid XML content");
+ }
}
else
{
- int res_code;
- size_t count;
- xmlChar *version;
- int standalone;
-
- res_code = parse_xml_decl(utf8string,
- &count, &version, NULL, &standalone);
- if (res_code != 0)
- xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
- "invalid XML content: invalid XML declaration",
- res_code);
-
doc = xmlNewDoc(version);
Assert(doc->encoding == NULL);
doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
DETAIL: line 1: Start tag expected, '<' not found
bad
^
+SELECT xml '<!DOCTYPE a><a/><b/>';
+ERROR: invalid XML document
+LINE 1: SELECT xml '<!DOCTYPE a><a/><b/>';
+ ^
+DETAIL: line 1: Extra content at the end of the document
+<!DOCTYPE a><a/><b/>
+ ^
SET XML OPTION CONTENT;
EXECUTE foo ('<bar/>');
xmlconcat
<foo/>good
(1 row)
+SELECT xml '<!-- in SQL:2006+ a doc is content too--> <?y z?> <!DOCTYPE a><a/>';
+ xml
+--------------------------------------------------------------------
+ <!-- in SQL:2006+ a doc is content too--> <?y z?> <!DOCTYPE a><a/>
+(1 row)
+
+SELECT xml '<?xml version="1.0"?> <!-- hi--> <!DOCTYPE a><a/>';
+ xml
+------------------------------
+ <!-- hi--> <!DOCTYPE a><a/>
+(1 row)
+
+SELECT xml '<!DOCTYPE a><a/>';
+ xml
+------------------
+ <!DOCTYPE a><a/>
+(1 row)
+
+SELECT xml '<!-- hi--> oops <!DOCTYPE a><a/>';
+ERROR: invalid XML content
+LINE 1: SELECT xml '<!-- hi--> oops <!DOCTYPE a><a/>';
+ ^
+DETAIL: line 1: StartTag: invalid element name
+<!-- hi--> oops <!DOCTYPE a><a/>
+ ^
+SELECT xml '<!-- hi--> <oops/> <!DOCTYPE a><a/>';
+ERROR: invalid XML content
+LINE 1: SELECT xml '<!-- hi--> <oops/> <!DOCTYPE a><a/>';
+ ^
+DETAIL: line 1: StartTag: invalid element name
+<!-- hi--> <oops/> <!DOCTYPE a><a/>
+ ^
+SELECT xml '<!DOCTYPE a><a/><b/>';
+ERROR: invalid XML content
+LINE 1: SELECT xml '<!DOCTYPE a><a/><b/>';
+ ^
+DETAIL: line 1: Extra content at the end of the document
+<!DOCTYPE a><a/><b/>
+ ^
-- Test backwards parsing
CREATE VIEW xmlview1 AS SELECT xmlcomment('test');
CREATE VIEW xmlview2 AS SELECT xmlconcat('hello', 'you');
ERROR: prepared statement "foo" does not exist
EXECUTE foo ('bad');
ERROR: prepared statement "foo" does not exist
+SELECT xml '<!DOCTYPE a><a/><b/>';
+ERROR: unsupported XML feature
+LINE 1: SELECT xml '<!DOCTYPE a><a/><b/>';
+ ^
+DETAIL: This functionality requires the server to be built with libxml support.
+HINT: You need to rebuild PostgreSQL using --with-libxml.
SET XML OPTION CONTENT;
EXECUTE foo ('<bar/>');
ERROR: prepared statement "foo" does not exist
EXECUTE foo ('good');
ERROR: prepared statement "foo" does not exist
+SELECT xml '<!-- in SQL:2006+ a doc is content too--> <?y z?> <!DOCTYPE a><a/>';
+ERROR: unsupported XML feature
+LINE 1: SELECT xml '<!-- in SQL:2006+ a doc is content too--> <?y z?...
+ ^
+DETAIL: This functionality requires the server to be built with libxml support.
+HINT: You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml '<?xml version="1.0"?> <!-- hi--> <!DOCTYPE a><a/>';
+ERROR: unsupported XML feature
+LINE 1: SELECT xml '<?xml version="1.0"?> <!-- hi--> <!DOCTYPE a><a/...
+ ^
+DETAIL: This functionality requires the server to be built with libxml support.
+HINT: You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml '<!DOCTYPE a><a/>';
+ERROR: unsupported XML feature
+LINE 1: SELECT xml '<!DOCTYPE a><a/>';
+ ^
+DETAIL: This functionality requires the server to be built with libxml support.
+HINT: You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml '<!-- hi--> oops <!DOCTYPE a><a/>';
+ERROR: unsupported XML feature
+LINE 1: SELECT xml '<!-- hi--> oops <!DOCTYPE a><a/>';
+ ^
+DETAIL: This functionality requires the server to be built with libxml support.
+HINT: You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml '<!-- hi--> <oops/> <!DOCTYPE a><a/>';
+ERROR: unsupported XML feature
+LINE 1: SELECT xml '<!-- hi--> <oops/> <!DOCTYPE a><a/>';
+ ^
+DETAIL: This functionality requires the server to be built with libxml support.
+HINT: You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml '<!DOCTYPE a><a/><b/>';
+ERROR: unsupported XML feature
+LINE 1: SELECT xml '<!DOCTYPE a><a/><b/>';
+ ^
+DETAIL: This functionality requires the server to be built with libxml support.
+HINT: You need to rebuild PostgreSQL using --with-libxml.
-- Test backwards parsing
CREATE VIEW xmlview1 AS SELECT xmlcomment('test');
CREATE VIEW xmlview2 AS SELECT xmlconcat('hello', 'you');
DETAIL: line 1: Start tag expected, '<' not found
bad
^
+SELECT xml '<!DOCTYPE a><a/><b/>';
+ERROR: invalid XML document
+LINE 1: SELECT xml '<!DOCTYPE a><a/><b/>';
+ ^
+DETAIL: line 1: Extra content at the end of the document
+<!DOCTYPE a><a/><b/>
+ ^
SET XML OPTION CONTENT;
EXECUTE foo ('<bar/>');
xmlconcat
<foo/>good
(1 row)
+SELECT xml '<!-- in SQL:2006+ a doc is content too--> <?y z?> <!DOCTYPE a><a/>';
+ xml
+--------------------------------------------------------------------
+ <!-- in SQL:2006+ a doc is content too--> <?y z?> <!DOCTYPE a><a/>
+(1 row)
+
+SELECT xml '<?xml version="1.0"?> <!-- hi--> <!DOCTYPE a><a/>';
+ xml
+------------------------------
+ <!-- hi--> <!DOCTYPE a><a/>
+(1 row)
+
+SELECT xml '<!DOCTYPE a><a/>';
+ xml
+------------------
+ <!DOCTYPE a><a/>
+(1 row)
+
+SELECT xml '<!-- hi--> oops <!DOCTYPE a><a/>';
+ERROR: invalid XML content
+LINE 1: SELECT xml '<!-- hi--> oops <!DOCTYPE a><a/>';
+ ^
+DETAIL: line 1: StartTag: invalid element name
+<!-- hi--> oops <!DOCTYPE a><a/>
+ ^
+SELECT xml '<!-- hi--> <oops/> <!DOCTYPE a><a/>';
+ERROR: invalid XML content
+LINE 1: SELECT xml '<!-- hi--> <oops/> <!DOCTYPE a><a/>';
+ ^
+DETAIL: line 1: StartTag: invalid element name
+<!-- hi--> <oops/> <!DOCTYPE a><a/>
+ ^
+SELECT xml '<!DOCTYPE a><a/><b/>';
+ERROR: invalid XML content
+LINE 1: SELECT xml '<!DOCTYPE a><a/><b/>';
+ ^
+DETAIL: line 1: Extra content at the end of the document
+<!DOCTYPE a><a/><b/>
+ ^
-- Test backwards parsing
CREATE VIEW xmlview1 AS SELECT xmlcomment('test');
CREATE VIEW xmlview2 AS SELECT xmlconcat('hello', 'you');
SET XML OPTION DOCUMENT;
EXECUTE foo ('<bar/>');
EXECUTE foo ('bad');
+SELECT xml '<!DOCTYPE a><a/><b/>';
SET XML OPTION CONTENT;
EXECUTE foo ('<bar/>');
EXECUTE foo ('good');
+SELECT xml '<!-- in SQL:2006+ a doc is content too--> <?y z?> <!DOCTYPE a><a/>';
+SELECT xml '<?xml version="1.0"?> <!-- hi--> <!DOCTYPE a><a/>';
+SELECT xml '<!DOCTYPE a><a/>';
+SELECT xml '<!-- hi--> oops <!DOCTYPE a><a/>';
+SELECT xml '<!-- hi--> <oops/> <!DOCTYPE a><a/>';
+SELECT xml '<!DOCTYPE a><a/><b/>';
-- Test backwards parsing