]> granicus.if.org Git - postgresql/commitdiff
Fix minor deficiencies in XMLTABLE, xpath(), xmlexists()
authorAlvaro Herrera <alvherre@alvh.no-ip.org>
Thu, 7 Mar 2019 18:21:56 +0000 (15:21 -0300)
committerAlvaro Herrera <alvherre@alvh.no-ip.org>
Thu, 7 Mar 2019 21:16:34 +0000 (18:16 -0300)
Correctly process nodes of more types than previously.  In some cases,
nodes were being ignored (nothing was output); in other cases, trying to
return them resulted in errors about unrecognized nodes.  In yet other
cases, necessary escaping (of XML special characters) was not being
done.  Fix all those (as far as the authors could find) and add
regression tests cases verifying the new behavior.

I (Álvaro) was of two minds about backpatching these changes.  They do
seem bugfixes that would benefit most users of the affected functions;
but on the other hand it would change established behavior in minor
releases, so it seems prudent not to.

Authors: Pavel Stehule, Markus Winand, Chapman Flack
Discussion:
   https://postgr.es/m/CAFj8pRA6J25CtAZ2TuRvxK3gat7-bBUYh0rfE2yM7Hj9GD14Dg@mail.gmail.com
   https://postgr.es/m/8BDB0627-2105-4564-AA76-7849F028B96E@winand.at

The elephant in the room as pointed out by Chapman Flack, not fixed in
this commit, is that we still have XMLTABLE operating on XPath 1.0
instead of the standard-mandated XQuery (or even its subset XPath 2.0).
Fixing that is a major undertaking, however.

doc/src/sgml/func.sgml
src/backend/utils/adt/xml.c
src/test/regress/expected/xml.out
src/test/regress/expected/xml_1.out
src/test/regress/expected/xml_2.out
src/test/regress/sql/xml.sql

index 7e14478db7d150afa45061e7fd0b078029b70d38..03859a78eaa35c71efb2cfd532173d651d4479fe 100644 (file)
@@ -11010,9 +11010,9 @@ $$ AS data;
 
 SELECT xmltable.*
   FROM xmlelements, XMLTABLE('/root' PASSING data COLUMNS element text);
-       element        
-----------------------
-   Hello2a2   bbbCC  
+         element         
+-------------------------
+   Hello2a2   bbbxxxCC  
 ]]></screen>
     </para>
 
index f36aab899a4fa144b248b12b398f32b227da2335..28b3eaaa2010aa7a0afe4b80385b57c6bae1ed34 100644 (file)
@@ -1176,6 +1176,36 @@ pg_xmlCharStrndup(const char *str, size_t len)
        return result;
 }
 
+/*
+ * Copy xmlChar string to PostgreSQL-owned memory, freeing the input.
+ *
+ * The input xmlChar is freed regardless of success of the copy.
+ */
+static char *
+xml_pstrdup_and_free(xmlChar *str)
+{
+       char       *result;
+
+       if (str)
+       {
+               PG_TRY();
+               {
+                       result = pstrdup((char *) str);
+               }
+               PG_CATCH();
+               {
+                       xmlFree(str);
+                       PG_RE_THROW();
+               }
+               PG_END_TRY();
+               xmlFree(str);
+       }
+       else
+               result = NULL;
+
+       return result;
+}
+
 /*
  * str is the null-terminated input string.  Remaining arguments are
  * output arguments; each can be NULL if value is not wanted.
@@ -3678,15 +3708,17 @@ SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
 #ifdef USE_LIBXML
 
 /*
- * Convert XML node to text (dump subtree in case of element,
- * return value otherwise)
+ * Convert XML node to text.
+ *
+ * For attribute and text nodes, return the escaped text.  For anything else,
+ * dump the whole subtree.
  */
 static text *
 xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
 {
        xmltype    *result;
 
-       if (cur->type == XML_ELEMENT_NODE)
+       if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE)
        {
                xmlBufferPtr buf;
                xmlNodePtr      cur_copy;
@@ -4479,9 +4511,9 @@ XmlTableGetValue(TableFuncScanState *state, int colnum,
                /*
                 * There are four possible cases, depending on the number of nodes
                 * returned by the XPath expression and the type of the target column:
-                * a) XPath returns no nodes.  b) One node is returned, and column is
-                * of type XML.  c) One node, column type other than XML.  d) Multiple
-                * nodes are returned.
+                * a) XPath returns no nodes.  b) The target type is XML (return all
+                * as XML).  For non-XML return types:  c) One node (return content).
+                * d) Multiple nodes (error).
                 */
                if (xpathobj->type == XPATH_NODESET)
                {
@@ -4494,84 +4526,69 @@ XmlTableGetValue(TableFuncScanState *state, int colnum,
                        {
                                *isnull = true;
                        }
-                       else if (count == 1 && typid == XMLOID)
-                       {
-                               text       *textstr;
-
-                               /* simple case, result is one value */
-                               textstr = xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[0],
-                                                                                          xtCxt->xmlerrcxt);
-                               cstr = text_to_cstring(textstr);
-                       }
-                       else if (count == 1)
+                       else
                        {
-                               xmlChar    *str;
-                               xmlNodePtr      node;
-
-                               /*
-                                * Most nodes (elements and even attributes) store their data
-                                * in children nodes. If they don't have children nodes, it
-                                * means that they are empty (e.g. <element/>). Text nodes and
-                                * CDATA sections are an exception: they don't have children
-                                * but have content in the Text/CDATA node itself.
-                                */
-                               node = xpathobj->nodesetval->nodeTab[0];
-                               if (node->type != XML_CDATA_SECTION_NODE &&
-                                       node->type != XML_TEXT_NODE)
-                                       node = node->xmlChildrenNode;
-
-                               str = xmlNodeListGetString(xtCxt->doc, node, 1);
-                               if (str != NULL)
+                               if (typid == XMLOID)
                                {
-                                       PG_TRY();
-                                       {
-                                               cstr = pstrdup((char *) str);
-                                       }
-                                       PG_CATCH();
+                                       text       *textstr;
+                                       StringInfoData str;
+
+                                       /* Concatenate serialized values */
+                                       initStringInfo(&str);
+                                       for (int i = 0; i < count; i++)
                                        {
-                                               xmlFree(str);
-                                               PG_RE_THROW();
+                                               textstr =
+                                                       xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
+                                                                                                xtCxt->xmlerrcxt);
+
+                                               appendStringInfoText(&str, textstr);
                                        }
-                                       PG_END_TRY();
-                                       xmlFree(str);
+                                       cstr = str.data;
                                }
                                else
                                {
-                                       /* Ensure mapping of empty tags to PostgreSQL values. */
-                                       cstr = "";
+                                       xmlChar    *str;
+
+                                       if (count > 1)
+                                               ereport(ERROR,
+                                                               (errcode(ERRCODE_CARDINALITY_VIOLATION),
+                                                                errmsg("more than one value returned by column XPath expression")));
+
+                                       str = xmlXPathCastNodeSetToString(xpathobj->nodesetval);
+                                       cstr = str ? xml_pstrdup_and_free(str) : "";
                                }
                        }
+               }
+               else if (xpathobj->type == XPATH_STRING)
+               {
+                       /* Content should be escaped when target will be XML */
+                       if (typid == XMLOID)
+                               cstr = escape_xml((char *) xpathobj->stringval);
                        else
-                       {
-                               StringInfoData str;
-                               int                     i;
+                               cstr = (char *) xpathobj->stringval;
+               }
+               else if (xpathobj->type == XPATH_BOOLEAN)
+               {
+                       char            typcategory;
+                       bool            typispreferred;
+                       xmlChar    *str;
 
-                               Assert(count > 1);
+                       /* Allow implicit casting from boolean to numbers */
+                       get_type_category_preferred(typid, &typcategory, &typispreferred);
 
-                               /*
-                                * When evaluating the XPath expression returns multiple
-                                * nodes, the result is the concatenation of them all. The
-                                * target type must be XML.
-                                */
-                               if (typid != XMLOID)
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_CARDINALITY_VIOLATION),
-                                                        errmsg("more than one value returned by column XPath expression")));
+                       if (typcategory != TYPCATEGORY_NUMERIC)
+                               str = xmlXPathCastBooleanToString(xpathobj->boolval);
+                       else
+                               str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval));
 
-                               /* Concatenate serialized values */
-                               initStringInfo(&str);
-                               for (i = 0; i < count; i++)
-                               {
-                                       appendStringInfoText(&str,
-                                                                                xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
-                                                                                                                         xtCxt->xmlerrcxt));
-                               }
-                               cstr = str.data;
-                       }
+                       cstr = xml_pstrdup_and_free(str);
                }
-               else if (xpathobj->type == XPATH_STRING)
+               else if (xpathobj->type == XPATH_NUMBER)
                {
-                       cstr = (char *) xpathobj->stringval;
+                       xmlChar    *str;
+
+                       str = xmlXPathCastNumberToString(xpathobj->floatval);
+                       cstr = xml_pstrdup_and_free(str);
                }
                else
                        elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
index 6e1f885112ca0ac4b8e2fa3564727bea07cb21d4..2085fa039110d978500182f9435307d204acdd90 100644 (file)
@@ -1210,9 +1210,9 @@ SELECT xmltable.* FROM xmldata, LATERAL xmltable('/ROWS/ROW[COUNTRY_NAME="Japan"
 (2 rows)
 
 SELECT * FROM xmltable('/root' passing '<root><element>a1a<!-- aaaa -->a2a<?aaaaa?> <!--z-->  bbbb<x>xxx</x>cccc</element></root>' COLUMNS element text);
-      element      
--------------------
- a1aa2a   bbbbcccc
+       element        
+----------------------
+ a1aa2a   bbbbxxxcccc
 (1 row)
 
 SELECT * FROM xmltable('/root' passing '<root><element>a1a<!-- aaaa -->a2a<?aaaaa?> <!--z-->  bbbb<x>xxx</x>cccc</element></root>' COLUMNS element text PATH 'element/text()'); -- should fail
@@ -1493,3 +1493,24 @@ SELECT xmltable.* FROM xmltest2, LATERAL xmltable(('/d/r/' || lower(_path) || 'c
  14
 (4 rows)
 
+-- XPath result can be boolean or number too
+SELECT * FROM XMLTABLE('*' PASSING '<a>a</a>' COLUMNS a xml PATH '.', b text PATH '.', c text PATH '"hi"', d boolean PATH '. = "a"', e integer PATH 'string-length(.)');
+    a     | b | c  | d | e 
+----------+---+----+---+---
+ <a>a</a> | a | hi | t | 1
+(1 row)
+
+\x
+SELECT * FROM XMLTABLE('*' PASSING '<e>pre<!--c1--><?pi arg?><![CDATA[&ent1]]><n2>&amp;deep</n2>post</e>' COLUMNS x xml PATH 'node()', y xml PATH '/');
+-[ RECORD 1 ]-----------------------------------------------------------
+x | pre<!--c1--><?pi arg?><![CDATA[&ent1]]><n2>&amp;deep</n2>post
+y | <e>pre<!--c1--><?pi arg?><![CDATA[&ent1]]><n2>&amp;deep</n2>post</e>+
+  | 
+
+\x
+SELECT * FROM XMLTABLE('.' PASSING XMLELEMENT(NAME a) columns a varchar(20) PATH '"<foo/>"', b xml PATH '"<foo/>"');
+   a    |      b       
+--------+--------------
+ <foo/> | &lt;foo/&gt;
+(1 row)
+
index 0eba424346285a3ade2a7f816740812889953edb..7f8669697344675ed9d51e0dc11799a5772fec91 100644 (file)
@@ -1343,3 +1343,22 @@ SELECT xmltable.* FROM xmltest2, LATERAL xmltable(('/d/r/' || lower(_path) || 'c
 ---
 (0 rows)
 
+-- XPath result can be boolean or number too
+SELECT * FROM XMLTABLE('*' PASSING '<a>a</a>' COLUMNS a xml PATH '.', b text PATH '.', c text PATH '"hi"', d boolean PATH '. = "a"', e integer PATH 'string-length(.)');
+ERROR:  unsupported XML feature
+LINE 1: SELECT * FROM XMLTABLE('*' PASSING '<a>a</a>' COLUMNS a xml ...
+                                           ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+\x
+SELECT * FROM XMLTABLE('*' PASSING '<e>pre<!--c1--><?pi arg?><![CDATA[&ent1]]><n2>&amp;deep</n2>post</e>' COLUMNS x xml PATH 'node()', y xml PATH '/');
+ERROR:  unsupported XML feature
+LINE 1: SELECT * FROM XMLTABLE('*' PASSING '<e>pre<!--c1--><?pi arg?...
+                                           ^
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
+\x
+SELECT * FROM XMLTABLE('.' PASSING XMLELEMENT(NAME a) columns a varchar(20) PATH '"<foo/>"', b xml PATH '"<foo/>"');
+ERROR:  unsupported XML feature
+DETAIL:  This functionality requires the server to be built with libxml support.
+HINT:  You need to rebuild PostgreSQL using --with-libxml.
index 3ec56e4c54b960c590e2651c1484cf9adda35724..510b09bd03fe1ad1f25e184bb7b39c8e9bb5c75e 100644 (file)
@@ -1190,9 +1190,9 @@ SELECT xmltable.* FROM xmldata, LATERAL xmltable('/ROWS/ROW[COUNTRY_NAME="Japan"
 (2 rows)
 
 SELECT * FROM xmltable('/root' passing '<root><element>a1a<!-- aaaa -->a2a<?aaaaa?> <!--z-->  bbbb<x>xxx</x>cccc</element></root>' COLUMNS element text);
-      element      
--------------------
- a1aa2a   bbbbcccc
+       element        
+----------------------
+ a1aa2a   bbbbxxxcccc
 (1 row)
 
 SELECT * FROM xmltable('/root' passing '<root><element>a1a<!-- aaaa -->a2a<?aaaaa?> <!--z-->  bbbb<x>xxx</x>cccc</element></root>' COLUMNS element text PATH 'element/text()'); -- should fail
@@ -1473,3 +1473,24 @@ SELECT xmltable.* FROM xmltest2, LATERAL xmltable(('/d/r/' || lower(_path) || 'c
  14
 (4 rows)
 
+-- XPath result can be boolean or number too
+SELECT * FROM XMLTABLE('*' PASSING '<a>a</a>' COLUMNS a xml PATH '.', b text PATH '.', c text PATH '"hi"', d boolean PATH '. = "a"', e integer PATH 'string-length(.)');
+    a     | b | c  | d | e 
+----------+---+----+---+---
+ <a>a</a> | a | hi | t | 1
+(1 row)
+
+\x
+SELECT * FROM XMLTABLE('*' PASSING '<e>pre<!--c1--><?pi arg?><![CDATA[&ent1]]><n2>&amp;deep</n2>post</e>' COLUMNS x xml PATH 'node()', y xml PATH '/');
+-[ RECORD 1 ]-----------------------------------------------------------
+x | pre<!--c1--><?pi arg?><![CDATA[&ent1]]><n2>&amp;deep</n2>post
+y | <e>pre<!--c1--><?pi arg?><![CDATA[&ent1]]><n2>&amp;deep</n2>post</e>+
+  | 
+
+\x
+SELECT * FROM XMLTABLE('.' PASSING XMLELEMENT(NAME a) columns a varchar(20) PATH '"<foo/>"', b xml PATH '"<foo/>"');
+   a    |      b       
+--------+--------------
+ <foo/> | &lt;foo/&gt;
+(1 row)
+
index 3b91b56d5a9800bc4ab5467aded03127e9f2bf1a..8057a461788b1ee4218572ca86e5674def8ae4a0 100644 (file)
@@ -595,3 +595,11 @@ INSERT INTO xmltest2 VALUES('<d><r><dc>2</dc></r></d>', 'D');
 SELECT xmltable.* FROM xmltest2, LATERAL xmltable('/d/r' PASSING x COLUMNS a int PATH '' || lower(_path) || 'c');
 SELECT xmltable.* FROM xmltest2, LATERAL xmltable(('/d/r/' || lower(_path) || 'c') PASSING x COLUMNS a int PATH '.');
 SELECT xmltable.* FROM xmltest2, LATERAL xmltable(('/d/r/' || lower(_path) || 'c') PASSING x COLUMNS a int PATH 'x' DEFAULT ascii(_path) - 54);
+
+-- XPath result can be boolean or number too
+SELECT * FROM XMLTABLE('*' PASSING '<a>a</a>' COLUMNS a xml PATH '.', b text PATH '.', c text PATH '"hi"', d boolean PATH '. = "a"', e integer PATH 'string-length(.)');
+\x
+SELECT * FROM XMLTABLE('*' PASSING '<e>pre<!--c1--><?pi arg?><![CDATA[&ent1]]><n2>&amp;deep</n2>post</e>' COLUMNS x xml PATH 'node()', y xml PATH '/');
+\x
+
+SELECT * FROM XMLTABLE('.' PASSING XMLELEMENT(NAME a) columns a varchar(20) PATH '"<foo/>"', b xml PATH '"<foo/>"');