]> granicus.if.org Git - postgresql/commitdiff
Add xmlpath() to evaluate XPath expressions, with namespaces support.
author Bruce Momjian <bruce@momjian.us>
Thu, 22 Mar 2007 20:14:58 +0000 (20:14 +0000)
committer Bruce Momjian <bruce@momjian.us>
Thu, 22 Mar 2007 20:14:58 +0000 (20:14 +0000)
Nikolay Samokhvalov

src/backend/utils/adt/xml.c
src/include/catalog/pg_proc.h
src/include/utils/xml.h
src/test/regress/expected/xml.out
src/test/regress/expected/xml_1.out
src/test/regress/sql/xml.sql

index fa82837fd0cce6a64042decd83152e6d42e972e4..00f661df9d4fae699690888186fa10eec9e91386 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.35 2007/03/15 23:12:06 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.36 2007/03/22 20:14:58 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -47,6 +47,8 @@
 #include <libxml/uri.h>
 #include <libxml/xmlerror.h>
 #include <libxml/xmlwriter.h>
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
 #endif /* USE_LIBXML */
 
 #include "catalog/namespace.h"
@@ -67,6 +69,7 @@
 #include "utils/datetime.h"
 #include "utils/lsyscache.h"
 #include "utils/memutils.h"
+#include "access/tupmacs.h"
 #include "utils/xml.h"
 
 
@@ -88,6 +91,7 @@ static xmlChar *xml_text2xmlChar(text *in);
 static int             parse_xml_decl(const xmlChar *str, size_t *lenp, xmlChar **version, xmlChar **encoding, int *standalone);
 static bool            print_xml_decl(StringInfo buf, const xmlChar *version, pg_enc encoding, int standalone);
 static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, xmlChar *encoding);
+static text            *xml_xmlnodetotext(xmlNodePtr cur);
 
 #endif /* USE_LIBXML */
 
@@ -1463,7 +1467,6 @@ map_xml_name_to_sql_identifier(char *name)
        return buf.data;
 }
 
-
 /*
  * Map SQL value to XML value; see SQL/XML:2003 section 9.16.
  */
@@ -2403,3 +2406,247 @@ SPI_sql_row_to_xmlelement(int rownum, StringInfo result, char *tablename, bool n
        else
                appendStringInfoString(result, "</row>\n\n");
 }
+
+
+/*
+ * XPath related functions
+ */
+
+#ifdef USE_LIBXML
+/*
+ * Convert an XML node to a text datum holding the node's string value
+ * (return value only, it's not dumping the node's markup).
+ *
+ * cur: the libxml node to convert.
+ *
+ * Returns a palloc'd text value.  The intermediate string handed back by
+ * libxml is released here, so nothing is leaked per converted node.
+ * Raises an error if libxml cannot produce the string.
+ */
+static text *
+xml_xmlnodetotext(xmlNodePtr cur)
+{
+       xmlChar                 *str;
+       text                    *result;
+       size_t                  len;
+
+       str = xmlXPathCastNodeToString(cur);
+       if (str == NULL)
+               ereport(ERROR, (errmsg("could not convert XML node to string")));
+
+       len = strlen((char *) str);
+       result = (text *) palloc(len + VARHDRSZ);
+       SET_VARSIZE(result, len + VARHDRSZ);
+       memcpy(VARDATA(result), str, len);
+
+       /* the string is a fresh copy owned by us; free it now that it is copied */
+       xmlFree(str);
+
+       return result;
+}
+#endif
+
+/*
+ * Evaluate XPath expression and return array of XML values.
+ * As we have no support for XQuery sequences yet, this function seems
+ * to be the most useful one (an array of XML values plays the role of
+ * some kind of substitute for XQuery sequences).
+ *
+ * Arguments:
+ *   0: XPath expression (text)
+ *   1: XML value the expression is evaluated against
+ *   2: namespace mappings (two-dimensional text array, see below); may be NULL
+ *
+ * Returns NULL if either of the first two arguments is NULL, or if the
+ * expression yields no node set or an empty one; otherwise returns an
+ * array with one element per matched node, each holding the node's string
+ * value.  Raises an error for an empty expression, unparsable XML data,
+ * an expression that does not compile, or malformed namespace mappings.
+ *
+ * Workaround here: we parse XML data in different way to allow XPath for
+ * fragments (see "XPath for fragment" TODO comment inside).
+ */
+Datum
+xmlpath(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+       ArrayBuildState         *astate = NULL;
+       xmlParserCtxtPtr        ctxt = NULL;
+       xmlDocPtr                       doc = NULL;
+       xmlXPathContextPtr      xpathctx = NULL;
+       xmlXPathCompExprPtr     xpathcomp = NULL;
+       xmlXPathObjectPtr       xpathobj = NULL;
+       int32                           len, xpath_len;
+       xmlChar                         *string, *xpath_expr;
+       bool                            res_is_null = FALSE;
+       int                                     i;
+       xmltype                         *data;
+       text                            *xpath_expr_text;
+       ArrayType                       *namespaces;
+       int                                     *dims, ndims, ns_count = 0, bitmask = 1;
+       char                            *ptr;
+       bits8                           *bitmap;
+       char                            **ns_names = NULL, **ns_uris = NULL;
+       int16                           typlen;
+       bool                            typbyval;
+       char                            typalign;
+
+       /* the function is not strict, we must check first two args */
+       if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
+               PG_RETURN_NULL();
+
+       xpath_expr_text = PG_GETARG_TEXT_P(0);
+       data  = PG_GETARG_XML_P(1);
+
+       /* Namespace mappings passed as text[].
+        * Assume that 2-dimensional array has been passed,
+        * the 1st subarray is array of names, the 2nd -- array of URIs,
+        * example: ARRAY[ARRAY['myns', 'myns2'], ARRAY['http://example.com', 'http://example2.com']].
+        */
+       if (!PG_ARGISNULL(2))
+       {
+               namespaces = PG_GETARG_ARRAYTYPE_P(2);
+               ndims = ARR_NDIM(namespaces);
+               dims = ARR_DIMS(namespaces);
+
+               /* Sanity check */
+               if (ndims != 2)
+                       ereport(ERROR, (errmsg("invalid array passed for namespace mappings"),
+                                                       errdetail("Only 2-dimensional array may be used for namespace mappings.")));
+
+               /*
+                * The element walk below takes the first half of the items as
+                * names and the second half as URIs, which only pairs them up
+                * correctly when there are exactly two subarrays.
+                */
+               if (dims[0] != 2)
+                       ereport(ERROR, (errmsg("invalid array passed for namespace mappings"),
+                                                       errdetail("The array must consist of exactly two subarrays: namespace names and namespace URIs.")));
+
+               Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
+
+               ns_count = ArrayGetNItems(ndims, dims) / 2;
+               get_typlenbyvalalign(ARR_ELEMTYPE(namespaces),
+                                                        &typlen, &typbyval, &typalign);
+               ns_names = (char **) palloc(ns_count * sizeof(char *));
+               ns_uris = (char **) palloc(ns_count * sizeof(char *));
+               ptr = ARR_DATA_PTR(namespaces);
+               bitmap = ARR_NULLBITMAP(namespaces);
+               bitmask = 1;
+
+               for (i = 0; i < ns_count * 2; i++)
+               {
+                       if (bitmap && (*bitmap & bitmask) == 0)
+                               ereport(ERROR, (errmsg("neither namespace nor URI may be NULL"))); /* TODO: better message */
+                       else
+                       {
+                               if (i < ns_count)
+                                       ns_names[i] = DatumGetCString(DirectFunctionCall1(textout,
+                                                                                                                 PointerGetDatum(ptr)));
+                               else
+                                       ns_uris[i - ns_count] = DatumGetCString(DirectFunctionCall1(textout,
+                                                                                                                 PointerGetDatum(ptr)));
+                               /* step over this element to the start of the next one */
+                               ptr = att_addlength(ptr, typlen, PointerGetDatum(ptr));
+                               ptr = (char *) att_align(ptr, typalign);
+                       }
+
+                       /* advance bitmap pointer if any */
+                       if (bitmap)
+                       {
+                               bitmask <<= 1;
+                               if (bitmask == 0x100)
+                               {
+                                       bitmap++;
+                                       bitmask = 1;
+                               }
+                       }
+               }
+       }
+
+       len = VARSIZE(data) - VARHDRSZ;
+       xpath_len = VARSIZE(xpath_expr_text) - VARHDRSZ;
+       if (xpath_len == 0)
+               ereport(ERROR, (errmsg("empty XPath expression")));
+
+       /*
+        * The XML datum is not NUL-terminated, so make sure it is long enough
+        * before comparing its first five bytes.
+        */
+       if (len >= 5 &&
+               xmlStrncmp((xmlChar *) VARDATA(data), (xmlChar *) "<?xml", 5) == 0)
+       {
+               string = palloc(len + 1);
+               memcpy(string, VARDATA(data), len);
+               string[len] = '\0';
+               xpath_expr = palloc(xpath_len + 1);
+               memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
+               xpath_expr[xpath_len] = '\0';
+       }
+       else
+       {
+               /* use "<x>...</x>" as dummy root element to enable XPath for fragments */
+               /* TODO: (XPath for fragment) find better solution to work with XML fragment! */
+
+               /*
+                * Build both strings with palloc, like the branch above, so that
+                * they are not leaked and so that "len" is exactly the number of
+                * document bytes handed to the parser.
+                */
+               string = palloc(len + 7 + 1);
+               memcpy(string, "<x>", 3);
+               memcpy(string + 3, VARDATA(data), len);
+               memcpy(string + 3 + len, "</x>", 5);    /* 4 chars plus terminating NUL */
+               len += 7;
+
+               xpath_expr = palloc(xpath_len + 2 + 1);
+               memcpy(xpath_expr, "/x", 2);
+               memcpy(xpath_expr + 2, VARDATA(xpath_expr_text), xpath_len);
+               xpath_len += 2;
+               xpath_expr[xpath_len] = '\0';
+       }
+
+       xml_init();
+
+       PG_TRY();
+       {
+               /* redundant XML parsing (two parsings for the same value in the same session are possible) */
+               ctxt = xmlNewParserCtxt();
+               if (ctxt == NULL)
+                       xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+                                               "could not allocate parser context");
+               doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
+               if (doc == NULL)
+                       xml_ereport(ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+                                               "could not parse XML data");
+               xpathctx = xmlXPathNewContext(doc);
+               if (xpathctx == NULL)
+                       xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+                                               "could not allocate XPath context");
+               xpathctx->node = xmlDocGetRootElement(doc);
+               if (xpathctx->node == NULL)
+                       xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+                                               "could not find root XML element");
+
+               /* register namespaces, if any */
+               if ((ns_count > 0) && ns_names && ns_uris)
+               {
+                       for (i = 0; i < ns_count; i++)
+                       {
+                               if (0 != xmlXPathRegisterNs(xpathctx, (xmlChar *) ns_names[i], (xmlChar *) ns_uris[i]))
+                                       ereport(ERROR,
+                                               (errmsg("could not register XML namespace with prefix=\"%s\" and href=\"%s\"", ns_names[i], ns_uris[i])));
+                       }
+               }
+
+               xpathcomp = xmlXPathCompile(xpath_expr);
+               if (xpathcomp == NULL)
+                       xml_ereport(ERROR, ERRCODE_INTERNAL_ERROR,
+                                               "invalid XPath expression"); /* TODO: show proper XPath error details */
+
+               xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
+               xmlXPathFreeCompExpr(xpathcomp);
+               /* forget the freed pointer so the PG_CATCH block cannot free it twice */
+               xpathcomp = NULL;
+               if (xpathobj == NULL)
+                       ereport(ERROR, (errmsg("could not create XPath object")));
+
+               if (xpathobj->nodesetval == NULL)
+                       res_is_null = TRUE;
+
+               if (!res_is_null && xpathobj->nodesetval->nodeNr == 0)
+               {
+                       /* TODO maybe empty array should be here, not NULL? (if so -- fix segfault) */
+                       /*PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));*/
+                       res_is_null = TRUE;
+               }
+
+               if (!res_is_null)
+               {
+                       /* one array element per matched node, holding its string value */
+                       for (i = 0; i < xpathobj->nodesetval->nodeNr; i++)
+                       {
+                               Datum           elem;
+                               bool            elemisnull = false;
+                               elem = PointerGetDatum(xml_xmlnodetotext(xpathobj->nodesetval->nodeTab[i]));
+                               astate = accumArrayResult(astate, elem,
+                                                                                 elemisnull, XMLOID,
+                                                                                 CurrentMemoryContext);
+                       }
+               }
+
+               xmlXPathFreeObject(xpathobj);
+               xmlXPathFreeContext(xpathctx);
+               xmlFreeParserCtxt(ctxt);
+               xmlFreeDoc(doc);
+               xmlCleanupParser();
+       }
+       PG_CATCH();
+       {
+               /* release whatever libxml objects had been created before the error */
+               if (xpathcomp)
+                       xmlXPathFreeCompExpr(xpathcomp);
+               if (xpathobj)
+                       xmlXPathFreeObject(xpathobj);
+               if (xpathctx)
+                       xmlXPathFreeContext(xpathctx);
+               if (doc)
+                       xmlFreeDoc(doc);
+               if (ctxt)
+                       xmlFreeParserCtxt(ctxt);
+               xmlCleanupParser();
+
+               PG_RE_THROW();
+       }
+       PG_END_TRY();
+
+       if (res_is_null)
+       {
+               PG_RETURN_NULL();
+       }
+       else
+       {
+               PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
+       }
+#else
+       NO_XML_SUPPORT();
+       return 0;
+#endif
+}
index 8007129a1bbec02fb401a0ef0b98fe1d967ab661..0a096972edcdff7e12b569a5ff2fe4a91b39f07a 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.449 2007/03/20 05:45:00 neilc Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.450 2007/03/22 20:14:58 momjian Exp $
  *
  * NOTES
  *       The script catalog/genbki.sh reads this file and generates .bki
@@ -4095,6 +4095,10 @@ DESCR("map table contents and structure to XML and XML Schema");
 DATA(insert OID = 2930 (  query_to_xml_and_xmlschema  PGNSP PGUID 12 100 0 f f t f s 4 142 "25 16 16 25" _null_ _null_ "{query,nulls,tableforest,targetns}" query_to_xml_and_xmlschema - _null_ ));
 DESCR("map query result and structure to XML and XML Schema");
 
+DATA(insert OID = 2931 (  xmlpath      PGNSP PGUID 12 1 0 f f f f i 3 143 "25 142 1009" _null_ _null_ _null_ xmlpath - _null_ ));
+DESCR("evaluate XPath expression, with namespaces support");
+DATA(insert OID = 2932 (  xmlpath      PGNSP PGUID 14 1 0 f f f f i 2 143 "25 142" _null_ _null_ _null_ "select pg_catalog.xmlpath($1, $2, NULL)" - _null_ ));
+DESCR("evaluate XPath expression");
 
 /* uuid */ 
 DATA(insert OID = 2952 (  uuid_in                 PGNSP PGUID 12 1 0 f f t f i 1 2950 "2275" _null_ _null_ _null_ uuid_in - _null_ ));
index 7cf23b670661b61f4927c33f5175ccedb8f032d5..ef06e41b92bfc8e6070e48b25a68be6fbc3b5ef2 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/xml.h,v 1.16 2007/02/16 07:46:55 petere Exp $
+ * $PostgreSQL: pgsql/src/include/utils/xml.h,v 1.17 2007/03/22 20:14:58 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -36,6 +36,7 @@ extern Datum xmlconcat2(PG_FUNCTION_ARGS);
 extern Datum texttoxml(PG_FUNCTION_ARGS);
 extern Datum xmltotext(PG_FUNCTION_ARGS);
 extern Datum xmlvalidate(PG_FUNCTION_ARGS);
+extern Datum xmlpath(PG_FUNCTION_ARGS);
 
 extern Datum table_to_xml(PG_FUNCTION_ARGS);
 extern Datum query_to_xml(PG_FUNCTION_ARGS);
index 97c30600e84ab470e685482c6c3aa2e6d347bc57..189c22113b3bdf32b8a165ec20a07e429c35aded 100644 (file)
@@ -401,3 +401,33 @@ SELECT table_name, view_definition FROM information_schema.views
  xmlview9   | SELECT XMLSERIALIZE(CONTENT 'good'::"xml" AS text) AS "xmlserialize";
 (9 rows)
 
+-- Text XPath expressions evaluation
+SELECT xmlpath('/value', data) FROM xmltest;
+ xmlpath 
+---------
+ {one}
+ {two}
+(2 rows)
+
+SELECT xmlpath(NULL, NULL) IS NULL FROM xmltest;
+ ?column? 
+----------
+ t
+ t
+(2 rows)
+
+SELECT xmlpath('', '<!-- error -->');
+ERROR:  empty XPath expression
+CONTEXT:  SQL function "xmlpath" statement 1
+SELECT xmlpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
+    xmlpath     
+----------------
+ {"number one"}
+(1 row)
+
+SELECT xmlpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc'], ARRAY['http://127.0.0.1']]);
+ xmlpath 
+---------
+ {1,2}
+(1 row)
+
index 95b9d736151283ab52fc4ef7f8c7f8923e0de7cc..3ba6e58d2d8fde59904d25d8f280dda1e1d3113d 100644 (file)
@@ -197,3 +197,18 @@ SELECT table_name, view_definition FROM information_schema.views
  xmlview5   | SELECT XMLPARSE(CONTENT '<abc>x</abc>'::text STRIP WHITESPACE) AS "xmlparse";
 (2 rows)
 
+-- Text XPath expressions evaluation
+SELECT xmlpath('/value', data) FROM xmltest;
+ xmlpath 
+---------
+(0 rows)
+
+SELECT xmlpath(NULL, NULL) IS NULL FROM xmltest;
+ERROR:  no XML support in this installation
+CONTEXT:  SQL function "xmlpath" statement 1
+SELECT xmlpath('', '<!-- error -->');
+ERROR:  no XML support in this installation
+SELECT xmlpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
+ERROR:  no XML support in this installation
+SELECT xmlpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc'], ARRAY['http://127.0.0.1']]);
+ERROR:  no XML support in this installation
index 6ed9d8c47a215a8c71aee6b8d5ca109619011279..32ac15610c74ce3039b0d79888f35f04f82b7867 100644 (file)
@@ -144,3 +144,11 @@ CREATE VIEW xmlview9 AS SELECT xmlserialize(content 'good' as text);
 
 SELECT table_name, view_definition FROM information_schema.views
   WHERE table_name LIKE 'xmlview%' ORDER BY 1;
+
+-- Text XPath expressions evaluation
+
+SELECT xmlpath('/value', data) FROM xmltest;
+SELECT xmlpath(NULL, NULL) IS NULL FROM xmltest;
+SELECT xmlpath('', '<!-- error -->');
+SELECT xmlpath('//text()', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>');
+SELECT xmlpath('//loc:piece/@id', '<local:data xmlns:local="http://127.0.0.1"><local:piece id="1">number one</local:piece><local:piece id="2" /></local:data>', ARRAY[ARRAY['loc'], ARRAY['http://127.0.0.1']]);