]> granicus.if.org Git - postgresql/commitdiff
Thanks to the generous support of Torchbox (http://www.torchbox.com), I
authorBruce Momjian <bruce@momjian.us>
Fri, 5 Mar 2004 03:24:50 +0000 (03:24 +0000)
committerBruce Momjian <bruce@momjian.us>
Fri, 5 Mar 2004 03:24:50 +0000 (03:24 +0000)
have been able to significantly improve the contrib/xml XPath
integration code.

New features:

* XPath set-returning function allows multiple results from several
XPath queries to be used as a virtual table.
* Using libxslt, XSLT transformations (with and without parameters) are
supported. (Caution: This support allows generic URL fetching from
within the backend as well).

I've removed the old code so that it is all libxml based. Rather than
attach as a patch, I've put the tar.gz (10k!) at
http://www.azuli.co.uk/pgxml-1.0.tar.gz
(all files in archive are xml/....).

I think this is worth replacing the contrib version with, even though
the function names have changed (though the same functionality is
there), because it includes a SRF and some SPI usage, in addition to
linking to an external library. And it isn't a big module! Obviously, I
understand that people might prefer to move it elsewhere, or might have
reservations about replacing an existing contrib module with an
incompatible one. I'm open to suggestions.

John Gray

contrib/xml/Makefile
contrib/xml/README.pgxml
contrib/xml/TODO [deleted file]
contrib/xml/pgxml.c [deleted file]
contrib/xml/pgxml.h [deleted file]
contrib/xml/pgxml.sql.in
contrib/xml/pgxml_dom.c [deleted file]
contrib/xml/pgxml_dom.sql.in [deleted file]
contrib/xml/xpath.c [new file with mode: 0644]
contrib/xml/xslt_proc.c [new file with mode: 0644]

index 52f176c649b592d8c3e1d5359392a702428bb046..9177ca865c3e6739f32b0c1b5ddb763ad9ae6762 100644 (file)
@@ -1,13 +1,18 @@
-# $PostgreSQL: pgsql/contrib/xml/Makefile,v 1.4 2003/11/29 19:51:36 pgsql Exp $
-
+# This makefile will build the new XML and XSLT routines.
 subdir = contrib/xml
-top_builddir = ../..
+top_builddir = ../../
 include $(top_builddir)/src/Makefile.global
 
-MODULE_big = pgxml_dom
-OBJS = pgxml_dom.o
-SHLIB_LINK = -lxml2
-DATA_built = pgxml_dom.sql
+MODULE_big = pgxml
+
+# Remove xslt_proc.o from the following line if you don't have libxslt
+OBJS = xpath.o xslt_proc.o
+
+# Remove -lxslt from the following line if you don't have libxslt.
+SHLIB_LINK = -lxml2 -lxslt
+
+DATA_built = pgxml.sql
 DOCS = README.pgxml
 
-include $(top_srcdir)/contrib/contrib-global.mk
+include $(top_builddir)contrib/contrib-global.mk
+
index 6c714f74e120f467f9d07842084da2b02707afac..f29d071722eac00b695aca653d4dfa1a48cdefc1 100644 (file)
-This package contains some simple routines for manipulating XML
-documents stored in PostgreSQL. This is a work-in-progress and
-somewhat basic at the moment (see the file TODO for some outline of
-what remains to be done).
+XML-handling functions for PostgreSQL
+=====================================
 
-At present, two modules (based on different XML handling libraries)
-are provided.
+Development of this module was sponsored by Torchbox Ltd. (www.torchbox.com)
 
-Prerequisite:
+This version of the XML functions provides both XPath querying and
+XSLT functionality. There is also a new table function which allows
+the straightforward return of multiple XML results. Note that the current code
+doesn't take any particular care over character sets - this is
+something that should be fixed at some point!
 
-pgxml.c:
-expat parser 1.95.0 or newer (http://expat.sourceforge.net)
+Installation
+------------
 
-or
+The current build process will only work if the files are in
+contrib/xml in a PostgreSQL 7.3 or 7.4 source tree which has been
+configured and built (If you alter the subdir value in the Makefile
+you can place it in a different directory in a PostgreSQL tree).
 
-pgxml_dom.c:
-libxml2 (http://xmlsoft.org)
+Before you begin, just check the Makefile, and then just 'make' and
+'make install'.
 
-The libxml2 version provides more complete XPath functionality, and
-seems like a good way to go. I've left the old versions in there for
-comparison.
+This code requires libxml2 (http://xmlsoft.org) to be previously installed.
 
-Compiling and loading:
-----------------------
+Description of functions
+------------------------
 
-The Makefile only builds the libxml2 version.
+The first set of functions are straightforward XML parsing and XPath queries:
 
-To compile, just type make.
+pgxml_parse(document) RETURNS bool
 
-Then you can use psql to load the two function definitions: 
-\i pgxml_dom.sql
+This parses the document text in its parameter and returns true if the
+document is well-formed XML.
 
+xpath_string(document,query) RETURNS text
+xpath_number(document,query) RETURNS float4
+xpath_bool(document,query) RETURNS bool
 
-Function documentation and usage:
----------------------------------
+These functions evaluate the XPath query on the supplied document, and
+cast the result to the specified type.
 
-pgxml_parse(text) returns bool
-  parses the provided text and returns true or false if it is 
-well-formed or not. It returns NULL if the parser couldn't be
-created for any reason.
 
-pgxml_xpath (XQuery functions) - differs between the versions:
+xpath_nodeset(document,query,toptag,itemtag) RETURNS text
 
-pgxml.c (expat version) has:
+This evaluates query on document and wraps the result in XML tags. If
+the result is multivalued, the output will look like:
 
-pgxml_xpath(text doc, text xpath, int n) returns text
-  parses doc and returns the cdata of the nth occurence of
-the "simple path" entry. 
+<toptag>
+<itemtag>Value 1 which could be an XML fragment</itemtag>
+<itemtag>Value 2....</itemtag>
+</toptag>
 
-However, the remainder of this document will cover the pgxml_dom.c version.
+If either toptag or itemtag is an empty string, the relevant tag is omitted.
+There are also wrapper functions for this operation:
 
-pgxml_xpath(text doc, text xpath, text toptag, text septag) returns text
-  evaluates xpath on doc, and returns the result wrapped in
-<toptag>...</toptag> and each result node wrapped in
-<septag></septag>. toptag and septag may be empty strings, in which
-case the respective tag will be omitted.
+xpath_nodeset(document,query) RETURNS text omits both tags.
+xpath_nodeset(document,query,itemtag) RETURNS text omits toptag.
 
-Example:
 
-Given a  table docstore:
+xpath_list(document,query,separator) RETURNS text
 
- Attribute |  Type   | Modifier 
------------+---------+----------
- docid     | integer | 
- document  | text    | 
+This function returns multiple values separated by the specified
+separator, e.g. Value 1,Value 2,Value 3 if separator=','.
 
-containing documents such as (these are archaeological site
-descriptions, in case anyone is wondering):
+xpath_list(document,query) RETURNS text
 
-<?XML version="1.0"?>
-<site provider="Foundations" sitecode="ak97" version="1">
-   <name>Church Farm, Ashton Keynes</name>
-   <invtype>watching brief</invtype>
-   <location scheme="osgb">SU04209424</location>
-</site>
+This is a wrapper for the above function that uses ',' as the separator.
 
-one can type:
 
-select docid, 
-pgxml_xpath(document,'//site/name/text()','','') as sitename,
-pgxml_xpath(document,'//site/location/text()','','') as location
- from docstore;
-and get as output:
+xpath_table
+-----------
 
- docid |               sitename               |  location  
--------+--------------------------------------+------------
-     1 | Church Farm, Ashton Keynes           | SU04209424
-     2 | Glebe Farm, Long Itchington          | SP41506500
-     3 | The Bungalow, Thames Lane, Cricklade | SU10229362
-(3 rows)
+This is a table function which evaluates a set of XPath queries on
+each of a set of documents and returns the results as a table. The
+primary key field from the original document table is returned as the
+first column of the result so that the resultset from xpath_table can
+be readily used in joins.
 
-or, to illustrate the use of the extra tags:
+The function itself takes 5 arguments, all text.
 
-select docid as id,
-pgxml_xpath(document,'//find/type/text()','set','findtype') 
-from docstore;
+xpath_table(key,document,relation,xpaths,criteria)
 
- id |                               pgxml_xpath                               
-----+-------------------------------------------------------------------------
-  1 | <set></set>
-  2 | <set><findtype>Urn</findtype></set>
-  3 | <set><findtype>Pottery</findtype><findtype>Animal bone</findtype></set>
-(3 rows)
+key - the name of the "key" field - this is just a field to be used as
+the first column of the output table i.e. it identifies the record from
+which each output row came.
 
-Which produces a new, well-formed document. Note that document 1 had
-no matching instances, so the set returned contains no
-elements. document 2 has 1 matching element and document 3 has 2.
+document - the name of the field containing the XML document
 
-This is just scratching the surface because XPath allows all sorts of
-operations.
+relation - the name of the table or view containing the documents
 
-Note: I've only implemented the return of nodeset and string values so
-far. This covers (I think) many types of queries, however.
+xpaths - multiple xpath expressions separated by |
 
-John Gray <jgray@azuli.co.uk>  16 August 2001
+criteria - The contents of the where clause. This needs to be specified,
+so use "true" or "1=1" here if you want to process all the rows in the
+relation.
 
+NB These parameters (except the XPath strings) are just substituted
+into a plain SQL SELECT statement, so you have some flexibility - the
+statement is
 
+SELECT <key>,<document> FROM <relation> WHERE <criteria>
+
+so those parameters can be *anything* valid in those particular
+locations. The result from this SELECT needs to return exactly two
+columns (which it will unless you try to list multiple fields for key
+or document). Beware that this simplistic approach requires that you
+validate any user-supplied values to avoid SQL injection attacks.
+
+Using the function
+
+The function has to be used in a FROM expression. This gives the following
+form:
+
+SELECT * FROM
+xpath_table('article_id', 
+       'article_xml',
+       'articles', 
+       '/article/author|/article/pages|/article/title',
+       'date_entered > ''2003-01-01'' ') 
+AS t(article_id integer, author text, page_count integer, title text);
+
+The AS clause defines the names and types of the columns in the
+virtual table. If there are more XPath queries than result columns,
+the extra queries will be ignored. If there are more result columns
+than XPath queries, the extra columns will be NULL.
+
+Note that I've said in this example that pages is an integer.  The
+function deals internally with string representations, so when you say
+you want an integer in the output, it will take the string
+representation of the XPath result and use PostgreSQL input functions
+to transform it into an integer (or whatever type the AS clause
+requests). An error will result if it can't do this - for example if
+the result is empty - so you may wish to just stick to 'text' as the
+column type if you think your data has any problems.
+
+The select statement doesn't need to use * alone - it can reference the
+columns by name or join them to other tables. The function produces a
+virtual table with which you can perform any operation you wish (e.g.
+aggregation, joining, sorting etc). So we could also have:
+
+SELECT t.title, p.fullname, p.email 
+FROM xpath_table('article_id','article_xml','articles',
+            '/article/title|/article/author/@id',
+            'xpath_string(article_xml,''/article/@date'') > ''2003-03-20'' ')
+            AS t(article_id integer, title text, author_id integer), 
+     tblPeopleInfo AS p 
+WHERE t.author_id = p.person_id;
+
+as a more complicated example. Of course, you could wrap all
+of this in a view for convenience.
+
+XSLT functions
+--------------
+
+The following functions are available if libxslt is installed (this is
+not currently detected automatically, so you will have to amend the
+Makefile if libxslt is not available).
+
+xslt_process(document,stylesheet,paramlist) RETURNS text
+
+This function applies the XSL stylesheet to the document and returns
+the transformed result. The paramlist is a list of parameter
+assignments to be used in the transformation, specified in the form
+'a=1,b=2'. Note that this is also proof-of-concept code and the
+parameter parsing is very simple-minded (e.g. parameter values cannot
+contain commas!)
+
+Also note that if either the document or stylesheet values do not
+begin with a < then they will be treated as URLs and libxslt will
+fetch them. It thus follows that you can use xslt_process as a means
+to fetch the contents of URLs - you should be aware of the security
+implications of this.
+
+There is also a two-parameter version of xslt_process which does not
+pass any parameters to the transformation.
+
+If you have any comments or suggestions, please do contact me at
+jgray@azuli.co.uk. Unfortunately, this isn't my main job, so I can't
+guarantee a rapid response to your query!
diff --git a/contrib/xml/TODO b/contrib/xml/TODO
deleted file mode 100644 (file)
index 5ddd62a..0000000
+++ /dev/null
@@ -1,78 +0,0 @@
-PGXML TODO List
-===============
-
-Some of these items still require much more thought! Since the first
-release, the XPath support has improved (because I'm no longer using a
-homemade algorithm!).
-
-1. Performance considerations
-
-At present each document is parsed to produce the DOM tree on every query.
-
-Pros: 
-       Easy
-       No persistent memory or storage allocation for parsed trees
-               (libxml docs suggest representation of a document might
-                be 4 times the size of the text)
-
-Cons:
-       Slow/ CPU intensive to parse.
-       Makes it difficult for PLs to apply libxml manipulations to create
-               new documents or amend existing ones.
-
-
-2. XQuery 
-
-I'm not sure if the addition of XQuery would be best as a function or
-as a new front-end parser. This is one to think about, but with a
-decent implementation of XPath, one of the prerequisites is covered.
-
-3. DOM Interfaces
-
-Expose more aspects of the DOM to user functions/ PLs. This would
-allow a procedure in a PL to run some queries and then use exposed
-interfaces to libxml to create an XML document out of the query
-results. I accept the argument that this might be more properly
-performed on the client side.
-
-4. Returning sets of documents from XPath queries.
-
-Although the current implementation allows you to amalgamate the
-returned results into a single document, it's quite possible that
-you'd like to use the returned set of nodes as a source for FROM.
-Is there a good way to optimise/index the results of certain XPath
-operations to make them faster?:
-
-select docid, pgxml_xpath(document,'//site/location/text()','','') as location 
-where pgxml_xpath(document,'//site/name/text()','','') = 'Church Farm';
-
-and with multiple element occurences in a document?
-
-select d.docid, pgxml_xpath(d.document,'//site/location/text()','','') 
-from docstore d, 
-pgxml_xpaths('docstore','document','//feature/type/text()','docid') ft 
-where ft.key = d.docid and ft.value ='Limekiln';
-
-pgxml_xpaths params are relname, attrname, xpath, returnkey. It would
-return a set of two-element tuples (key,value) consisting of the value of
-returnkey, and the cdata value of the xpath. The XML document would be
-defined by relname and attrname.
-
-The pgxml_xpaths function could be the basis of a functional index,
-which could speed up the above query very substantially, working
-through the normal query planner mechanism.
-
-5. Return type support.
-
-Better support for returning e.g. numeric or boolean values. I need to
-get to grips with the returned data from libxml first.
-
-John Gray <jgray@azuli.co.uk> 16 August 2001
-
-
-
-
-
-
diff --git a/contrib/xml/pgxml.c b/contrib/xml/pgxml.c
deleted file mode 100644 (file)
index 4d8c3b9..0000000
+++ /dev/null
@@ -1,352 +0,0 @@
-/********************************************************
- * Interface code to parse an XML document using expat
- ********************************************************/
-
-#include "postgres.h"
-#include "fmgr.h"
-
-#include "expat.h"
-#include "pgxml.h"
-
-/* Memory management - we make expat use standard pg MM */
-
-XML_Memory_Handling_Suite mhs;
-
-/* passthrough functions (palloc is a macro) */
-
-static void *
-pgxml_palloc(size_t size)
-{
-       return palloc(size);
-}
-
-static void *
-pgxml_repalloc(void *ptr, size_t size)
-{
-       return repalloc(ptr, size);
-}
-
-static void
-pgxml_pfree(void *ptr)
-{
-       return pfree(ptr);
-}
-
-static void
-pgxml_mhs_init()
-{
-       mhs.malloc_fcn = pgxml_palloc;
-       mhs.realloc_fcn = pgxml_repalloc;
-       mhs.free_fcn = pgxml_pfree;
-}
-
-static void
-pgxml_handler_init()
-{
-       /*
-        * This code should set up the relevant handlers from  user-supplied
-        * settings. Quite how these settings are made is another matter :)
-        */
-}
-
-/* Returns true if document is well-formed */
-
-PG_FUNCTION_INFO_V1(pgxml_parse);
-
-Datum
-pgxml_parse(PG_FUNCTION_ARGS)
-{
-       /* called as pgxml_parse(document) */
-       XML_Parser      p;
-       text       *t = PG_GETARG_TEXT_P(0);            /* document buffer */
-       int32           docsize = VARSIZE(t) - VARHDRSZ;
-
-       pgxml_mhs_init();
-
-       pgxml_handler_init();
-
-       p = XML_ParserCreate_MM(NULL, &mhs, NULL);
-       if (!p)
-       {
-               ereport(ERROR,
-                               (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
-                                errmsg("could not create expat parser")));
-               PG_RETURN_NULL();               /* seems appropriate if we couldn't parse */
-       }
-
-       if (!XML_Parse(p, (char *) VARDATA(t), docsize, 1))
-       {
-               /*
-                * elog(WARNING, "Parse error at line %d:%s",
-                * XML_GetCurrentLineNumber(p),
-                * XML_ErrorString(XML_GetErrorCode(p)));
-                */
-               XML_ParserFree(p);
-               PG_RETURN_BOOL(false);
-       }
-
-       XML_ParserFree(p);
-       PG_RETURN_BOOL(true);
-}
-
-/* XPath handling functions */
-
-/* XPath support here is for a very skeletal kind of XPath!
-   It was easy to program though... */
-
-/* This first is the core function that builds a result set. The
-   actual functions called by the user manipulate that result set
-   in various ways.
-*/
-
-static XPath_Results *
-build_xpath_results(text *doc, text *pathstr)
-{
-       XPath_Results *xpr;
-       char       *res;
-       pgxml_udata *udata;
-       XML_Parser      p;
-       int32           docsize;
-
-       xpr = (XPath_Results *) palloc((sizeof(XPath_Results)));
-       memset((void *) xpr, 0, sizeof(XPath_Results));
-       xpr->rescount = 0;
-
-       docsize = VARSIZE(doc) - VARHDRSZ;
-
-       /* res isn't going to be the real return type, it is just a buffer */
-
-       res = (char *) palloc(docsize);
-       memset((void *) res, 0, docsize);
-
-       xpr->resbuf = res;
-
-       udata = (pgxml_udata *) palloc((sizeof(pgxml_udata)));
-       memset((void *) udata, 0, sizeof(pgxml_udata));
-
-       udata->currentpath[0] = '\0';
-       udata->textgrab = 0;
-
-       udata->path = (char *) palloc(VARSIZE(pathstr));
-       memcpy(udata->path, VARDATA(pathstr), VARSIZE(pathstr) - VARHDRSZ);
-
-       udata->path[VARSIZE(pathstr) - VARHDRSZ] = '\0';
-
-       udata->resptr = res;
-       udata->reslen = 0;
-
-       udata->xpres = xpr;
-
-       /* Now fire up the parser */
-       pgxml_mhs_init();
-
-       p = XML_ParserCreate_MM(NULL, &mhs, NULL);
-       if (!p)
-       {
-               ereport(ERROR,
-                               (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
-                                errmsg("could not create expat parser")));
-               pfree(xpr);
-               pfree(udata->path);
-               pfree(udata);
-               pfree(res);
-               return NULL;
-       }
-       XML_SetUserData(p, (void *) udata);
-
-       /* Set the handlers */
-
-       XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler);
-       XML_SetCharacterDataHandler(p, pgxml_charhandler);
-
-       if (!XML_Parse(p, (char *) VARDATA(doc), docsize, 1))
-       {
-               /*
-                * elog(WARNING, "Parse error at line %d:%s",
-                * XML_GetCurrentLineNumber(p),
-                * XML_ErrorString(XML_GetErrorCode(p)));
-                */
-               XML_ParserFree(p);
-               pfree(xpr);
-               pfree(udata->path);
-               pfree(udata);
-
-               return NULL;
-       }
-
-       pfree(udata->path);
-       pfree(udata);
-       XML_ParserFree(p);
-       return xpr;
-}
-
-
-PG_FUNCTION_INFO_V1(pgxml_xpath);
-
-Datum
-pgxml_xpath(PG_FUNCTION_ARGS)
-{
-       /* called as pgxml_xpath(document,pathstr, index) for the moment */
-
-       XPath_Results *xpresults;
-       text       *restext;
-
-       text       *t = PG_GETARG_TEXT_P(0);            /* document buffer */
-       text       *t2 = PG_GETARG_TEXT_P(1);
-       int32           ind = PG_GETARG_INT32(2) - 1;
-
-       xpresults = build_xpath_results(t, t2);
-
-       /*
-        * This needs to be changed depending on the mechanism for returning
-        * our set of results.
-        */
-
-       if (xpresults == NULL)          /* parse error (not WF or parser failure) */
-               PG_RETURN_NULL();
-
-       if (ind >= (xpresults->rescount))
-               PG_RETURN_NULL();
-
-       restext = (text *) palloc(xpresults->reslens[ind] + VARHDRSZ);
-       memcpy(VARDATA(restext), xpresults->results[ind], xpresults->reslens[ind]);
-
-       VARATT_SIZEP(restext) = xpresults->reslens[ind] + VARHDRSZ;
-
-       pfree(xpresults->resbuf);
-       pfree(xpresults);
-
-       PG_RETURN_TEXT_P(restext);
-}
-
-
-static void
-pgxml_pathcompare(void *userData)
-{
-       char       *matchpos;
-
-       matchpos = strstr(UD->currentpath, UD->path);
-
-       if (matchpos == NULL)
-       {                                                       /* Should we have more logic here ? */
-               if (UD->textgrab)
-               {
-                       UD->textgrab = 0;
-                       pgxml_finalisegrabbedtext(userData);
-               }
-               return;
-       }
-
-       /*
-        * OK, we have a match of some sort. Now we need to check that our
-        * match is anchored to the *end* of the string AND that it is
-        * immediately preceded by a '/'
-        */
-
-       /*
-        * This test wouldn't work if strlen (UD->path) overran the length of
-        * the currentpath, but that's not possible because we got a match!
-        */
-
-       if ((matchpos + strlen(UD->path))[0] == '\0')
-       {
-               if ((UD->path)[0] == '/')
-               {
-                       if (matchpos == UD->currentpath)
-                               UD->textgrab = 1;
-               }
-               else
-               {
-                       if ((matchpos - 1)[0] == '/')
-                               UD->textgrab = 1;
-               }
-       }
-}
-
-static void
-pgxml_starthandler(void *userData, const XML_Char * name,
-                                  const XML_Char ** atts)
-{
-
-       char            sepstr[] = "/";
-
-       if ((strlen(name) + strlen(UD->currentpath)) > MAXPATHLENGTH - 2)
-               elog(WARNING, "path too long");
-       else
-       {
-               strncat(UD->currentpath, sepstr, 1);
-               strcat(UD->currentpath, name);
-       }
-       if (UD->textgrab)
-       {
-               /*
-                * Depending on user preference, should we "reconstitute" the
-                * element into the result text?
-                */
-       }
-       else
-               pgxml_pathcompare(userData);
-}
-
-static void
-pgxml_endhandler(void *userData, const XML_Char * name)
-{
-       /*
-        * Start by removing the current element off the end of the
-        * currentpath
-        */
-
-       char       *sepptr;
-
-       sepptr = strrchr(UD->currentpath, '/');
-       if (sepptr == NULL)
-       {
-               /* internal error */
-               elog(ERROR, "did not find '/'");
-               sepptr = UD->currentpath;
-       }
-       if (strcmp(name, sepptr + 1) != 0)
-       {
-               elog(WARNING, "wanted [%s], got [%s]", sepptr, name);
-               /* unmatched entry, so do nothing */
-       }
-       else
-       {
-               sepptr[0] = '\0';               /* Chop that element off the end */
-       }
-
-       if (UD->textgrab)
-               pgxml_pathcompare(userData);
-
-}
-
-static void
-pgxml_charhandler(void *userData, const XML_Char * s, int len)
-{
-       if (UD->textgrab)
-       {
-               if (len > 0)
-               {
-                       memcpy(UD->resptr, s, len);
-                       UD->resptr += len;
-                       UD->reslen += len;
-               }
-       }
-}
-
-/* Should I be using PG list types here? */
-
-static void
-pgxml_finalisegrabbedtext(void *userData)
-{
-       /* In res/reslen, we have a single result. */
-       UD->xpres->results[UD->xpres->rescount] = UD->resptr - UD->reslen;
-       UD->xpres->reslens[UD->xpres->rescount] = UD->reslen;
-       UD->reslen = 0;
-       UD->xpres->rescount++;
-
-       /*
-        * This effectively concatenates all the results together but we do
-        * know where one ends and the next begins
-        */
-}
diff --git a/contrib/xml/pgxml.h b/contrib/xml/pgxml.h
deleted file mode 100644 (file)
index 2b80124..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-/* Header for pg xml parser interface */
-
-static void *pgxml_palloc(size_t size);
-static void *pgxml_repalloc(void *ptr, size_t size);
-static void pgxml_pfree(void *ptr);
-static void pgxml_mhs_init();
-static void pgxml_handler_init();
-Datum          pgxml_parse(PG_FUNCTION_ARGS);
-Datum          pgxml_xpath(PG_FUNCTION_ARGS);
-static void pgxml_starthandler(void *userData, const XML_Char * name,
-                                  const XML_Char ** atts);
-static void pgxml_endhandler(void *userData, const XML_Char * name);
-static void pgxml_charhandler(void *userData, const XML_Char * s, int len);
-static void pgxml_pathcompare(void *userData);
-static void pgxml_finalisegrabbedtext(void *userData);
-
-#define MAXPATHLENGTH 512
-#define MAXRESULTS 100
-
-
-typedef struct
-{
-       int                     rescount;
-       char       *results[MAXRESULTS];
-       int32           reslens[MAXRESULTS];
-       char       *resbuf;                     /* pointer to the result buffer for pfree */
-}      XPath_Results;
-
-
-
-typedef struct
-{
-       char            currentpath[MAXPATHLENGTH];
-       char       *path;
-       int                     textgrab;
-       char       *resptr;
-       int32           reslen;
-       XPath_Results *xpres;
-}      pgxml_udata;
-
-
-#define UD ((pgxml_udata *) userData)
index 514643b936e69c7cb46b309c28a8f7d12bb22403..ff46e845b17157409f8cd3cf2e711ccef94c0ed0 100644 (file)
@@ -1,10 +1,57 @@
--- SQL for XML parser
+--SQL for XML parser
 
--- Adjust this setting to control where the objects get created.
-SET search_path TO public;
+CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS bool
+       AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
 
-CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS boolean
-    AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
+CREATE OR REPLACE FUNCTION xpath_string(text,text) RETURNS text
+       AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
 
-CREATE OR REPLACE FUNCTION pgxml_xpath(text, text, text, text) RETURNS text
-    AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
+CREATE OR REPLACE FUNCTION xpath_nodeset(text,text,text,text) RETURNS text
+       AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
+
+CREATE OR REPLACE FUNCTION xpath_number(text,text) RETURNS float4
+       AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
+
+CREATE OR REPLACE FUNCTION xpath_bool(text,text) RETURNS boolean
+       AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
+
+-- List function
+
+CREATE OR REPLACE FUNCTION xpath_list(text,text,text) RETURNS text
+       AS 'MODULE_PATHNAME'
+       LANGUAGE 'c' WITH (isStrict);
+
+
+CREATE OR REPLACE FUNCTION xpath_list(text,text) RETURNS text 
+AS 'SELECT xpath_list($1,$2,'','')' language 'SQL' WITH (isStrict);
+
+
+
+-- Wrapper functions for nodeset where no tags needed.
+
+
+CREATE OR REPLACE FUNCTION xpath_nodeset(text,text) RETURNS text AS
+'SELECT xpath_nodeset($1,$2,'''','''')' language 'SQL' WITH (isStrict);
+
+
+CREATE OR REPLACE FUNCTION xpath_nodeset(text,text,text) RETURNS text AS
+'SELECT xpath_nodeset($1,$2,'''',$3)' language 'SQL' WITH (isStrict);
+
+-- Table function
+
+CREATE OR REPLACE FUNCTION xpath_table(text,text,text,text,text) RETURNS setof record
+       AS 'MODULE_PATHNAME'
+       LANGUAGE 'c' WITH (isStrict);
+
+-- XSLT functions
+-- Delete from here to the end of the file if you are not compiling with
+-- XSLT support.
+
+
+CREATE OR REPLACE FUNCTION xslt_process(text,text,text) RETURNS text 
+       AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
+
+-- the function checks for the correct argument count
+
+CREATE OR REPLACE FUNCTION xslt_process(text,text) RETURNS text 
+       AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
diff --git a/contrib/xml/pgxml_dom.c b/contrib/xml/pgxml_dom.c
deleted file mode 100644 (file)
index 2b11b1d..0000000
+++ /dev/null
@@ -1,265 +0,0 @@
-/* Parser interface for DOM-based parser (libxml) rather than
-   stream-based SAX-type parser */
-
-#include "postgres.h"
-#include "fmgr.h"
-
-/* libxml includes */
-
-#include <libxml/xpath.h>
-#include <libxml/tree.h>
-#include <libxml/xmlmemory.h>
-
-/* declarations */
-
-static void *pgxml_palloc(size_t size);
-static void *pgxml_repalloc(void *ptr, size_t size);
-static void pgxml_pfree(void *ptr);
-static char *pgxml_pstrdup(const char *string);
-
-static void pgxml_parser_init();
-
-static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset, xmlDocPtr doc,
-                                  xmlChar * toptagname, xmlChar * septagname,
-                                  int format);
-
-static xmlChar *pgxml_texttoxmlchar(text *textstring);
-
-
-Datum          pgxml_parse(PG_FUNCTION_ARGS);
-Datum          pgxml_xpath(PG_FUNCTION_ARGS);
-
-/* memory handling passthrough functions (e.g. palloc, pstrdup are
-   currently macros, and the others might become so...) */
-
-static void *
-pgxml_palloc(size_t size)
-{
-       return palloc(size);
-}
-
-static void *
-pgxml_repalloc(void *ptr, size_t size)
-{
-       return repalloc(ptr, size);
-}
-
-static void
-pgxml_pfree(void *ptr)
-{
-       return pfree(ptr);
-}
-
-static char *
-pgxml_pstrdup(const char *string)
-{
-       return pstrdup(string);
-}
-
-static void
-pgxml_parser_init()
-{
-       /*
-        * This code should also set parser settings from  user-supplied info.
-        * Quite how these settings are made is another matter :)
-        */
-
-       xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup);
-       xmlInitParser();
-
-}
-
-
-/* Returns true if document is well-formed */
-
-PG_FUNCTION_INFO_V1(pgxml_parse);
-
-Datum
-pgxml_parse(PG_FUNCTION_ARGS)
-{
-       /* called as pgxml_parse(document) */
-       xmlDocPtr       doctree;
-       text       *t = PG_GETARG_TEXT_P(0);            /* document buffer */
-       int32           docsize = VARSIZE(t) - VARHDRSZ;
-
-       pgxml_parser_init();
-
-       doctree = xmlParseMemory((char *) VARDATA(t), docsize);
-       if (doctree == NULL)
-       {
-               xmlCleanupParser();
-               PG_RETURN_BOOL(false);  /* i.e. not well-formed */
-       }
-       xmlCleanupParser();
-       xmlFreeDoc(doctree);
-       PG_RETURN_BOOL(true);
-}
-
-static xmlChar
-*
-pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
-                                  xmlDocPtr doc,
-                                  xmlChar * toptagname,
-                                  xmlChar * septagname,
-                                  int format)
-{
-       /* Function translates a nodeset into a text representation */
-
-       /*
-        * iterates over each node in the set and calls xmlNodeDump to write
-        * it to an xmlBuffer -from which an xmlChar * string is returned.
-        */
-       /* each representation is surrounded by <tagname> ... </tagname> */
-       /* if format==0, add a newline between nodes?? */
-
-       xmlBufferPtr buf;
-       xmlChar    *result;
-       int                     i;
-
-       buf = xmlBufferCreate();
-
-       if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
-       {
-               xmlBufferWriteChar(buf, "<");
-               xmlBufferWriteCHAR(buf, toptagname);
-               xmlBufferWriteChar(buf, ">");
-       }
-       if (nodeset != NULL)
-       {
-               for (i = 0; i < nodeset->nodeNr; i++)
-               {
-                       if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
-                       {
-                               xmlBufferWriteChar(buf, "<");
-                               xmlBufferWriteCHAR(buf, septagname);
-                               xmlBufferWriteChar(buf, ">");
-                       }
-                       xmlNodeDump(buf, doc, nodeset->nodeTab[i], 1, (format == 2));
-
-                       if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
-                       {
-                               xmlBufferWriteChar(buf, "</");
-                               xmlBufferWriteCHAR(buf, septagname);
-                               xmlBufferWriteChar(buf, ">");
-                       }
-                       if (format)
-                               xmlBufferWriteChar(buf, "\n");
-               }
-       }
-
-       if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
-       {
-               xmlBufferWriteChar(buf, "</");
-               xmlBufferWriteCHAR(buf, toptagname);
-               xmlBufferWriteChar(buf, ">");
-       }
-       result = xmlStrdup(buf->content);
-       xmlBufferFree(buf);
-       return result;
-}
-
-static xmlChar *
-pgxml_texttoxmlchar(text *textstring)
-{
-       xmlChar    *res;
-       int32           txsize;
-
-       txsize = VARSIZE(textstring) - VARHDRSZ;
-       res = (xmlChar *) palloc(txsize + 1);
-       memcpy((char *) res, VARDATA(textstring), txsize);
-       res[txsize] = '\0';
-       return res;
-}
-
-
-PG_FUNCTION_INFO_V1(pgxml_xpath);
-
-Datum
-pgxml_xpath(PG_FUNCTION_ARGS)
-{
-       xmlDocPtr       doctree;
-       xmlXPathContextPtr ctxt;
-       xmlXPathObjectPtr res;
-       xmlChar    *xpath,
-                          *xpresstr,
-                          *toptag,
-                          *septag;
-       xmlXPathCompExprPtr comppath;
-
-       int32           docsize,
-                               ressize;
-       text       *t,
-                          *xpres;
-
-       t = PG_GETARG_TEXT_P(0);        /* document buffer */
-       xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));       /* XPath expression */
-       toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
-       septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(3));
-
-       docsize = VARSIZE(t) - VARHDRSZ;
-
-       pgxml_parser_init();
-
-       doctree = xmlParseMemory((char *) VARDATA(t), docsize);
-       if (doctree == NULL)
-       {                                                       /* not well-formed */
-               xmlCleanupParser();
-               PG_RETURN_NULL();
-       }
-
-       ctxt = xmlXPathNewContext(doctree);
-       ctxt->node = xmlDocGetRootElement(doctree);
-
-       /* compile the path */
-       comppath = xmlXPathCompile(xpath);
-       if (comppath == NULL)
-       {
-               elog(WARNING, "XPath syntax error");
-               xmlFreeDoc(doctree);
-               pfree((void *) xpath);
-               xmlCleanupParser();
-               PG_RETURN_NULL();
-       }
-
-       /* Now evaluate the path expression. */
-       res = xmlXPathCompiledEval(comppath, ctxt);
-       xmlXPathFreeCompExpr(comppath);
-
-       if (res == NULL)
-       {
-               xmlFreeDoc(doctree);
-               pfree((void *) xpath);
-               xmlCleanupParser();
-               PG_RETURN_NULL();               /* seems appropriate */
-       }
-       /* now we dump this node, ?surrounding by tags? */
-       /* To do this, we look first at the type */
-       switch (res->type)
-       {
-               case XPATH_NODESET:
-                       xpresstr = pgxmlNodeSetToText(res->nodesetval,
-                                                                                 doctree,
-                                                                                 toptag, septag, 0);
-                       break;
-               case XPATH_STRING:
-                       xpresstr = xmlStrdup(res->stringval);
-                       break;
-               default:
-                       elog(WARNING, "Unsupported XQuery result: %d", res->type);
-                       xpresstr = xmlStrdup("<unsupported/>");
-       }
-
-
-       /* Now convert this result back to text */
-       ressize = strlen(xpresstr);
-       xpres = (text *) palloc(ressize + VARHDRSZ);
-       memcpy(VARDATA(xpres), xpresstr, ressize);
-       VARATT_SIZEP(xpres) = ressize + VARHDRSZ;
-
-       /* Free various storage */
-       xmlFreeDoc(doctree);
-       pfree((void *) xpath);
-       xmlFree(xpresstr);
-       xmlCleanupParser();
-       PG_RETURN_TEXT_P(xpres);
-}
diff --git a/contrib/xml/pgxml_dom.sql.in b/contrib/xml/pgxml_dom.sql.in
deleted file mode 100644 (file)
index 514643b..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
--- SQL for XML parser
-
--- Adjust this setting to control where the objects get created.
-SET search_path TO public;
-
-CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS boolean
-    AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
-
-CREATE OR REPLACE FUNCTION pgxml_xpath(text, text, text, text) RETURNS text
-    AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
diff --git a/contrib/xml/xpath.c b/contrib/xml/xpath.c
new file mode 100644 (file)
index 0000000..b4fc828
--- /dev/null
@@ -0,0 +1,893 @@
+/* Parser interface for DOM-based parser (libxml) rather than
+   stream-based SAX-type parser */
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "lib/stringinfo.h"
+
+/* libxml includes */
+
+#include <libxml/xpath.h>
+#include <libxml/tree.h>
+#include <libxml/xmlmemory.h>
+#include <libxml/xmlerror.h>
+#include <libxml/parserInternals.h>
+
+/* declarations */
+
+static void *pgxml_palloc(size_t size);
+static void *pgxml_repalloc(void *ptr, size_t size);
+static void pgxml_pfree(void *ptr);
+static char *pgxml_pstrdup(const char *string);
+static void pgxml_errorHandler (void * ctxt, const char *msg, ...);
+
+void elog_error(int level, char *explain, int force);
+void pgxml_parser_init(void);
+
+static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
+                                  xmlChar * toptagname, xmlChar * septagname,
+                                  xmlChar * plainsep);
+
+text *pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar *toptag, 
+                          xmlChar *septag, xmlChar *plainsep);
+
+xmlChar *pgxml_texttoxmlchar(text *textstring);
+
+static xmlXPathObjectPtr pgxml_xpath(text *document, xmlChar* xpath);
+
+
+Datum          pgxml_parse(PG_FUNCTION_ARGS);
+Datum           xpath_nodeset(PG_FUNCTION_ARGS);
+Datum          xpath_string(PG_FUNCTION_ARGS);
+Datum          xpath_number(PG_FUNCTION_ARGS);
+Datum           xpath_bool(PG_FUNCTION_ARGS);
+Datum           xpath_list(PG_FUNCTION_ARGS);
+Datum           xpath_table(PG_FUNCTION_ARGS);
+
+/* Global variables */
+char *errbuf; /* scratch buffer holding one formatted libxml message; allocated in pgxml_parser_init() */
+char *pgxml_errorMsg = NULL; /* messages accumulated by pgxml_errorHandler(); NULL when nothing is pending */
+
+/* Convenience macros: convert between C strings and text values via textin/textout */
+
+#define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))
+#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
+
+#define ERRBUF_SIZE 200
+
+/* memory handling passthrough functions (e.g. palloc, pstrdup are
+   currently macros, and the others might become so...) */
+
+/* libxml malloc replacement: hands the request straight to palloc(). */
+static void *
+pgxml_palloc(size_t size)
+{
+       void       *chunk;
+
+       chunk = palloc(size);
+       return chunk;
+}
+
+/* libxml realloc replacement: resizes the chunk with repalloc(). */
+static void *
+pgxml_repalloc(void *ptr, size_t size)
+{
+       void       *resized;
+
+       resized = repalloc(ptr, size);
+       return resized;
+}
+
+/*
+ * libxml free replacement: releases a chunk with pfree().
+ *
+ * libxml treats its free hook like free(), so a NULL pointer may be passed;
+ * that case is ignored here rather than handed to pfree().  The call is a
+ * plain statement because ISO C does not allow "return <expr>;" in a
+ * function returning void.
+ */
+static void
+pgxml_pfree(void *ptr)
+{
+       if (ptr != NULL)
+               pfree(ptr);
+}
+
+/* libxml strdup replacement: duplicates the string with pstrdup(). */
+static char *
+pgxml_pstrdup(const char *string)
+{
+       char       *copy;
+
+       copy = pstrdup(string);
+       return copy;
+}
+
+/* The error handling function. This formats an error message and sets
+ * a flag - an ereport will be issued prior to return
+ */
+
+static void
+pgxml_errorHandler (void * ctxt, const char *msg, ...)
+{
+  va_list args;
+
+  /* Format this message fragment into the scratch buffer (truncated to
+   * ERRBUF_SIZE - 1 characters by vsnprintf). */
+  va_start(args, msg);
+  vsnprintf(errbuf, ERRBUF_SIZE, msg, args);
+  va_end(args);
+
+  if (pgxml_errorMsg == NULL)
+    {
+      /* First fragment: start the accumulated message */
+      pgxml_errorMsg = pstrdup(errbuf);
+    }
+  else
+    {
+      size_t oldlen = strlen(pgxml_errorMsg);
+      size_t addlen = strlen(errbuf);
+      /* As before, the new fragment overwrites the last character of the
+       * accumulated text (NOTE(review): presumably to drop a trailing
+       * newline - confirm).  When the accumulated text is empty there is
+       * no last character, so write at offset 0 instead of index -1. */
+      size_t start = (oldlen > 0) ? oldlen - 1 : 0;
+
+      pgxml_errorMsg = repalloc(pgxml_errorMsg, oldlen + addlen + 1);
+      memcpy(pgxml_errorMsg + start, errbuf, addlen);
+      pgxml_errorMsg[start + addlen] = '\0';
+    }
+
+  /* Leave the scratch buffer clean for the next fragment */
+  memset(errbuf, 0, ERRBUF_SIZE);
+}
+
+/*
+ * Report the accumulated libxml message (if any) at the given level.
+ *
+ * level   - elevel passed to ereport()
+ * explain - caller's description; always passed as data through "%s", never
+ *           used as a format string
+ * force   - if non-zero, report "explain" even when no libxml message is
+ *           pending; if zero, do nothing unless a message is pending
+ *
+ * After a report that returns (level below ERROR) the accumulated message is
+ * freed and the global pointer cleared, so it cannot be freed or reported a
+ * second time.
+ */
+void elog_error(int level, char *explain, int force)
+{
+  if (pgxml_errorMsg == NULL)
+    {
+      if (force)
+       {
+         ereport(level,(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
+                                        errmsg("%s",explain)));
+       }
+      return;
+    }
+
+  ereport(level,(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
+                                errmsg("%s:%s",explain,pgxml_errorMsg)));
+  pfree(pgxml_errorMsg);
+  pgxml_errorMsg = NULL;
+}
+
+/*
+ * Prepare libxml for use by this module: route its memory management
+ * through the palloc-based wrappers, install pgxml_errorHandler as the
+ * generic error function, turn on entity substitution and external DTD
+ * loading, and reset the error-message state.
+ */
+void
+pgxml_parser_init(void)
+{
+       /*
+        * This code could also set parser settings from  user-supplied info.
+        * Quite how these settings are made is another matter :)
+        */
+
+       xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup);
+       xmlInitParser();
+
+       xmlSetGenericErrorFunc(NULL, pgxml_errorHandler);
+
+       xmlSubstituteEntitiesDefault(1);
+       xmlLoadExtDtdDefaultValue = 1;
+
+       /* No message pending yet */
+       pgxml_errorMsg = NULL;
+
+       /* The handler formats into errbuf with ERRBUF_SIZE as its limit, so
+        * the allocation must use the same constant. */
+       errbuf = palloc(ERRBUF_SIZE);
+       memset(errbuf, 0, ERRBUF_SIZE);
+}
+
+
+/* Returns true if document is well-formed */
+
+PG_FUNCTION_INFO_V1(pgxml_parse);
+
+/*
+ * pgxml_parse(document) - true if the document parses as well-formed XML,
+ * false otherwise.
+ */
+Datum
+pgxml_parse(PG_FUNCTION_ARGS)
+{
+       text       *doc = PG_GETARG_TEXT_P(0);          /* document buffer */
+       int32           doclen = VARSIZE(doc) - VARHDRSZ;
+       xmlDocPtr       tree;
+
+       pgxml_parser_init();
+       tree = xmlParseMemory((char *) VARDATA(doc), doclen);
+
+       /* Parser cleanup happens on both outcomes, before the tree is freed */
+       xmlCleanupParser();
+
+       if (tree == NULL)
+       {
+               PG_RETURN_BOOL(false);  /* i.e. not well-formed */
+       }
+
+       xmlFreeDoc(tree);
+       PG_RETURN_BOOL(true);
+}
+
+
+/*
+ * Append "<tag>" (closing == 0) or "</tag>" (closing != 0) to buf.
+ * Does nothing when tag is NULL or empty.
+ */
+static void
+pgxml_write_tag(xmlBufferPtr buf, const xmlChar *tag, int closing)
+{
+       if (tag == NULL || xmlStrlen(tag) <= 0)
+               return;
+
+       xmlBufferWriteChar(buf, closing ? "</" : "<");
+       xmlBufferWriteCHAR(buf, tag);
+       xmlBufferWriteChar(buf, ">");
+}
+
+/*
+ * Translate a nodeset into a text representation.
+ *
+ * nodeset    - nodes to render; NULL is treated as an empty set
+ * toptagname - if non-empty, the whole output is wrapped in this tag
+ * septagname - if non-empty (and plainsep is NULL), each node's dump is
+ *              wrapped in this tag
+ * plainsep   - an ordinary (not tag) separator; when non-NULL each node is
+ *              cast to its string value and the separator is written
+ *              between entries (not after the last one)
+ *
+ * Returns a newly allocated copy of the buffer contents (xmlStrdup).
+ */
+static xmlChar *
+pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
+                                  xmlChar * toptagname,
+                                  xmlChar * septagname,
+                                  xmlChar * plainsep)
+{
+       xmlBufferPtr buf;
+       xmlChar    *result;
+       int                     i;
+
+       buf = xmlBufferCreate();
+
+       pgxml_write_tag(buf, toptagname, 0);
+
+       if (nodeset != NULL)
+       {
+               for (i = 0; i < nodeset->nodeNr; i++)
+               {
+                       xmlNodePtr      node = nodeset->nodeTab[i];
+
+                       if (plainsep != NULL)
+                       {
+                               /* The cast returns a newly allocated string, so
+                                * release it once it has been copied to the buffer. */
+                               xmlChar    *nodestr = xmlXPathCastNodeToString(node);
+
+                               xmlBufferWriteCHAR(buf, nodestr);
+                               if (nodestr != NULL)
+                                       xmlFree(nodestr);
+
+                               /* If this isn't the last entry, write the plain sep.
+                                * (an xmlChar string, hence the CHAR writer) */
+                               if (i < nodeset->nodeNr - 1)
+                                       xmlBufferWriteCHAR(buf, plainsep);
+                       }
+                       else
+                       {
+                               pgxml_write_tag(buf, septagname, 0);
+                               xmlNodeDump(buf, node->doc, node, 1, 0);
+                               pgxml_write_tag(buf, septagname, 1);
+                       }
+               }
+       }
+
+       pgxml_write_tag(buf, toptagname, 1);
+
+       result = xmlStrdup(buf->content);
+       xmlBufferFree(buf);
+       return result;
+}
+
+
+/* Translate a PostgreSQL "varlena" -i.e. a variable length parameter
+ * into the libxml2 representation
+ */
+
+/* Returns a palloc'd, NUL-terminated copy of the text value's payload. */
+xmlChar *
+pgxml_texttoxmlchar(text *textstring)
+{
+       int32           len = VARSIZE(textstring) - VARHDRSZ;
+       xmlChar    *copy = (xmlChar *) palloc(len + 1);
+
+       memcpy((char *) copy, VARDATA(textstring), len);
+       copy[len] = '\0';
+
+       return copy;
+}
+
+/* Public visible XPath functions */
+
+/* This is a "raw" xpath function. Check that it returns child elements
+ * properly
+ */
+
+PG_FUNCTION_INFO_V1(xpath_nodeset);
+
+/*
+ * xpath_nodeset(document, xpath, toptag, septag)
+ *
+ * Evaluates the XPath expression against the document and returns the
+ * result rendered as text, using the two tag arguments to wrap the output.
+ * Returns SQL NULL when no result text is produced.
+ */
+Datum
+xpath_nodeset(PG_FUNCTION_ARGS)
+{
+       xmlChar    *xpath;
+       xmlChar    *toptag;
+       xmlChar    *septag;
+       text       *xpres;
+
+       /* PG_GETARG_TEXT_P(0) is document buffer */
+       xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));       /* XPath expression */
+       toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
+       septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(3));
+
+       xpres = pgxml_result_to_text(
+                                    pgxml_xpath(PG_GETARG_TEXT_P(0),xpath),
+                                    toptag,septag,NULL);
+
+       /* xmlCleanupParser(); done by result_to_text routine */
+
+       /* Release the C-string copies made above */
+       pfree((void *) xpath);
+       pfree((void *) toptag);
+       pfree((void *) septag);
+
+       if (xpres == NULL)
+         {
+           PG_RETURN_NULL();
+         }
+       PG_RETURN_TEXT_P(xpres);
+}
+
+// The following function is almost identical, but returns the elements in
+// a list.
+
+PG_FUNCTION_INFO_V1(xpath_list);
+
+/*
+ * xpath_list(document, xpath, separator)
+ *
+ * Evaluates the XPath expression against the document and returns the
+ * result as text, with nodeset entries cast to strings and joined by the
+ * plain separator.  Returns SQL NULL when no result text is produced.
+ */
+Datum
+xpath_list(PG_FUNCTION_ARGS)
+{
+       xmlChar    *xpath;
+       xmlChar    *plainsep;
+       text       *xpres;
+
+       /* PG_GETARG_TEXT_P(0) is document buffer */
+       xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));       /* XPath expression */
+       plainsep = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
+
+       xpres = pgxml_result_to_text(
+                                    pgxml_xpath(PG_GETARG_TEXT_P(0),xpath),
+                                    NULL,NULL,plainsep);
+
+       /* xmlCleanupParser(); done by result_to_text routine */
+
+       /* Release the C-string copies made above */
+       pfree((void *) xpath);
+       pfree((void *) plainsep);
+
+       if (xpres == NULL)
+         {
+           PG_RETURN_NULL();
+         }
+       PG_RETURN_TEXT_P(xpres);
+}
+
+
+PG_FUNCTION_INFO_V1(xpath_string);
+
+/*
+ * xpath_string(document, xpath)
+ *
+ * Wraps the supplied path in "string(...)" and returns the evaluation
+ * result as text, or SQL NULL when no result text is produced.
+ */
+Datum
+xpath_string(PG_FUNCTION_ARGS)
+{
+       text       *pathtext = PG_GETARG_TEXT_P(1);     /* XPath expression */
+       int32           pathlen = VARSIZE(pathtext) - VARHDRSZ;
+       xmlChar    *wrapped;
+       xmlChar    *cursor;
+       text       *result;
+
+       /* "string(" + path + ")" + NUL = path length + 9 bytes */
+       wrapped = (xmlChar *) palloc(pathlen + 9);
+       cursor = wrapped;
+
+       memcpy((char *) cursor, "string(", 7);
+       cursor += 7;
+       memcpy((char *) cursor, VARDATA(pathtext), pathlen);
+       cursor += pathlen;
+       *cursor++ = ')';
+       *cursor = '\0';
+
+       /* PG_GETARG_TEXT_P(0) is document buffer */
+       result = pgxml_result_to_text(
+                                    pgxml_xpath(PG_GETARG_TEXT_P(0),wrapped),
+                                    NULL,NULL,NULL);
+
+       xmlCleanupParser();
+       pfree((void *) wrapped);
+
+       if (result != NULL)
+         {
+           PG_RETURN_TEXT_P(result);
+         }
+       PG_RETURN_NULL();
+}
+
+
+PG_FUNCTION_INFO_V1(xpath_number);
+
+/*
+ * xpath_number(document, xpath)
+ *
+ * Evaluates the XPath expression and returns the result cast to a number
+ * as float4.  Returns SQL NULL when evaluation yields no result object or
+ * the number is NaN.
+ */
+Datum
+xpath_number(PG_FUNCTION_ARGS)
+{
+       xmlChar    *xpath;
+       xmlXPathObjectPtr res;
+       double          number;
+
+       /* PG_GETARG_TEXT_P(0) is document buffer */
+       xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));       /* XPath expression */
+
+       res = pgxml_xpath(PG_GETARG_TEXT_P(0),xpath);
+       pfree((void *) xpath);
+
+       if (res == NULL)
+         {
+           xmlCleanupParser();
+           PG_RETURN_NULL();
+         }
+
+       /* Test for NaN on the double libxml returns, then narrow to float4 */
+       number = xmlXPathCastToNumber(res);
+       xmlCleanupParser();
+       if (xmlXPathIsNaN(number))
+         {
+           PG_RETURN_NULL();
+         }
+
+       PG_RETURN_FLOAT4((float4) number);
+}
+
+
+PG_FUNCTION_INFO_V1(xpath_bool);
+
+/*
+ * xpath_bool(document, xpath)
+ *
+ * Evaluates the XPath expression and returns the result cast to boolean.
+ * Returns false (not NULL) when evaluation yields no result object.
+ */
+Datum
+xpath_bool(PG_FUNCTION_ARGS)
+{
+       xmlChar    *xpath;
+       xmlXPathObjectPtr res;
+       int                     bRes;
+
+       /* PG_GETARG_TEXT_P(0) is document buffer */
+       xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));       /* XPath expression */
+
+       res = pgxml_xpath(PG_GETARG_TEXT_P(0),xpath);
+       pfree((void *) xpath);
+
+       if (res == NULL)
+         {
+           xmlCleanupParser();
+           PG_RETURN_BOOL(false);
+         }
+
+       bRes = xmlXPathCastToBoolean(res);
+       xmlCleanupParser();
+       PG_RETURN_BOOL(bRes);
+}
+
+
+
+/* Core function to evaluate XPath query */
+
+/*
+ * Core function to evaluate an XPath query against a document.
+ *
+ * document - text value holding the XML document
+ * xpath    - NUL-terminated XPath expression
+ *
+ * Returns the evaluation result, or NULL when the document is not
+ * well-formed or evaluation produces no result object.  A path that fails
+ * to compile is reported through elog_error() at ERROR level.
+ *
+ * On success the parsed document is deliberately NOT freed here: a nodeset
+ * result points at nodes inside it.  The XPath context is no longer needed
+ * once evaluation has finished, so it is released on every path.
+ */
+static xmlXPathObjectPtr
+pgxml_xpath(text *document, xmlChar *xpath)
+{
+       xmlDocPtr       doctree;
+       xmlXPathContextPtr ctxt;
+       xmlXPathCompExprPtr comppath;
+       xmlXPathObjectPtr res;
+       int32           docsize;
+
+       docsize = VARSIZE(document) - VARHDRSZ;
+
+       pgxml_parser_init();
+
+       doctree = xmlParseMemory((char *) VARDATA(document), docsize);
+       if (doctree == NULL)
+       {                                       /* not well-formed */
+               return NULL;
+       }
+
+       ctxt = xmlXPathNewContext(doctree);
+       ctxt->node = xmlDocGetRootElement(doctree);
+
+       /* compile the path */
+       comppath = xmlXPathCompile(xpath);
+       if (comppath == NULL)
+       {
+               /* Release libxml objects first, then shut the parser down */
+               xmlXPathFreeContext(ctxt);
+               xmlFreeDoc(doctree);
+               xmlCleanupParser();
+               elog_error(ERROR,"XPath Syntax Error",1);
+
+               return NULL;
+       }
+
+       /* Now evaluate the path expression. */
+       res = xmlXPathCompiledEval(comppath, ctxt);
+       xmlXPathFreeCompExpr(comppath);
+       xmlXPathFreeContext(ctxt);
+
+       if (res == NULL)
+       {
+               xmlFreeDoc(doctree);
+
+               return NULL;
+       }
+
+       return res;
+}
+
+/*
+ * Convert an XPath evaluation result into a text value.
+ *
+ * A NULL result yields NULL.  Nodesets are rendered by pgxmlNodeSetToText()
+ * with the supplied tags/separator, strings are copied, and any other
+ * result type produces a NOTICE and the placeholder "<unsupported/>".
+ * The parser is cleaned up here, and any pending libxml message is raised
+ * as an ERROR before returning.
+ */
+text *
+pgxml_result_to_text(xmlXPathObjectPtr res,
+                     xmlChar *toptag,
+                     xmlChar *septag,
+                     xmlChar *plainsep)
+{
+       xmlChar    *rendered;
+       int32           len;
+       text       *out;
+
+       if (res == NULL)
+               return NULL;
+
+       if (res->type == XPATH_NODESET)
+       {
+               rendered = pgxmlNodeSetToText(res->nodesetval,
+                                             toptag,
+                                             septag, plainsep);
+       }
+       else if (res->type == XPATH_STRING)
+       {
+               rendered = xmlStrdup(res->stringval);
+       }
+       else
+       {
+               elog(NOTICE, "Unsupported XQuery result: %d", res->type);
+               rendered = xmlStrdup("<unsupported/>");
+       }
+
+       /* Now convert this result back to text */
+       len = strlen(rendered);
+       out = (text *) palloc(len + VARHDRSZ);
+       memcpy(VARDATA(out), rendered, len);
+       VARATT_SIZEP(out) = len + VARHDRSZ;
+
+       /* Free various storage; the document is not freed here */
+       xmlCleanupParser();
+       xmlFree(rendered);
+
+       /* Raises only if the error handler collected a message */
+       elog_error(ERROR,"XPath error",0);
+
+       return out;
+}
+
+/* xpath_table is a table function. It needs some tidying (as do the
+ * other functions here!
+ */
+
+PG_FUNCTION_INFO_V1(xpath_table);
+
+Datum xpath_table(PG_FUNCTION_ARGS)
+{
+/* SPI (input tuple) support */
+  SPITupleTable *tuptable;
+  HeapTuple  spi_tuple;
+  TupleDesc  spi_tupdesc;
+
+/* Output tuple (tuplestore) support */
+  Tuplestorestate *tupstore = NULL;
+  TupleDesc ret_tupdesc;
+  HeapTuple  ret_tuple;
+
+  ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+  AttInMetadata *attinmeta;
+  MemoryContext per_query_ctx;
+  MemoryContext oldcontext;
+
+/* Function parameters */              
+  char *pkeyfield = GET_STR(PG_GETARG_TEXT_P(0));
+  char *xmlfield = GET_STR(PG_GETARG_TEXT_P(1));
+  char *relname = GET_STR(PG_GETARG_TEXT_P(2));
+  char *xpathset = GET_STR(PG_GETARG_TEXT_P(3));
+  char *condition = GET_STR(PG_GETARG_TEXT_P(4));
+
+  char **values;
+  xmlChar **xpaths; 
+  xmlChar *pos;
+  xmlChar *pathsep= "|";
+
+  int numpaths;
+  int ret;
+  int proc;
+  int i;
+  int j;
+  int rownr; /* For issuing multiple rows from one original document */
+  int had_values; /* To determine end of nodeset results */
+
+  StringInfo querysql;
+
+/* We only have a valid tuple description in table function mode */
+  if (rsinfo->expectedDesc == NULL) {
+         ereport(ERROR,(errcode(ERRCODE_SYNTAX_ERROR),
+                                        errmsg("xpath_table must be called as a table function")));
+  }
+/* The tuplestore must exist in a higher context than 
+ * this function call (per_query_ctx is used) */
+
+  per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
+  oldcontext = MemoryContextSwitchTo(per_query_ctx);
+
+/* Create the tuplestore - SortMem is the max in-memory size before it is
+ * shipped to a disk heap file. Just like ... SortMem!
+ */
+
+  tupstore = tuplestore_begin_heap(true, false, SortMem);
+
+  MemoryContextSwitchTo(oldcontext);
+
+  /* get the requested return tuple description */
+  ret_tupdesc = CreateTupleDescCopy(rsinfo->expectedDesc);
+
+  /* At the moment we assume that the returned attributes make sense
+   * for the XPath specified (i.e. we trust the caller). 
+   * It's not fatal if they get it wrong - the input function for the
+   * column type will raise an error if the path result can't be converted
+   * into the correct binary representation.
+   */
+
+  attinmeta = TupleDescGetAttInMetadata(ret_tupdesc);
+
+  /* We want to materialise because it means that we don't have to
+   * carry libxml2 parser state between invocations of this function
+   */
+
+  /* check to see if caller supports us returning a tuplestore */
+  if (!rsinfo || !(rsinfo->allowedModes & SFRM_Materialize))
+         ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR),
+                                         errmsg("xpath_table requires Materialize mode, but it is not "
+                                                        "allowed in this context")));
+
+  // Set return mode and allocate value space.
+  rsinfo->returnMode = SFRM_Materialize;
+  rsinfo->setDesc = ret_tupdesc;
+  
+  values = (char **) palloc(ret_tupdesc->natts * sizeof(char *));
+
+  xpaths = (xmlChar **) palloc(ret_tupdesc->natts * sizeof(xmlChar *));
+
+  /* Split XPaths. xpathset is a writable CString. */
+
+  /* Note that we stop splitting once we've done all needed for tupdesc */
+
+  numpaths=0;
+  pos = xpathset;
+  do {
+    xpaths[numpaths] = pos;
+    pos = strstr(pos,pathsep);
+    if (pos != NULL) {
+      *pos = '\0';
+      pos++;
+    }
+    numpaths++;
+  } while ((pos != NULL) && (numpaths < (ret_tupdesc->natts - 1) ));
+
+  /* Now build query */
+
+  querysql = makeStringInfo();
+
+  /* Build initial sql statement */
+  appendStringInfo(querysql, "SELECT %s, %s FROM %s WHERE %s",
+                  pkeyfield,
+                  xmlfield,
+                  relname,
+                  condition
+                  );
+
+
+  if ((ret = SPI_connect()) < 0) {
+    elog(ERROR, "xpath_table: SPI_connect returned %d", ret);
+  }
+
+  if ((ret = SPI_exec(querysql->data,0)) != SPI_OK_SELECT) {
+    elog(ERROR,"xpath_table: SPI execution failed for query %s",querysql->data);
+  }
+
+  proc= SPI_processed;
+  /*  elog(DEBUG1,"xpath_table: SPI returned %d rows",proc); */
+  tuptable = SPI_tuptable;
+  spi_tupdesc = tuptable->tupdesc;
+
+/* Switch out of SPI context */
+  MemoryContextSwitchTo(oldcontext);
+
+
+/* Check that SPI returned correct result. If you put a comma into one of
+ * the function parameters, this will catch it when the SPI query returns
+ * e.g. 3 columns. 
+ */
+
+  if (spi_tupdesc->natts != 2) {
+    ereport(ERROR,(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                  errmsg("Expression returning multiple columns is not valid in parameter list"),
+                                  errdetail("Expected two columns in SPI result, got %d",spi_tupdesc->natts)));
+  }
+
+/* Setup the parser. Beware that this must happen in the same context as the
+ * cleanup - which means that any error from here on must do cleanup to
+ * ensure that the entity table doesn't get freed by being out of context.
+ */
+  pgxml_parser_init();
+
+   /* For each row i.e. document returned from SPI */
+  for (i=0; i < proc; i++) {
+         char *pkey;
+         char *xmldoc;
+
+         xmlDocPtr     doctree;
+         xmlXPathContextPtr ctxt;
+         xmlXPathObjectPtr res;
+         xmlChar *resstr;
+
+    
+         xmlXPathCompExprPtr comppath;
+
+         /* Extract the row data as C Strings */
+         
+         spi_tuple = tuptable->vals[i]; 
+         pkey = SPI_getvalue(spi_tuple, spi_tupdesc,1);
+         xmldoc = SPI_getvalue(spi_tuple, spi_tupdesc,2);
+
+
+         /* Clear the values array, so that not-well-formed documents
+          * return NULL in all columns.
+          */
+    
+         /* Note that this also means that spare columns will be NULL. */
+         for (j=0; j < ret_tupdesc->natts; j++) {
+                 values[j]= NULL;
+         }
+    
+         /* Insert primary key */
+         values[0]=pkey;
+    
+         /* Parse the document */
+         doctree = xmlParseMemory(xmldoc, strlen(xmldoc));
+
+         if (doctree == NULL)
+      {                                        /* not well-formed, so output all-NULL tuple */
+       
+                 ret_tuple = BuildTupleFromCStrings(attinmeta, values);
+                 oldcontext = MemoryContextSwitchTo(per_query_ctx);
+                 tuplestore_puttuple(tupstore, ret_tuple);
+                 MemoryContextSwitchTo(oldcontext);
+                 heap_freetuple(ret_tuple);
+      }
+         else 
+      {
+                 /* New loop here - we have to deal with nodeset results */
+                 rownr=0;
+                 
+                 do {
+                         /* Now evaluate the set of xpaths. */
+                         had_values=0;
+                         for (j=0; j < numpaths; j++) {
+                                 
+                                 ctxt = xmlXPathNewContext(doctree);
+                                 ctxt->node = xmlDocGetRootElement(doctree);
+                                 xmlSetGenericErrorFunc(ctxt, pgxml_errorHandler);
+                                 
+                                 /* compile the path */
+                                 comppath = xmlXPathCompile(xpaths[j]);
+                                 if (comppath == NULL)
+                                 {
+                                         xmlCleanupParser();
+                                         xmlFreeDoc(doctree);
+                                         
+                                         elog_error(ERROR,"XPath Syntax Error",1);
+                                         
+                                         PG_RETURN_NULL();  /* Keep compiler happy */
+                                 }
+                                 
+                                 /* Now evaluate the path expression. */
+                                 res = xmlXPathCompiledEval(comppath, ctxt);
+                                 xmlXPathFreeCompExpr(comppath);
+                                 
+                                 if (res != NULL) 
+                                 {
+                                         switch (res->type)
+                                         {
+                                         case XPATH_NODESET:
+                                                 /* We see if this nodeset has enough nodes */
+                                                 if ((res->nodesetval != NULL) && (rownr < res->nodesetval->nodeNr)) {
+                                                         resstr = 
+                                                                 xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
+                                                         had_values=1;
+                                                 } else {
+                                                         resstr = NULL;
+                                                 }
+                                                 
+                                                 break;
+                                                 
+                                         case XPATH_STRING:
+                                                 resstr = xmlStrdup(res->stringval);
+                                                 break;
+                                                 
+                                         default:
+                                                 elog(NOTICE, "Unsupported XQuery result: %d", res->type);
+                                                 resstr = xmlStrdup("<unsupported/>");
+                                         }
+                                         
+                                         
+                                         // Insert this into the appropriate column in the result tuple.
+                                         values[j+1] = resstr;
+                                 }
+                                 xmlXPathFreeContext(ctxt);
+                         }
+                         // Now add the tuple to the output, if there is one.
+                         if (had_values) {
+                                 ret_tuple = BuildTupleFromCStrings(attinmeta, values);
+                                 oldcontext = MemoryContextSwitchTo(per_query_ctx);
+                                 tuplestore_puttuple(tupstore, ret_tuple);
+                                 MemoryContextSwitchTo(oldcontext);
+                                 heap_freetuple(ret_tuple);
+                         }
+                         
+                         rownr++;
+                         
+                 } while (had_values);
+                 
+      }
+         
+    xmlFreeDoc(doctree);    
+    
+    pfree(pkey);
+    pfree(xmldoc);
+  }
+
+  xmlCleanupParser();
+/* Needed to flag completeness in 7.3.1. 7.4 defines it as a no-op. */
+  tuplestore_donestoring(tupstore);
+
+  SPI_finish();
+
+  rsinfo->setResult=tupstore;
+  
+  /*
+   * SFRM_Materialize mode expects us to return a NULL Datum. The actual
+   * tuples are in our tuplestore and passed back through
+   * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
+   * that we actually used to build our tuples with, so the caller can
+   * verify we did what it was expecting.
+   */
+  return (Datum) 0;
+  
+}
diff --git a/contrib/xml/xslt_proc.c b/contrib/xml/xslt_proc.c
new file mode 100644 (file)
index 0000000..64f9736
--- /dev/null
@@ -0,0 +1,184 @@
+/* XSLT processing functions (requiring libxslt) */
+/* John Gray, for Torchbox 2003-04-01 */
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+
+/* libxml includes */
+
+#include <libxml/xpath.h>
+#include <libxml/tree.h>
+#include <libxml/xmlmemory.h>
+
+/* libxslt includes */
+
+#include <libxslt/xslt.h>
+#include <libxslt/xsltInternals.h>
+#include <libxslt/transform.h>
+#include <libxslt/xsltutils.h>
+
+
+/* declarations to come from xpath.c */
+
+extern void elog_error(int level, char *explain, int force);
+extern void pgxml_parser_init();
+extern xmlChar *pgxml_texttoxmlchar(text *textstring);
+/* Convert a text datum to a C string by calling textout on it */
+#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
+
+/* local defs */
+static void parse_params(const char **params, text *paramstr);
+
+Datum xslt_process(PG_FUNCTION_ARGS);
+
+/* Upper bound on params[] entries (names plus values) filled by parse_params */
+#define MAXPARAMS 20
+
+PG_FUNCTION_INFO_V1(xslt_process);
+
+/*
+ * xslt_process(document, stylesheet [, params]): apply an XSLT stylesheet
+ * (with optional parameters, see parse_params) to an XML document and return
+ * the result as text.  An argument whose first byte is '<' is parsed as
+ * literal XML, anything else as a file name.  Returns SQL NULL if no result
+ * tree is produced or it cannot be serialized.
+ */
+Datum xslt_process(PG_FUNCTION_ARGS) {
+  const char *params[MAXPARAMS + 1]; /* +1 for the terminator */
+  xsltStylesheetPtr stylesheet = NULL;
+  xmlDocPtr doctree;
+  xmlDocPtr restree;
+  xmlDocPtr ssdoc = NULL;
+  xmlChar *resstr = NULL;
+  int resstat = -1; /* stays negative if there is nothing to serialize */
+  int reslen = 0;
+
+  text *doct  = PG_GETARG_TEXT_P(0);
+  text *ssheet  = PG_GETARG_TEXT_P(1);
+  text *tres;
+
+  /* No parameters unless a third argument was supplied */
+  params[0] = NULL;
+  if (fcinfo->nargs == 3)
+    parse_params(params, PG_GETARG_TEXT_P(2));
+
+  /* Setup parser */
+  pgxml_parser_init();
+
+  /* Check to see if document is a file or a literal */
+  if (VARDATA(doct)[0] == '<')
+    {
+      doctree = xmlParseMemory((char *) VARDATA(doct), VARSIZE(doct)-VARHDRSZ);
+    }
+  else
+    {
+      doctree = xmlParseFile(GET_STR(doct));
+    }
+
+  if (doctree == NULL)
+    {
+      xmlCleanupParser();
+      elog_error(ERROR,"Error parsing XML document",0);
+      PG_RETURN_NULL();
+    }
+
+  /* Same for stylesheet */
+  if (VARDATA(ssheet)[0] == '<')
+    {
+      ssdoc = xmlParseMemory((char *) VARDATA(ssheet),
+                             VARSIZE(ssheet)-VARHDRSZ);
+      if (ssdoc == NULL)
+        {
+          xmlFreeDoc(doctree);
+          xmlCleanupParser();
+          elog_error(ERROR,"Error parsing stylesheet as XML document",0);
+          PG_RETURN_NULL();
+        }
+      stylesheet = xsltParseStylesheetDoc(ssdoc);
+    }
+  else
+    {
+      stylesheet = xsltParseStylesheetFile(GET_STR(ssheet));
+    }
+
+  if (stylesheet == NULL)
+    {
+      xmlFreeDoc(doctree);
+      xsltCleanupGlobals();
+      xmlCleanupParser();
+      elog_error(ERROR,"Failed to parse stylesheet",0);
+      PG_RETURN_NULL();
+    }
+
+  /* A failed transformation returns NULL, which must not be serialized. */
+  restree = xsltApplyStylesheet(stylesheet, doctree, params);
+  if (restree != NULL)
+    resstat = xsltSaveResultToString(&resstr, &reslen, restree, stylesheet);
+
+  xsltFreeStylesheet(stylesheet);
+  xmlFreeDoc(restree);
+  xmlFreeDoc(doctree);
+  xsltCleanupGlobals();
+  xmlCleanupParser();
+
+  if (resstat < 0) {
+    PG_RETURN_NULL();
+  }
+
+  tres = palloc(reslen + VARHDRSZ);
+  if (reslen > 0)
+    memcpy(VARDATA(tres),resstr,reslen);
+  VARATT_SIZEP(tres) = reslen + VARHDRSZ;
+
+  /* resstr was allocated by libxml; release it now that it is copied. */
+  if (resstr != NULL)
+    xmlFree(resstr);
+
+  PG_RETURN_TEXT_P(tres);
+}
+
+
+/*
+ * Split "name=value,name=value" into the NULL-terminated list of alternating
+ * names and values passed to xsltApplyStylesheet.  The C string obtained
+ * from paramstr is cut up in place and params[] points into it.  params
+ * must have room for MAXPARAMS + 1 entries.  Parsing stops at the first
+ * item lacking '=' or once MAXPARAMS entries have been stored.
+ */
+static void parse_params(const char **params, text *paramstr)
+{
+  char *pos;
+  char *eq;
+  int n = 0;           /* number of params[] entries filled so far */
+  char *nvsep="=";
+  char *itsep=",";
+
+  pos = GET_STR(paramstr);
+
+  /* Each pass stores one name and one value, so two free slots are needed */
+  while (n + 1 < MAXPARAMS)
+    {
+      eq = strstr(pos,nvsep);
+      if (eq == NULL)
+        break;         /* no '=': not a parameter, stop here */
+      *eq = '\0';
+      params[n++] = pos;
+      params[n++] = eq + 1;
+
+      /* Value runs up to the next item separator, or to the end */
+      pos = strstr(eq + 1,itsep);
+      if (pos == NULL)
+        break;
+      *pos = '\0';
+      pos++;
+    }
+
+  /*
+   * Always terminate the list.  The old code skipped this when the loop
+   * ran out of slots, leaving params[MAXPARAMS] uninitialized.
+   */
+  params[n] = NULL;
+}