-# $PostgreSQL: pgsql/contrib/xml/Makefile,v 1.4 2003/11/29 19:51:36 pgsql Exp $
-
+# This makefile will build the new XML and XSLT routines.
subdir = contrib/xml
-top_builddir = ../..
+top_builddir = ../../
include $(top_builddir)/src/Makefile.global
-MODULE_big = pgxml_dom
-OBJS = pgxml_dom.o
-SHLIB_LINK = -lxml2
-DATA_built = pgxml_dom.sql
+MODULE_big = pgxml
+
+# Remove xslt_proc.o from the following line if you don't have libxslt
+OBJS = xpath.o xslt_proc.o
+
+# Remove -lxslt from the following line if you don't have libxslt.
+SHLIB_LINK = -lxml2 -lxslt
+
+DATA_built = pgxml.sql
DOCS = README.pgxml
-include $(top_srcdir)/contrib/contrib-global.mk
+# contrib-global.mk is a source-tree file, so locate it through top_srcdir
+# (defined by Makefile.global) with an explicit '/' instead of relying on
+# top_builddir ending in a slash.
+include $(top_srcdir)/contrib/contrib-global.mk
+
-This package contains some simple routines for manipulating XML
-documents stored in PostgreSQL. This is a work-in-progress and
-somewhat basic at the moment (see the file TODO for some outline of
-what remains to be done).
+XML-handling functions for PostgreSQL
+=====================================
-At present, two modules (based on different XML handling libraries)
-are provided.
+Development of this module was sponsored by Torchbox Ltd. (www.torchbox.com)
-Prerequisite:
+This version of the XML functions provides both XPath querying and
+XSLT functionality. There is also a new table function which allows
+the straightforward return of multiple XML results. Note that the current code
+doesn't take any particular care over character sets - this is
+something that should be fixed at some point!
-pgxml.c:
-expat parser 1.95.0 or newer (http://expat.sourceforge.net)
+Installation
+------------
-or
+The current build process will only work if the files are in
+contrib/xml in a PostgreSQL 7.3 or 7.4 source tree which has been
+configured and built (If you alter the subdir value in the Makefile
+you can place it in a different directory in a PostgreSQL tree).
-pgxml_dom.c:
-libxml2 (http://xmlsoft.org)
+Before you begin, just check the Makefile, and then just 'make' and
+'make install'.
-The libxml2 version provides more complete XPath functionality, and
-seems like a good way to go. I've left the old versions in there for
-comparison.
+This code requires libxml2 to be installed beforehand (and libxslt if you build the XSLT functions).
-Compiling and loading:
-----------------------
+Description of functions
+------------------------
-The Makefile only builds the libxml2 version.
+The first set of functions are straightforward XML parsing and XPath queries:
-To compile, just type make.
+pgxml_parse(document) RETURNS bool
-Then you can use psql to load the two function definitions:
-\i pgxml_dom.sql
+This parses the document text in its parameter and returns true if the
+document is well-formed XML.
+xpath_string(document,query) RETURNS text
+xpath_number(document,query) RETURNS float4
+xpath_bool(document,query) RETURNS bool
-Function documentation and usage:
----------------------------------
+These functions evaluate the XPath query on the supplied document, and
+cast the result to the specified type.
-pgxml_parse(text) returns bool
- parses the provided text and returns true or false if it is
-well-formed or not. It returns NULL if the parser couldn't be
-created for any reason.
-pgxml_xpath (XQuery functions) - differs between the versions:
+xpath_nodeset(document,query,toptag,itemtag) RETURNS text
-pgxml.c (expat version) has:
+This evaluates query on document and wraps the result in XML tags. If
+the result is multivalued, the output will look like:
-pgxml_xpath(text doc, text xpath, int n) returns text
- parses doc and returns the cdata of the nth occurence of
-the "simple path" entry.
+<toptag>
+<itemtag>Value 1 which could be an XML fragment</itemtag>
+<itemtag>Value 2....</itemtag>
+</toptag>
-However, the remainder of this document will cover the pgxml_dom.c version.
+If either toptag or itemtag is an empty string, the relevant tag is omitted.
+There are also wrapper functions for this operation:
-pgxml_xpath(text doc, text xpath, text toptag, text septag) returns text
- evaluates xpath on doc, and returns the result wrapped in
-<toptag>...</toptag> and each result node wrapped in
-<septag></septag>. toptag and septag may be empty strings, in which
-case the respective tag will be omitted.
+xpath_nodeset(document,query) RETURNS text omits both tags.
+xpath_nodeset(document,query,itemtag) RETURNS text omits toptag.
-Example:
-Given a table docstore:
+xpath_list(document,query,separator) RETURNS text
- Attribute | Type | Modifier
------------+---------+----------
- docid | integer |
- document | text |
+This function returns multiple values separated by the specified
+separator, e.g. Value 1,Value 2,Value 3 if separator=','.
-containing documents such as (these are archaeological site
-descriptions, in case anyone is wondering):
+xpath_list(document,query) RETURNS text
-<?XML version="1.0"?>
-<site provider="Foundations" sitecode="ak97" version="1">
- <name>Church Farm, Ashton Keynes</name>
- <invtype>watching brief</invtype>
- <location scheme="osgb">SU04209424</location>
-</site>
+This is a wrapper for the above function that uses ',' as the separator.
-one can type:
-select docid,
-pgxml_xpath(document,'//site/name/text()','','') as sitename,
-pgxml_xpath(document,'//site/location/text()','','') as location
- from docstore;
-
-and get as output:
+xpath_table
+-----------
- docid | sitename | location
--------+--------------------------------------+------------
- 1 | Church Farm, Ashton Keynes | SU04209424
- 2 | Glebe Farm, Long Itchington | SP41506500
- 3 | The Bungalow, Thames Lane, Cricklade | SU10229362
-(3 rows)
+This is a table function which evaluates a set of XPath queries on
+each of a set of documents and returns the results as a table. The
+primary key field from the original document table is returned as the
+first column of the result so that the resultset from xpath_table can
+be readily used in joins.
-or, to illustrate the use of the extra tags:
+The function itself takes 5 arguments, all text.
-select docid as id,
-pgxml_xpath(document,'//find/type/text()','set','findtype')
-from docstore;
+xpath_table(key,document,relation,xpaths,criteria)
- id | pgxml_xpath
-----+-------------------------------------------------------------------------
- 1 | <set></set>
- 2 | <set><findtype>Urn</findtype></set>
- 3 | <set><findtype>Pottery</findtype><findtype>Animal bone</findtype></set>
-(3 rows)
+key - the name of the "key" field - this is just a field to be used as
+the first column of the output table i.e. it identifies the record from
+which each output row came.
-Which produces a new, well-formed document. Note that document 1 had
-no matching instances, so the set returned contains no
-elements. document 2 has 1 matching element and document 3 has 2.
+document - the name of the field containing the XML document
-This is just scratching the surface because XPath allows all sorts of
-operations.
+relation - the name of the table or view containing the documents
-Note: I've only implemented the return of nodeset and string values so
-far. This covers (I think) many types of queries, however.
+xpaths - multiple xpath expressions separated by |
-John Gray <jgray@azuli.co.uk> 16 August 2001
+criteria - The contents of the where clause. This needs to be specified,
+so use "true" or "1=1" here if you want to process all the rows in the
+relation.
+NB These parameters (except the XPath strings) are just substituted
+into a plain SQL SELECT statement, so you have some flexibility - the
+statement is
+SELECT <key>,<document> FROM <relation> WHERE <criteria>
+
+so those parameters can be *anything* valid in those particular
+locations. The result from this SELECT needs to return exactly two
+columns (which it will unless you try to list multiple fields for key
+or document). Beware that this simplistic approach requires that you
+validate any user-supplied values to avoid SQL injection attacks.
+
+Using the function
+
+The function has to be used in a FROM expression. This gives the following
+form:
+
+SELECT * FROM
+xpath_table('article_id',
+ 'article_xml',
+ 'articles',
+ '/article/author|/article/pages|/article/title',
+ 'date_entered > ''2003-01-01'' ')
+AS t(article_id integer, author text, page_count integer, title text);
+
+The AS clause defines the names and types of the columns in the
+virtual table. If there are more XPath queries than result columns,
+the extra queries will be ignored. If there are more result columns
+than XPath queries, the extra columns will be NULL.
+
+Note that I've said in this example that pages is an integer. The
+function deals internally with string representations, so when you say
+you want an integer in the output, it will take the string
+representation of the XPath result and use PostgreSQL input functions
+to transform it into an integer (or whatever type the AS clause
+requests). An error will result if it can't do this - for example if
+the result is empty - so you may wish to just stick to 'text' as the
+column type if you think your data has any problems.
+
+The select statement doesn't need to use * alone - it can reference the
+columns by name or join them to other tables. The function produces a
+virtual table with which you can perform any operation you wish (e.g.
+aggregation, joining, sorting etc). So we could also have:
+
+SELECT t.title, p.fullname, p.email
+FROM xpath_table('article_id','article_xml','articles',
+ '/article/title|/article/author/@id',
+ 'xpath_string(article_xml,''/article/@date'') > ''2003-03-20'' ')
+ AS t(article_id integer, title text, author_id integer),
+ tblPeopleInfo AS p
+WHERE t.author_id = p.person_id;
+
+as a more complicated example. Of course, you could wrap all
+of this in a view for convenience.
+
+XSLT functions
+--------------
+
+The following functions are available if libxslt is installed (this is
+not currently detected automatically, so you will have to amend the
+Makefile):
+
+xslt_process(document,stylesheet,paramlist) RETURNS text
+
+This function applies the XSL stylesheet to the document and returns
+the transformed result. The paramlist is a list of parameter
+assignments to be used in the transformation, specified in the form
+'a=1,b=2'. Note that this is also proof-of-concept code and the
+parameter parsing is very simple-minded (e.g. parameter values cannot
+contain commas!)
+
+Also note that if either the document or stylesheet values do not
+begin with a < then they will be treated as URLs and libxslt will
+fetch them. It thus follows that you can use xslt_process as a means
+to fetch the contents of URLs - you should be aware of the security
+implications of this.
+
+There is also a two-parameter version of xslt_process which does not
+pass any parameters to the transformation.
+
+If you have any comments or suggestions, please do contact me at
+jgray@azuli.co.uk. Unfortunately, this isn't my main job, so I can't
+guarantee a rapid response to your query!
+++ /dev/null
-PGXML TODO List
-===============
-
-Some of these items still require much more thought! Since the first
-release, the XPath support has improved (because I'm no longer using a
-homemade algorithm!).
-
-1. Performance considerations
-
-At present each document is parsed to produce the DOM tree on every query.
-
-Pros:
- Easy
- No persistent memory or storage allocation for parsed trees
- (libxml docs suggest representation of a document might
- be 4 times the size of the text)
-
-Cons:
- Slow/ CPU intensive to parse.
- Makes it difficult for PLs to apply libxml manipulations to create
- new documents or amend existing ones.
-
-
-2. XQuery
-
-I'm not sure if the addition of XQuery would be best as a function or
-as a new front-end parser. This is one to think about, but with a
-decent implementation of XPath, one of the prerequisites is covered.
-
-3. DOM Interfaces
-
-Expose more aspects of the DOM to user functions/ PLs. This would
-allow a procedure in a PL to run some queries and then use exposed
-interfaces to libxml to create an XML document out of the query
-results. I accept the argument that this might be more properly
-performed on the client side.
-
-4. Returning sets of documents from XPath queries.
-
-Although the current implementation allows you to amalgamate the
-returned results into a single document, it's quite possible that
-you'd like to use the returned set of nodes as a source for FROM.
-
-Is there a good way to optimise/index the results of certain XPath
-operations to make them faster?:
-
-select docid, pgxml_xpath(document,'//site/location/text()','','') as location
-where pgxml_xpath(document,'//site/name/text()','','') = 'Church Farm';
-
-and with multiple element occurences in a document?
-
-select d.docid, pgxml_xpath(d.document,'//site/location/text()','','')
-from docstore d,
-pgxml_xpaths('docstore','document','//feature/type/text()','docid') ft
-where ft.key = d.docid and ft.value ='Limekiln';
-
-pgxml_xpaths params are relname, attrname, xpath, returnkey. It would
-return a set of two-element tuples (key,value) consisting of the value of
-returnkey, and the cdata value of the xpath. The XML document would be
-defined by relname and attrname.
-
-The pgxml_xpaths function could be the basis of a functional index,
-which could speed up the above query very substantially, working
-through the normal query planner mechanism.
-
-5. Return type support.
-
-Better support for returning e.g. numeric or boolean values. I need to
-get to grips with the returned data from libxml first.
-
-
-John Gray <jgray@azuli.co.uk> 16 August 2001
-
-
-
-
-
-
+++ /dev/null
-/********************************************************
- * Interface code to parse an XML document using expat
- ********************************************************/
-
-#include "postgres.h"
-#include "fmgr.h"
-
-#include "expat.h"
-#include "pgxml.h"
-
-/* Memory management - we make expat use standard pg MM */
-
-XML_Memory_Handling_Suite mhs;
-
-/* passthrough functions (palloc is a macro) */
-
-static void *
-pgxml_palloc(size_t size)
-{
- return palloc(size);
-}
-
-static void *
-pgxml_repalloc(void *ptr, size_t size)
-{
- return repalloc(ptr, size);
-}
-
-static void
-pgxml_pfree(void *ptr)
-{
- return pfree(ptr);
-}
-
-static void
-pgxml_mhs_init()
-{
- mhs.malloc_fcn = pgxml_palloc;
- mhs.realloc_fcn = pgxml_repalloc;
- mhs.free_fcn = pgxml_pfree;
-}
-
-static void
-pgxml_handler_init()
-{
- /*
- * This code should set up the relevant handlers from user-supplied
- * settings. Quite how these settings are made is another matter :)
- */
-}
-
-/* Returns true if document is well-formed */
-
-PG_FUNCTION_INFO_V1(pgxml_parse);
-
-Datum
-pgxml_parse(PG_FUNCTION_ARGS)
-{
- /* called as pgxml_parse(document) */
- XML_Parser p;
- text *t = PG_GETARG_TEXT_P(0); /* document buffer */
- int32 docsize = VARSIZE(t) - VARHDRSZ;
-
- pgxml_mhs_init();
-
- pgxml_handler_init();
-
- p = XML_ParserCreate_MM(NULL, &mhs, NULL);
- if (!p)
- {
- ereport(ERROR,
- (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
- errmsg("could not create expat parser")));
- PG_RETURN_NULL(); /* seems appropriate if we couldn't parse */
- }
-
- if (!XML_Parse(p, (char *) VARDATA(t), docsize, 1))
- {
- /*
- * elog(WARNING, "Parse error at line %d:%s",
- * XML_GetCurrentLineNumber(p),
- * XML_ErrorString(XML_GetErrorCode(p)));
- */
- XML_ParserFree(p);
- PG_RETURN_BOOL(false);
- }
-
- XML_ParserFree(p);
- PG_RETURN_BOOL(true);
-}
-
-/* XPath handling functions */
-
-/* XPath support here is for a very skeletal kind of XPath!
- It was easy to program though... */
-
-/* This first is the core function that builds a result set. The
- actual functions called by the user manipulate that result set
- in various ways.
-*/
-
-static XPath_Results *
-build_xpath_results(text *doc, text *pathstr)
-{
- XPath_Results *xpr;
- char *res;
- pgxml_udata *udata;
- XML_Parser p;
- int32 docsize;
-
- xpr = (XPath_Results *) palloc((sizeof(XPath_Results)));
- memset((void *) xpr, 0, sizeof(XPath_Results));
- xpr->rescount = 0;
-
- docsize = VARSIZE(doc) - VARHDRSZ;
-
- /* res isn't going to be the real return type, it is just a buffer */
-
- res = (char *) palloc(docsize);
- memset((void *) res, 0, docsize);
-
- xpr->resbuf = res;
-
- udata = (pgxml_udata *) palloc((sizeof(pgxml_udata)));
- memset((void *) udata, 0, sizeof(pgxml_udata));
-
- udata->currentpath[0] = '\0';
- udata->textgrab = 0;
-
- udata->path = (char *) palloc(VARSIZE(pathstr));
- memcpy(udata->path, VARDATA(pathstr), VARSIZE(pathstr) - VARHDRSZ);
-
- udata->path[VARSIZE(pathstr) - VARHDRSZ] = '\0';
-
- udata->resptr = res;
- udata->reslen = 0;
-
- udata->xpres = xpr;
-
- /* Now fire up the parser */
- pgxml_mhs_init();
-
- p = XML_ParserCreate_MM(NULL, &mhs, NULL);
- if (!p)
- {
- ereport(ERROR,
- (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
- errmsg("could not create expat parser")));
- pfree(xpr);
- pfree(udata->path);
- pfree(udata);
- pfree(res);
- return NULL;
- }
- XML_SetUserData(p, (void *) udata);
-
- /* Set the handlers */
-
- XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler);
- XML_SetCharacterDataHandler(p, pgxml_charhandler);
-
- if (!XML_Parse(p, (char *) VARDATA(doc), docsize, 1))
- {
- /*
- * elog(WARNING, "Parse error at line %d:%s",
- * XML_GetCurrentLineNumber(p),
- * XML_ErrorString(XML_GetErrorCode(p)));
- */
- XML_ParserFree(p);
- pfree(xpr);
- pfree(udata->path);
- pfree(udata);
-
- return NULL;
- }
-
- pfree(udata->path);
- pfree(udata);
- XML_ParserFree(p);
- return xpr;
-}
-
-
-PG_FUNCTION_INFO_V1(pgxml_xpath);
-
-Datum
-pgxml_xpath(PG_FUNCTION_ARGS)
-{
- /* called as pgxml_xpath(document,pathstr, index) for the moment */
-
- XPath_Results *xpresults;
- text *restext;
-
- text *t = PG_GETARG_TEXT_P(0); /* document buffer */
- text *t2 = PG_GETARG_TEXT_P(1);
- int32 ind = PG_GETARG_INT32(2) - 1;
-
- xpresults = build_xpath_results(t, t2);
-
- /*
- * This needs to be changed depending on the mechanism for returning
- * our set of results.
- */
-
- if (xpresults == NULL) /* parse error (not WF or parser failure) */
- PG_RETURN_NULL();
-
- if (ind >= (xpresults->rescount))
- PG_RETURN_NULL();
-
- restext = (text *) palloc(xpresults->reslens[ind] + VARHDRSZ);
- memcpy(VARDATA(restext), xpresults->results[ind], xpresults->reslens[ind]);
-
- VARATT_SIZEP(restext) = xpresults->reslens[ind] + VARHDRSZ;
-
- pfree(xpresults->resbuf);
- pfree(xpresults);
-
- PG_RETURN_TEXT_P(restext);
-}
-
-
-static void
-pgxml_pathcompare(void *userData)
-{
- char *matchpos;
-
- matchpos = strstr(UD->currentpath, UD->path);
-
- if (matchpos == NULL)
- { /* Should we have more logic here ? */
- if (UD->textgrab)
- {
- UD->textgrab = 0;
- pgxml_finalisegrabbedtext(userData);
- }
- return;
- }
-
- /*
- * OK, we have a match of some sort. Now we need to check that our
- * match is anchored to the *end* of the string AND that it is
- * immediately preceded by a '/'
- */
-
- /*
- * This test wouldn't work if strlen (UD->path) overran the length of
- * the currentpath, but that's not possible because we got a match!
- */
-
- if ((matchpos + strlen(UD->path))[0] == '\0')
- {
- if ((UD->path)[0] == '/')
- {
- if (matchpos == UD->currentpath)
- UD->textgrab = 1;
- }
- else
- {
- if ((matchpos - 1)[0] == '/')
- UD->textgrab = 1;
- }
- }
-}
-
-static void
-pgxml_starthandler(void *userData, const XML_Char * name,
- const XML_Char ** atts)
-{
-
- char sepstr[] = "/";
-
- if ((strlen(name) + strlen(UD->currentpath)) > MAXPATHLENGTH - 2)
- elog(WARNING, "path too long");
- else
- {
- strncat(UD->currentpath, sepstr, 1);
- strcat(UD->currentpath, name);
- }
- if (UD->textgrab)
- {
- /*
- * Depending on user preference, should we "reconstitute" the
- * element into the result text?
- */
- }
- else
- pgxml_pathcompare(userData);
-}
-
-static void
-pgxml_endhandler(void *userData, const XML_Char * name)
-{
- /*
- * Start by removing the current element off the end of the
- * currentpath
- */
-
- char *sepptr;
-
- sepptr = strrchr(UD->currentpath, '/');
- if (sepptr == NULL)
- {
- /* internal error */
- elog(ERROR, "did not find '/'");
- sepptr = UD->currentpath;
- }
- if (strcmp(name, sepptr + 1) != 0)
- {
- elog(WARNING, "wanted [%s], got [%s]", sepptr, name);
- /* unmatched entry, so do nothing */
- }
- else
- {
- sepptr[0] = '\0'; /* Chop that element off the end */
- }
-
- if (UD->textgrab)
- pgxml_pathcompare(userData);
-
-}
-
-static void
-pgxml_charhandler(void *userData, const XML_Char * s, int len)
-{
- if (UD->textgrab)
- {
- if (len > 0)
- {
- memcpy(UD->resptr, s, len);
- UD->resptr += len;
- UD->reslen += len;
- }
- }
-}
-
-/* Should I be using PG list types here? */
-
-static void
-pgxml_finalisegrabbedtext(void *userData)
-{
- /* In res/reslen, we have a single result. */
- UD->xpres->results[UD->xpres->rescount] = UD->resptr - UD->reslen;
- UD->xpres->reslens[UD->xpres->rescount] = UD->reslen;
- UD->reslen = 0;
- UD->xpres->rescount++;
-
- /*
- * This effectively concatenates all the results together but we do
- * know where one ends and the next begins
- */
-}
+++ /dev/null
-/* Header for pg xml parser interface */
-
-static void *pgxml_palloc(size_t size);
-static void *pgxml_repalloc(void *ptr, size_t size);
-static void pgxml_pfree(void *ptr);
-static void pgxml_mhs_init();
-static void pgxml_handler_init();
-Datum pgxml_parse(PG_FUNCTION_ARGS);
-Datum pgxml_xpath(PG_FUNCTION_ARGS);
-static void pgxml_starthandler(void *userData, const XML_Char * name,
- const XML_Char ** atts);
-static void pgxml_endhandler(void *userData, const XML_Char * name);
-static void pgxml_charhandler(void *userData, const XML_Char * s, int len);
-static void pgxml_pathcompare(void *userData);
-static void pgxml_finalisegrabbedtext(void *userData);
-
-#define MAXPATHLENGTH 512
-#define MAXRESULTS 100
-
-
-typedef struct
-{
- int rescount;
- char *results[MAXRESULTS];
- int32 reslens[MAXRESULTS];
- char *resbuf; /* pointer to the result buffer for pfree */
-} XPath_Results;
-
-
-
-typedef struct
-{
- char currentpath[MAXPATHLENGTH];
- char *path;
- int textgrab;
- char *resptr;
- int32 reslen;
- XPath_Results *xpres;
-} pgxml_udata;
-
-
-#define UD ((pgxml_udata *) userData)
--- SQL for XML parser
+--SQL for XML parser
--- Adjust this setting to control where the objects get created.
-SET search_path TO public;
+CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS bool
+ AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
-CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS boolean
- AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
+CREATE OR REPLACE FUNCTION xpath_string(text,text) RETURNS text
+ AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
-CREATE OR REPLACE FUNCTION pgxml_xpath(text, text, text, text) RETURNS text
- AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
+CREATE OR REPLACE FUNCTION xpath_nodeset(text,text,text,text) RETURNS text
+ AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
+
+CREATE OR REPLACE FUNCTION xpath_number(text,text) RETURNS float4
+ AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
+
+CREATE OR REPLACE FUNCTION xpath_bool(text,text) RETURNS boolean
+ AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
+
+-- List function
+
+CREATE OR REPLACE FUNCTION xpath_list(text,text,text) RETURNS text
+ AS 'MODULE_PATHNAME'
+ LANGUAGE 'c' WITH (isStrict);
+
+
+CREATE OR REPLACE FUNCTION xpath_list(text,text) RETURNS text
+AS 'SELECT xpath_list($1,$2,'','')' language 'SQL' WITH (isStrict);
+
+
+
+-- Wrapper functions for nodeset where no tags needed.
+
+
+CREATE OR REPLACE FUNCTION xpath_nodeset(text,text) RETURNS text AS
+'SELECT xpath_nodeset($1,$2,'''','''')' language 'SQL' WITH (isStrict);
+
+
+CREATE OR REPLACE FUNCTION xpath_nodeset(text,text,text) RETURNS text AS
+'SELECT xpath_nodeset($1,$2,'''',$3)' language 'SQL' WITH (isStrict);
+
+-- Table function
+
+CREATE OR REPLACE FUNCTION xpath_table(text,text,text,text,text) RETURNS setof record
+ AS 'MODULE_PATHNAME'
+ LANGUAGE 'c' WITH (isStrict);
+
+-- XSLT functions
+-- Delete from here to the end of the file if you are not compiling with
+-- XSLT support.
+
+
+CREATE OR REPLACE FUNCTION xslt_process(text,text,text) RETURNS text
+ AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
+
+-- The C function checks its argument count itself, so the two-argument form below is bound to it as well.
+
+CREATE OR REPLACE FUNCTION xslt_process(text,text) RETURNS text
+ AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
+++ /dev/null
-/* Parser interface for DOM-based parser (libxml) rather than
- stream-based SAX-type parser */
-
-#include "postgres.h"
-#include "fmgr.h"
-
-/* libxml includes */
-
-#include <libxml/xpath.h>
-#include <libxml/tree.h>
-#include <libxml/xmlmemory.h>
-
-/* declarations */
-
-static void *pgxml_palloc(size_t size);
-static void *pgxml_repalloc(void *ptr, size_t size);
-static void pgxml_pfree(void *ptr);
-static char *pgxml_pstrdup(const char *string);
-
-static void pgxml_parser_init();
-
-static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset, xmlDocPtr doc,
- xmlChar * toptagname, xmlChar * septagname,
- int format);
-
-static xmlChar *pgxml_texttoxmlchar(text *textstring);
-
-
-Datum pgxml_parse(PG_FUNCTION_ARGS);
-Datum pgxml_xpath(PG_FUNCTION_ARGS);
-
-/* memory handling passthrough functions (e.g. palloc, pstrdup are
- currently macros, and the others might become so...) */
-
-static void *
-pgxml_palloc(size_t size)
-{
- return palloc(size);
-}
-
-static void *
-pgxml_repalloc(void *ptr, size_t size)
-{
- return repalloc(ptr, size);
-}
-
-static void
-pgxml_pfree(void *ptr)
-{
- return pfree(ptr);
-}
-
-static char *
-pgxml_pstrdup(const char *string)
-{
- return pstrdup(string);
-}
-
-static void
-pgxml_parser_init()
-{
- /*
- * This code should also set parser settings from user-supplied info.
- * Quite how these settings are made is another matter :)
- */
-
- xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup);
- xmlInitParser();
-
-}
-
-
-/* Returns true if document is well-formed */
-
-PG_FUNCTION_INFO_V1(pgxml_parse);
-
-Datum
-pgxml_parse(PG_FUNCTION_ARGS)
-{
- /* called as pgxml_parse(document) */
- xmlDocPtr doctree;
- text *t = PG_GETARG_TEXT_P(0); /* document buffer */
- int32 docsize = VARSIZE(t) - VARHDRSZ;
-
- pgxml_parser_init();
-
- doctree = xmlParseMemory((char *) VARDATA(t), docsize);
- if (doctree == NULL)
- {
- xmlCleanupParser();
- PG_RETURN_BOOL(false); /* i.e. not well-formed */
- }
- xmlCleanupParser();
- xmlFreeDoc(doctree);
- PG_RETURN_BOOL(true);
-}
-
-static xmlChar
-*
-pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
- xmlDocPtr doc,
- xmlChar * toptagname,
- xmlChar * septagname,
- int format)
-{
- /* Function translates a nodeset into a text representation */
-
- /*
- * iterates over each node in the set and calls xmlNodeDump to write
- * it to an xmlBuffer -from which an xmlChar * string is returned.
- */
- /* each representation is surrounded by <tagname> ... </tagname> */
- /* if format==0, add a newline between nodes?? */
-
- xmlBufferPtr buf;
- xmlChar *result;
- int i;
-
- buf = xmlBufferCreate();
-
- if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
- {
- xmlBufferWriteChar(buf, "<");
- xmlBufferWriteCHAR(buf, toptagname);
- xmlBufferWriteChar(buf, ">");
- }
- if (nodeset != NULL)
- {
- for (i = 0; i < nodeset->nodeNr; i++)
- {
- if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
- {
- xmlBufferWriteChar(buf, "<");
- xmlBufferWriteCHAR(buf, septagname);
- xmlBufferWriteChar(buf, ">");
- }
- xmlNodeDump(buf, doc, nodeset->nodeTab[i], 1, (format == 2));
-
- if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
- {
- xmlBufferWriteChar(buf, "</");
- xmlBufferWriteCHAR(buf, septagname);
- xmlBufferWriteChar(buf, ">");
- }
- if (format)
- xmlBufferWriteChar(buf, "\n");
- }
- }
-
- if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
- {
- xmlBufferWriteChar(buf, "</");
- xmlBufferWriteCHAR(buf, toptagname);
- xmlBufferWriteChar(buf, ">");
- }
- result = xmlStrdup(buf->content);
- xmlBufferFree(buf);
- return result;
-}
-
-static xmlChar *
-pgxml_texttoxmlchar(text *textstring)
-{
- xmlChar *res;
- int32 txsize;
-
- txsize = VARSIZE(textstring) - VARHDRSZ;
- res = (xmlChar *) palloc(txsize + 1);
- memcpy((char *) res, VARDATA(textstring), txsize);
- res[txsize] = '\0';
- return res;
-}
-
-
-PG_FUNCTION_INFO_V1(pgxml_xpath);
-
-Datum
-pgxml_xpath(PG_FUNCTION_ARGS)
-{
- xmlDocPtr doctree;
- xmlXPathContextPtr ctxt;
- xmlXPathObjectPtr res;
- xmlChar *xpath,
- *xpresstr,
- *toptag,
- *septag;
- xmlXPathCompExprPtr comppath;
-
- int32 docsize,
- ressize;
- text *t,
- *xpres;
-
- t = PG_GETARG_TEXT_P(0); /* document buffer */
- xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1)); /* XPath expression */
- toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
- septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(3));
-
- docsize = VARSIZE(t) - VARHDRSZ;
-
- pgxml_parser_init();
-
- doctree = xmlParseMemory((char *) VARDATA(t), docsize);
- if (doctree == NULL)
- { /* not well-formed */
- xmlCleanupParser();
- PG_RETURN_NULL();
- }
-
- ctxt = xmlXPathNewContext(doctree);
- ctxt->node = xmlDocGetRootElement(doctree);
-
- /* compile the path */
- comppath = xmlXPathCompile(xpath);
- if (comppath == NULL)
- {
- elog(WARNING, "XPath syntax error");
- xmlFreeDoc(doctree);
- pfree((void *) xpath);
- xmlCleanupParser();
- PG_RETURN_NULL();
- }
-
- /* Now evaluate the path expression. */
- res = xmlXPathCompiledEval(comppath, ctxt);
- xmlXPathFreeCompExpr(comppath);
-
- if (res == NULL)
- {
- xmlFreeDoc(doctree);
- pfree((void *) xpath);
- xmlCleanupParser();
- PG_RETURN_NULL(); /* seems appropriate */
- }
- /* now we dump this node, ?surrounding by tags? */
- /* To do this, we look first at the type */
- switch (res->type)
- {
- case XPATH_NODESET:
- xpresstr = pgxmlNodeSetToText(res->nodesetval,
- doctree,
- toptag, septag, 0);
- break;
- case XPATH_STRING:
- xpresstr = xmlStrdup(res->stringval);
- break;
- default:
- elog(WARNING, "Unsupported XQuery result: %d", res->type);
- xpresstr = xmlStrdup("<unsupported/>");
- }
-
-
- /* Now convert this result back to text */
- ressize = strlen(xpresstr);
- xpres = (text *) palloc(ressize + VARHDRSZ);
- memcpy(VARDATA(xpres), xpresstr, ressize);
- VARATT_SIZEP(xpres) = ressize + VARHDRSZ;
-
- /* Free various storage */
- xmlFreeDoc(doctree);
- pfree((void *) xpath);
- xmlFree(xpresstr);
- xmlCleanupParser();
- PG_RETURN_TEXT_P(xpres);
-}
+++ /dev/null
--- SQL for XML parser
-
--- Adjust this setting to control where the objects get created.
-SET search_path TO public;
-
-CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS boolean
- AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
-
-CREATE OR REPLACE FUNCTION pgxml_xpath(text, text, text, text) RETURNS text
- AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
--- /dev/null
+/* Parser interface for DOM-based parser (libxml) rather than
+ stream-based SAX-type parser */
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "lib/stringinfo.h"
+
+/* libxml includes */
+
+#include <libxml/xpath.h>
+#include <libxml/tree.h>
+#include <libxml/xmlmemory.h>
+#include <libxml/xmlerror.h>
+#include <libxml/parserInternals.h>
+
+/* declarations */
+
+static void *pgxml_palloc(size_t size);
+static void *pgxml_repalloc(void *ptr, size_t size);
+static void pgxml_pfree(void *ptr);
+static char *pgxml_pstrdup(const char *string);
+static void pgxml_errorHandler (void * ctxt, const char *msg, ...);
+
+void elog_error(int level, char *explain, int force);
+void pgxml_parser_init(void);
+
+static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
+ xmlChar * toptagname, xmlChar * septagname,
+ xmlChar * plainsep);
+
+text *pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar *toptag,
+ xmlChar *septag, xmlChar *plainsep);
+
+xmlChar *pgxml_texttoxmlchar(text *textstring);
+
+static xmlXPathObjectPtr pgxml_xpath(text *document, xmlChar* xpath);
+
+
+Datum pgxml_parse(PG_FUNCTION_ARGS);
+Datum xpath_nodeset(PG_FUNCTION_ARGS);
+Datum xpath_string(PG_FUNCTION_ARGS);
+Datum xpath_number(PG_FUNCTION_ARGS);
+Datum xpath_bool(PG_FUNCTION_ARGS);
+Datum xpath_list(PG_FUNCTION_ARGS);
+Datum xpath_table(PG_FUNCTION_ARGS);
+
+/* Global variables */
+char *errbuf; /* per line error buffer */
+char *pgxml_errorMsg = NULL; /* overall error message */
+
+/* Convenience macros */
+
+#define GET_TEXT(cstrp) DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(cstrp)))
+#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
+
+#define ERRBUF_SIZE 200
+
+/* memory handling passthrough functions (e.g. palloc, pstrdup are
+ currently macros, and the others might become so...) */
+
+/* Allocation hook passed to xmlMemSetup(): forwards the request to palloc(). */
+static void *
+pgxml_palloc(size_t size)
+{
+	void	   *chunk = palloc(size);
+
+	return chunk;
+}
+
+/* Reallocation hook passed to xmlMemSetup(): forwards to repalloc(). */
+static void *
+pgxml_repalloc(void *ptr, size_t size)
+{
+	void	   *resized = repalloc(ptr, size);
+
+	return resized;
+}
+
+/*
+ * Free hook passed to xmlMemSetup(): forwards to pfree().
+ *
+ * Written as a plain call rather than "return pfree(ptr)": ISO C does not
+ * allow a return statement with an expression in a void function.
+ */
+static void
+pgxml_pfree(void *ptr)
+{
+	pfree(ptr);
+}
+
+/* String-duplication hook passed to xmlMemSetup(): forwards to pstrdup(). */
+static char *
+pgxml_pstrdup(const char *string)
+{
+	char	   *copy = pstrdup(string);
+
+	return copy;
+}
+
+/*
+ * Generic error callback registered with libxml2.
+ *
+ * Formats the message into errbuf (truncated to ERRBUF_SIZE) and appends it
+ * to the accumulated pgxml_errorMsg.  Nothing is reported from here; the
+ * accumulated text is issued later through elog_error().
+ *
+ * ctxt is not used.
+ */
+static void
+pgxml_errorHandler(void *ctxt, const char *msg,...)
+{
+	va_list		args;
+
+	va_start(args, msg);
+	vsnprintf(errbuf, ERRBUF_SIZE, msg, args);
+	va_end(args);
+
+	if (pgxml_errorMsg == NULL)
+	{
+		/* First fragment: just take a copy */
+		pgxml_errorMsg = pstrdup(errbuf);
+	}
+	else
+	{
+		size_t		oldlen = strlen(pgxml_errorMsg);
+		size_t		addlen = strlen(errbuf);
+
+		/*
+		 * As before, the new fragment overwrites the final character of the
+		 * existing text.  If the existing text is empty there is no such
+		 * character, so start at offset 0 instead of writing at index -1.
+		 */
+		size_t		start = (oldlen > 0) ? oldlen - 1 : 0;
+
+		pgxml_errorMsg = repalloc(pgxml_errorMsg, oldlen + addlen + 1);
+		memcpy(pgxml_errorMsg + start, errbuf, addlen);
+		pgxml_errorMsg[start + addlen] = '\0';
+	}
+
+	/* Leave the scratch buffer clean for the next message */
+	memset(errbuf, 0, ERRBUF_SIZE);
+}
+
+/*
+ * Report the accumulated libxml2 error text at the given level.
+ *
+ * level   - ereport() severity to use
+ * explain - caller-supplied prefix describing what failed
+ * force   - if nonzero, report even when no libxml2 message was collected
+ *           (only "explain" is shown in that case)
+ *
+ * Does nothing when there is no collected message and force is zero.
+ */
+void
+elog_error(int level, char *explain, int force)
+{
+	if (pgxml_errorMsg != NULL)
+	{
+		ereport(level, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
+						errmsg("%s:%s", explain, pgxml_errorMsg)));
+
+		/*
+		 * Reached only if ereport() returned (levels below ERROR).  Clear
+		 * the pointer so a later call cannot use or free it again.
+		 */
+		pfree(pgxml_errorMsg);
+		pgxml_errorMsg = NULL;
+	}
+	else if (force)
+	{
+		/* Pass explain as data, never as the format string itself */
+		ereport(level, (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
+						errmsg("%s", explain)));
+	}
+}
+
+/*
+ * Prepare libxml2 for use by the current function call.
+ *
+ * Routes libxml2's memory management through the palloc family, installs
+ * pgxml_errorHandler as the generic error callback, enables entity
+ * substitution and external DTD loading, and resets the error state
+ * (pgxml_errorMsg and a freshly allocated, zeroed errbuf).
+ *
+ * This code could also set parser settings from user-supplied info.
+ * Quite how these settings are made is another matter :)
+ */
+void
+pgxml_parser_init(void)
+{
+	xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup);
+	xmlInitParser();
+
+	xmlSetGenericErrorFunc(NULL, pgxml_errorHandler);
+
+	xmlSubstituteEntitiesDefault(1);
+	xmlLoadExtDtdDefaultValue = 1;
+
+	pgxml_errorMsg = NULL;
+
+	/*
+	 * The handler formats into errbuf with a bound of ERRBUF_SIZE, so the
+	 * allocation must use the same constant.
+	 */
+	errbuf = palloc(ERRBUF_SIZE);
+	memset(errbuf, 0, ERRBUF_SIZE);
+}
+
+
+/* Returns true if document is well-formed */
+
+PG_FUNCTION_INFO_V1(pgxml_parse);
+
+/*
+ * pgxml_parse(document text) returns boolean
+ *
+ * Returns true if the document can be parsed by libxml2 (i.e. it is
+ * well-formed), false otherwise.
+ */
+Datum
+pgxml_parse(PG_FUNCTION_ARGS)
+{
+	text	   *t = PG_GETARG_TEXT_P(0);	/* document buffer */
+	int32		docsize = VARSIZE(t) - VARHDRSZ;
+	xmlDocPtr	doctree;
+	bool		wellformed;
+
+	pgxml_parser_init();
+
+	doctree = xmlParseMemory((char *) VARDATA(t), docsize);
+	wellformed = (doctree != NULL);
+
+	/* Release the document before tearing down the library's own state */
+	if (wellformed)
+		xmlFreeDoc(doctree);
+	xmlCleanupParser();
+
+	PG_RETURN_BOOL(wellformed);
+}
+
+
+/*
+ * Translate a nodeset into a text representation.
+ *
+ * nodeset    - nodes to render; may be NULL (only the top tag is emitted)
+ * toptagname - if non-NULL and non-empty, the whole output is wrapped in
+ *              <toptagname> ... </toptagname>
+ * septagname - if non-NULL and non-empty, each dumped node is wrapped in
+ *              <septagname> ... </septagname>
+ * plainsep   - an ordinary (not tag) separator; if non-NULL, nodes are cast
+ *              to string instead of being dumped as XML, and the separator
+ *              is written between (not after) entries
+ *
+ * Returns a newly allocated xmlChar string (from xmlStrdup).
+ */
+static xmlChar *
+pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
+				   xmlChar * toptagname,
+				   xmlChar * septagname,
+				   xmlChar * plainsep)
+{
+	xmlBufferPtr buf;
+	xmlChar    *result;
+	int			i;
+	int			have_toptag = (toptagname != NULL) && (xmlStrlen(toptagname) > 0);
+	int			have_septag = (septagname != NULL) && (xmlStrlen(septagname) > 0);
+
+	buf = xmlBufferCreate();
+
+	if (have_toptag)
+	{
+		xmlBufferWriteChar(buf, "<");
+		xmlBufferWriteCHAR(buf, toptagname);
+		xmlBufferWriteChar(buf, ">");
+	}
+
+	if (nodeset != NULL)
+	{
+		for (i = 0; i < nodeset->nodeNr; i++)
+		{
+			xmlNodePtr	node = nodeset->nodeTab[i];
+
+			if (plainsep != NULL)
+			{
+				/* The cast returns a fresh copy which we must release */
+				xmlChar    *nodestr = xmlXPathCastNodeToString(node);
+
+				if (nodestr != NULL)
+				{
+					xmlBufferWriteCHAR(buf, nodestr);
+					xmlFree(nodestr);
+				}
+
+				/* If this isn't the last entry, write the plain sep. */
+				if (i < nodeset->nodeNr - 1)
+					xmlBufferWriteCHAR(buf, plainsep);
+			}
+			else
+			{
+				if (have_septag)
+				{
+					xmlBufferWriteChar(buf, "<");
+					xmlBufferWriteCHAR(buf, septagname);
+					xmlBufferWriteChar(buf, ">");
+				}
+
+				xmlNodeDump(buf, node->doc, node, 1, 0);
+
+				if (have_septag)
+				{
+					xmlBufferWriteChar(buf, "</");
+					xmlBufferWriteCHAR(buf, septagname);
+					xmlBufferWriteChar(buf, ">");
+				}
+			}
+		}
+	}
+
+	if (have_toptag)
+	{
+		xmlBufferWriteChar(buf, "</");
+		xmlBufferWriteCHAR(buf, toptagname);
+		xmlBufferWriteChar(buf, ">");
+	}
+
+	result = xmlStrdup(buf->content);
+	xmlBufferFree(buf);
+	return result;
+}
+
+
+/* Translate a PostgreSQL "varlena" -i.e. a variable length parameter
+ * into the libxml2 representation
+ */
+
+/*
+ * Copy the payload of a text value into a freshly palloc'd,
+ * NUL-terminated xmlChar buffer.
+ */
+xmlChar *
+pgxml_texttoxmlchar(text *textstring)
+{
+	int32		len = VARSIZE(textstring) - VARHDRSZ;
+	xmlChar    *copy = (xmlChar *) palloc(len + 1);
+
+	memcpy((char *) copy, VARDATA(textstring), len);
+	copy[len] = '\0';
+
+	return copy;
+}
+
+/* Public visible XPath functions */
+
+/* This is a "raw" xpath function. Check that it returns child elements
+ * properly
+ */
+
+PG_FUNCTION_INFO_V1(xpath_nodeset);
+
+/*
+ * xpath_nodeset(document, xpath, toptag, septag) returns text
+ *
+ * Evaluates the XPath expression against the document and renders the
+ * result via pgxml_result_to_text() with the given top-level and per-node
+ * tags.  Returns SQL NULL when no result text is produced.
+ */
+Datum
+xpath_nodeset(PG_FUNCTION_ARGS)
+{
+	xmlChar    *xpath,
+			   *toptag,
+			   *septag;
+	text	   *xpres;
+
+	/* PG_GETARG_TEXT_P(0) is document buffer */
+	xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));	/* XPath expression */
+	toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
+	septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(3));
+
+	xpres = pgxml_result_to_text(
+								 pgxml_xpath(PG_GETARG_TEXT_P(0), xpath),
+								 toptag, septag, NULL);
+
+	/*
+	 * pgxml_result_to_text() returns early, without cleaning up, when the
+	 * XPath evaluation yielded no result object; clean up here so that path
+	 * is covered too (xpath_string does the same).
+	 */
+	xmlCleanupParser();
+	pfree((void *) xpath);
+
+	if (xpres == NULL)
+	{
+		PG_RETURN_NULL();
+	}
+	PG_RETURN_TEXT_P(xpres);
+}
+
+/* The following function is almost identical to xpath_nodeset, but returns
+ * the results as a plain list joined by a separator instead of as tags. */
+
+PG_FUNCTION_INFO_V1(xpath_list);
+
+/*
+ * xpath_list(document, xpath, plainsep) returns text
+ *
+ * Evaluates the XPath expression against the document and renders the
+ * result via pgxml_result_to_text() with a plain separator and no tags.
+ * Returns SQL NULL when no result text is produced.
+ */
+Datum
+xpath_list(PG_FUNCTION_ARGS)
+{
+	xmlChar    *xpath,
+			   *plainsep;
+	text	   *xpres;
+
+	/* PG_GETARG_TEXT_P(0) is document buffer */
+	xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));	/* XPath expression */
+	plainsep = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
+
+	xpres = pgxml_result_to_text(
+								 pgxml_xpath(PG_GETARG_TEXT_P(0), xpath),
+								 NULL, NULL, plainsep);
+
+	/*
+	 * pgxml_result_to_text() returns early, without cleaning up, when the
+	 * XPath evaluation yielded no result object; clean up here so that path
+	 * is covered too (xpath_string does the same).
+	 */
+	xmlCleanupParser();
+	pfree((void *) xpath);
+
+	if (xpres == NULL)
+	{
+		PG_RETURN_NULL();
+	}
+	PG_RETURN_TEXT_P(xpres);
+}
+
+
+PG_FUNCTION_INFO_V1(xpath_string);
+
+/*
+ * xpath_string(document, xpath) returns text
+ *
+ * Wraps the supplied path in "string(...)", evaluates it against the
+ * document and returns the result as text, or SQL NULL if there is none.
+ */
+Datum
+xpath_string(PG_FUNCTION_ARGS)
+{
+	/* PG_GETARG_TEXT_P(0) is document buffer */
+	text	   *pathtext = PG_GETARG_TEXT_P(1);		/* XPath expression */
+	int32		pathlen = VARSIZE(pathtext) - VARHDRSZ;
+	xmlChar    *wrapped;
+	text	   *result;
+
+	/*
+	 * Build "string(" + path + ")": 7 + pathlen + 1 characters, plus one
+	 * byte for the terminating NUL.
+	 */
+	wrapped = (xmlChar *) palloc(pathlen + 9);
+	memcpy((char *) wrapped, "string(", 7);
+	memcpy((char *) (wrapped + 7), VARDATA(pathtext), pathlen);
+	wrapped[pathlen + 7] = ')';
+	wrapped[pathlen + 8] = '\0';
+
+	result = pgxml_result_to_text(
+								  pgxml_xpath(PG_GETARG_TEXT_P(0), wrapped),
+								  NULL, NULL, NULL);
+
+	xmlCleanupParser();
+	pfree((void *) wrapped);
+
+	if (result != NULL)
+	{
+		PG_RETURN_TEXT_P(result);
+	}
+	PG_RETURN_NULL();
+}
+
+
+PG_FUNCTION_INFO_V1(xpath_number);
+
+/*
+ * xpath_number(document, xpath) returns float4
+ *
+ * Evaluates the XPath expression and casts the result to a number.
+ * Returns SQL NULL when evaluation yields no result object or the number
+ * is NaN.
+ */
+Datum
+xpath_number(PG_FUNCTION_ARGS)
+{
+	xmlChar    *xpath;
+	float4		fRes;
+	xmlXPathObjectPtr res;
+
+	/* PG_GETARG_TEXT_P(0) is document buffer */
+	xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));	/* XPath expression */
+
+	res = pgxml_xpath(PG_GETARG_TEXT_P(0), xpath);
+	pfree((void *) xpath);
+
+	if (res == NULL)
+	{
+		xmlCleanupParser();
+		PG_RETURN_NULL();
+	}
+
+	fRes = xmlXPathCastToNumber(res);
+	xmlCleanupParser();
+	if (xmlXPathIsNaN(fRes))
+	{
+		PG_RETURN_NULL();
+	}
+
+	PG_RETURN_FLOAT4(fRes);
+}
+
+
+PG_FUNCTION_INFO_V1(xpath_bool);
+
+/*
+ * xpath_bool(document, xpath) returns boolean
+ *
+ * Evaluates the XPath expression and casts the result to a boolean.
+ * Returns false when evaluation yields no result object.
+ */
+Datum
+xpath_bool(PG_FUNCTION_ARGS)
+{
+	xmlChar    *xpath;
+	int			bRes;
+	xmlXPathObjectPtr res;
+
+	/* PG_GETARG_TEXT_P(0) is document buffer */
+	xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));	/* XPath expression */
+
+	res = pgxml_xpath(PG_GETARG_TEXT_P(0), xpath);
+	pfree((void *) xpath);
+
+	if (res == NULL)
+	{
+		xmlCleanupParser();
+		PG_RETURN_BOOL(false);
+	}
+
+	bRes = xmlXPathCastToBoolean(res);
+	xmlCleanupParser();
+	PG_RETURN_BOOL(bRes);
+}
+
+
+
+/*
+ * Core function to evaluate an XPath query.
+ *
+ * Initialises the parser, parses the document, then compiles and evaluates
+ * the expression with the document's root element as the context node.
+ *
+ * Returns the XPath result object, or NULL if the document is not
+ * well-formed or evaluation produced no result.  A path that fails to
+ * compile is reported through elog_error() at ERROR level.
+ *
+ * On success the document and XPath context are not freed here.
+ * NOTE(review): presumably because node-set results point into the
+ * document tree - confirm before adding frees on the success path.
+ */
+static xmlXPathObjectPtr
+pgxml_xpath(text *document, xmlChar *xpath)
+{
+	xmlDocPtr	doctree;
+	xmlXPathContextPtr ctxt;
+	xmlXPathObjectPtr res;
+	xmlXPathCompExprPtr comppath;
+	int32		docsize;
+
+	docsize = VARSIZE(document) - VARHDRSZ;
+
+	pgxml_parser_init();
+
+	doctree = xmlParseMemory((char *) VARDATA(document), docsize);
+	if (doctree == NULL)
+	{							/* not well-formed */
+		return NULL;
+	}
+
+	ctxt = xmlXPathNewContext(doctree);
+	ctxt->node = xmlDocGetRootElement(doctree);
+
+	/* compile the path */
+	comppath = xmlXPathCompile(xpath);
+	if (comppath == NULL)
+	{
+		/* Release everything we own before the library state goes away */
+		xmlXPathFreeContext(ctxt);
+		xmlFreeDoc(doctree);
+		xmlCleanupParser();
+		elog_error(ERROR, "XPath Syntax Error", 1);
+
+		return NULL;
+	}
+
+	/* Now evaluate the path expression. */
+	res = xmlXPathCompiledEval(comppath, ctxt);
+	xmlXPathFreeCompExpr(comppath);
+
+	if (res == NULL)
+	{
+		xmlXPathFreeContext(ctxt);
+		xmlFreeDoc(doctree);
+
+		return NULL;
+	}
+
+	return res;
+}
+
+/*
+ * Convert an XPath result object into a PostgreSQL text value.
+ *
+ * res      - result from pgxml_xpath(); NULL yields a NULL return with no
+ *            further action (in particular, no parser cleanup)
+ * toptag, septag, plainsep - passed through to pgxmlNodeSetToText() for
+ *            node-set results; unused for other result types
+ *
+ * Node sets are rendered by pgxmlNodeSetToText(), strings are copied, and
+ * any other result type produces a NOTICE and the text "<unsupported/>".
+ * After conversion the parser is cleaned up and any collected libxml2
+ * error text is raised via elog_error() at ERROR level.
+ */
+text *
+pgxml_result_to_text(xmlXPathObjectPtr res,
+					 xmlChar *toptag,
+					 xmlChar *septag,
+					 xmlChar *plainsep)
+{
+	xmlChar    *xpresstr;
+	int32		ressize;
+	text	   *xpres;
+
+	if (res == NULL)
+	{
+		return NULL;
+	}
+
+	switch (res->type)
+	{
+		case XPATH_NODESET:
+			xpresstr = pgxmlNodeSetToText(res->nodesetval,
+										  toptag,
+										  septag, plainsep);
+			break;
+
+		case XPATH_STRING:
+			xpresstr = xmlStrdup(res->stringval);
+			break;
+
+		default:
+			elog(NOTICE, "Unsupported XQuery result: %d", res->type);
+			xpresstr = xmlStrdup((const xmlChar *) "<unsupported/>");
+			break;
+	}
+
+	/*
+	 * Now convert this result back to text.  xmlStrdup() can hand back
+	 * NULL (e.g. for a NULL input string); treat that as an empty result
+	 * rather than passing NULL to the length and free routines.
+	 */
+	ressize = (xpresstr != NULL) ? xmlStrlen(xpresstr) : 0;
+	xpres = (text *) palloc(ressize + VARHDRSZ);
+	if (ressize > 0)
+		memcpy(VARDATA(xpres), xpresstr, ressize);
+	VARATT_SIZEP(xpres) = ressize + VARHDRSZ;
+
+	/* Free various storage */
+	xmlCleanupParser();
+	/* xmlFreeDoc(doctree); -- will die at end of tuple anyway */
+
+	/* xmlFree is routed to pfree, which must not be given NULL */
+	if (xpresstr != NULL)
+		xmlFree(xpresstr);
+
+	elog_error(ERROR, "XPath error", 0);
+
+	return xpres;
+}
+
+/* xpath_table is a table function. It needs some tidying (as do the
+ * other functions here!)
+ */
+
+PG_FUNCTION_INFO_V1(xpath_table);
+
+/*
+ * xpath_table(pkeyfield, xmlfield, relname, xpathset, condition)
+ *
+ * Table function.  Runs
+ *     SELECT pkeyfield, xmlfield FROM relname WHERE condition
+ * through SPI, parses each returned document and evaluates the XPath
+ * expressions given in xpathset (separated by '|') against it.  The first
+ * output column receives the key; each following column receives the
+ * result of the corresponding path.  When paths return node sets, one
+ * output row is produced per node position until no path has a node left.
+ * Documents that are NULL or not well-formed yield a single row with only
+ * the key column set.
+ *
+ * The result is returned in Materialize mode through rsinfo->setResult.
+ *
+ * NOTE(review): the query is assembled directly from the text arguments,
+ * so they are interpreted as SQL fragments; callers must not pass
+ * untrusted input.
+ */
+Datum
+xpath_table(PG_FUNCTION_ARGS)
+{
+	/* SPI (input tuple) support */
+	SPITupleTable *tuptable;
+	HeapTuple	spi_tuple;
+	TupleDesc	spi_tupdesc;
+
+	/* Output tuple (tuplestore) support */
+	Tuplestorestate *tupstore = NULL;
+	TupleDesc	ret_tupdesc;
+	HeapTuple	ret_tuple;
+
+	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+	AttInMetadata *attinmeta;
+	MemoryContext per_query_ctx;
+	MemoryContext oldcontext;
+
+	/* Function parameters */
+	char	   *pkeyfield = GET_STR(PG_GETARG_TEXT_P(0));
+	char	   *xmlfield = GET_STR(PG_GETARG_TEXT_P(1));
+	char	   *relname = GET_STR(PG_GETARG_TEXT_P(2));
+	char	   *xpathset = GET_STR(PG_GETARG_TEXT_P(3));
+	char	   *condition = GET_STR(PG_GETARG_TEXT_P(4));
+
+	char	  **values;
+	xmlChar   **xpaths;
+	xmlChar    *pos;
+	xmlChar    *pathsep = (xmlChar *) "|";
+
+	int			numpaths;
+	int			ret;
+	int			proc;
+	int			i;
+	int			j;
+	int			rownr;			/* For issuing multiple rows from one
+								 * original document */
+	int			had_values;		/* To determine end of nodeset results */
+
+	StringInfo	querysql;
+
+	/*
+	 * We only have a valid tuple description in table function mode.  The
+	 * result-info pointer itself must be checked before it is dereferenced.
+	 */
+	if (rsinfo == NULL || rsinfo->expectedDesc == NULL)
+	{
+		ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR),
+			   errmsg("xpath_table must be called as a table function")));
+	}
+
+	/*
+	 * The tuplestore must exist in a higher context than this function call
+	 * (per_query_ctx is used)
+	 */
+	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
+	oldcontext = MemoryContextSwitchTo(per_query_ctx);
+
+	/*
+	 * Create the tuplestore - SortMem is the max in-memory size before it is
+	 * shipped to a disk heap file. Just like ... SortMem!
+	 */
+	tupstore = tuplestore_begin_heap(true, false, SortMem);
+
+	MemoryContextSwitchTo(oldcontext);
+
+	/* get the requested return tuple description */
+	ret_tupdesc = CreateTupleDescCopy(rsinfo->expectedDesc);
+
+	/*
+	 * At the moment we assume that the returned attributes make sense for
+	 * the XPath specified (i.e. we trust the caller). It's not fatal if they
+	 * get it wrong - the input function for the column type will raise an
+	 * error if the path result can't be converted into the correct binary
+	 * representation.
+	 */
+	attinmeta = TupleDescGetAttInMetadata(ret_tupdesc);
+
+	/*
+	 * We want to materialise because it means that we don't have to carry
+	 * libxml2 parser state between invocations of this function
+	 */
+
+	/* check to see if caller supports us returning a tuplestore */
+	if (!(rsinfo->allowedModes & SFRM_Materialize))
+		ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR),
+		   errmsg("xpath_table requires Materialize mode, but it is not "
+				  "allowed in this context")));
+
+	/* Set return mode and allocate value space. */
+	rsinfo->returnMode = SFRM_Materialize;
+	rsinfo->setDesc = ret_tupdesc;
+
+	values = (char **) palloc(ret_tupdesc->natts * sizeof(char *));
+
+	xpaths = (xmlChar **) palloc(ret_tupdesc->natts * sizeof(xmlChar *));
+
+	/*
+	 * Split XPaths. xpathset is a writable CString.
+	 *
+	 * Note that we stop splitting once we've done all needed for tupdesc.
+	 * Column 0 holds the key, so at most natts - 1 paths are usable; with a
+	 * single-column descriptor no path is taken at all, which keeps the
+	 * later values[j + 1] stores inside the array.
+	 */
+	numpaths = 0;
+	pos = (xmlChar *) xpathset;
+	while ((pos != NULL) && (numpaths < (ret_tupdesc->natts - 1)))
+	{
+		xpaths[numpaths] = pos;
+		pos = (xmlChar *) strstr((char *) pos, (char *) pathsep);
+		if (pos != NULL)
+		{
+			*pos = '\0';
+			pos++;
+		}
+		numpaths++;
+	}
+
+	/* Now build query */
+	querysql = makeStringInfo();
+
+	/* Build initial sql statement */
+	appendStringInfo(querysql, "SELECT %s, %s FROM %s WHERE %s",
+					 pkeyfield,
+					 xmlfield,
+					 relname,
+					 condition
+		);
+
+	if ((ret = SPI_connect()) < 0)
+	{
+		elog(ERROR, "xpath_table: SPI_connect returned %d", ret);
+	}
+
+	if ((ret = SPI_exec(querysql->data, 0)) != SPI_OK_SELECT)
+	{
+		elog(ERROR, "xpath_table: SPI execution failed for query %s", querysql->data);
+	}
+
+	proc = SPI_processed;
+	tuptable = SPI_tuptable;
+	spi_tupdesc = tuptable->tupdesc;
+
+	/* Switch out of SPI context */
+	MemoryContextSwitchTo(oldcontext);
+
+	/*
+	 * Check that SPI returned correct result. If you put a comma into one of
+	 * the function parameters, this will catch it when the SPI query returns
+	 * e.g. 3 columns.
+	 */
+	if (spi_tupdesc->natts != 2)
+	{
+		ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						errmsg("Expression returning multiple columns is not valid in parameter list"),
+						errdetail("Expected two columns in SPI result, got %d", spi_tupdesc->natts)));
+	}
+
+	/*
+	 * Setup the parser. Beware that this must happen in the same context as
+	 * the cleanup - which means that any error from here on must do cleanup
+	 * to ensure that the entity table doesn't get freed by being out of
+	 * context.
+	 */
+	pgxml_parser_init();
+
+	/* For each row i.e. document returned from SPI */
+	for (i = 0; i < proc; i++)
+	{
+		char	   *pkey;
+		char	   *xmldoc;
+
+		xmlDocPtr	doctree;
+		xmlXPathContextPtr ctxt;
+		xmlXPathObjectPtr res;
+		xmlChar    *resstr;
+
+		xmlXPathCompExprPtr comppath;
+
+		/* Extract the row data as C Strings (NULL for SQL NULL) */
+		spi_tuple = tuptable->vals[i];
+		pkey = SPI_getvalue(spi_tuple, spi_tupdesc, 1);
+		xmldoc = SPI_getvalue(spi_tuple, spi_tupdesc, 2);
+
+		/*
+		 * Clear the values array, so that not-well-formed documents return
+		 * NULL in all columns.
+		 *
+		 * Note that this also means that spare columns will be NULL.
+		 */
+		for (j = 0; j < ret_tupdesc->natts; j++)
+		{
+			values[j] = NULL;
+		}
+
+		/* Insert primary key */
+		values[0] = pkey;
+
+		/*
+		 * Parse the document.  A NULL document is handled the same way as
+		 * one that fails to parse.
+		 */
+		if (xmldoc != NULL)
+			doctree = xmlParseMemory(xmldoc, strlen(xmldoc));
+		else
+			doctree = NULL;
+
+		if (doctree == NULL)
+		{						/* not well-formed, so output all-NULL tuple */
+			ret_tuple = BuildTupleFromCStrings(attinmeta, values);
+			oldcontext = MemoryContextSwitchTo(per_query_ctx);
+			tuplestore_puttuple(tupstore, ret_tuple);
+			MemoryContextSwitchTo(oldcontext);
+			heap_freetuple(ret_tuple);
+		}
+		else
+		{
+			/* New loop here - we have to deal with nodeset results */
+			rownr = 0;
+
+			do
+			{
+				/* Now evaluate the set of xpaths. */
+				had_values = 0;
+				for (j = 0; j < numpaths; j++)
+				{
+					ctxt = xmlXPathNewContext(doctree);
+					ctxt->node = xmlDocGetRootElement(doctree);
+					xmlSetGenericErrorFunc(ctxt, pgxml_errorHandler);
+
+					/* compile the path */
+					comppath = xmlXPathCompile(xpaths[j]);
+					if (comppath == NULL)
+					{
+						/* Release what we own before library cleanup */
+						xmlXPathFreeContext(ctxt);
+						xmlFreeDoc(doctree);
+						xmlCleanupParser();
+
+						elog_error(ERROR, "XPath Syntax Error", 1);
+
+						PG_RETURN_NULL();		/* Keep compiler happy */
+					}
+
+					/* Now evaluate the path expression. */
+					res = xmlXPathCompiledEval(comppath, ctxt);
+					xmlXPathFreeCompExpr(comppath);
+
+					if (res != NULL)
+					{
+						switch (res->type)
+						{
+							case XPATH_NODESET:
+								/* We see if this nodeset has enough nodes */
+								if ((res->nodesetval != NULL) &&
+									(rownr < res->nodesetval->nodeNr))
+								{
+									resstr =
+										xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
+									had_values = 1;
+								}
+								else
+								{
+									resstr = NULL;
+								}
+								break;
+
+							case XPATH_STRING:
+								/*
+								 * Does not set had_values, so a string
+								 * result alone never produces a row.
+								 */
+								resstr = xmlStrdup(res->stringval);
+								break;
+
+							default:
+								elog(NOTICE, "Unsupported XQuery result: %d", res->type);
+								resstr = xmlStrdup((const xmlChar *) "<unsupported/>");
+								break;
+						}
+
+						/*
+						 * Insert this into the appropriate column in the
+						 * result tuple.
+						 */
+						values[j + 1] = (char *) resstr;
+					}
+					xmlXPathFreeContext(ctxt);
+				}
+
+				/* Now add the tuple to the output, if there is one. */
+				if (had_values)
+				{
+					ret_tuple = BuildTupleFromCStrings(attinmeta, values);
+					oldcontext = MemoryContextSwitchTo(per_query_ctx);
+					tuplestore_puttuple(tupstore, ret_tuple);
+					MemoryContextSwitchTo(oldcontext);
+					heap_freetuple(ret_tuple);
+				}
+
+				rownr++;
+
+			} while (had_values);
+
+		}
+
+		if (doctree != NULL)
+			xmlFreeDoc(doctree);
+
+		/* SPI_getvalue returns NULL for SQL NULL; pfree must not see that */
+		if (pkey != NULL)
+			pfree(pkey);
+		if (xmldoc != NULL)
+			pfree(xmldoc);
+	}
+
+	xmlCleanupParser();
+
+	/* Needed to flag completeness in 7.3.1. 7.4 defines it as a no-op. */
+	tuplestore_donestoring(tupstore);
+
+	SPI_finish();
+
+	rsinfo->setResult = tupstore;
+
+	/*
+	 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
+	 * tuples are in our tuplestore and passed back through
+	 * rsinfo->setResult. rsinfo->setDesc is set to the tuple description
+	 * that we actually used to build our tuples with, so the caller can
+	 * verify we did what it was expecting.
+	 */
+	return (Datum) 0;
+
+}
--- /dev/null
+/* XSLT processing functions (requiring libxslt) */
+/* John Gray, for Torchbox 2003-04-01 */
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+
+/* libxml includes */
+
+#include <libxml/xpath.h>
+#include <libxml/tree.h>
+#include <libxml/xmlmemory.h>
+
+/* libxslt includes */
+
+#include <libxslt/xslt.h>
+#include <libxslt/xsltInternals.h>
+#include <libxslt/transform.h>
+#include <libxslt/xsltutils.h>
+
+
+/* declarations to come from xpath.c */
+
+extern void elog_error(int level, char *explain, int force);
+extern void pgxml_parser_init();
+extern xmlChar *pgxml_texttoxmlchar(text *textstring);
+
+#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
+
+/* local defs */
+static void parse_params(const char **params, text *paramstr);
+
+Datum xslt_process(PG_FUNCTION_ARGS);
+
+
+#define MAXPARAMS 20
+
+PG_FUNCTION_INFO_V1(xslt_process);
+
+/*
+ * xslt_process(document, stylesheet [, params]) returns text
+ *
+ * Applies the stylesheet to the document and returns the serialised
+ * result.  Each of document and stylesheet is treated as literal XML when
+ * its first character is '<', and as a file name otherwise.  The optional
+ * third argument is a "name=value,name=value" list handed to parse_params().
+ *
+ * Returns SQL NULL if a step fails without libxml2 having collected an
+ * error message, or if the result cannot be serialised; collected messages
+ * are raised through elog_error() at ERROR level.
+ *
+ * NOTE(review): the file-name form makes the server process read whatever
+ * path the SQL caller supplies.  Confirm that this is acceptable for the
+ * roles allowed to call this function.
+ */
+Datum
+xslt_process(PG_FUNCTION_ARGS)
+{
+	const char *params[MAXPARAMS + 1];	/* +1 for the terminator */
+	xsltStylesheetPtr stylesheet = NULL;
+	xmlDocPtr	doctree;
+	xmlDocPtr	restree;
+	xmlDocPtr	ssdoc = NULL;
+	xmlChar    *resstr = NULL;
+	int			resstat;
+	int			reslen = 0;
+
+	text	   *doct = PG_GETARG_TEXT_P(0);
+	text	   *ssheet = PG_GETARG_TEXT_P(1);
+	text	   *paramstr;
+	text	   *tres = NULL;
+
+	if (fcinfo->nargs == 3)
+	{
+		paramstr = PG_GETARG_TEXT_P(2);
+		parse_params(params, paramstr);
+	}
+	else
+	{
+		/* No parameters */
+		params[0] = NULL;
+	}
+
+	/* Setup parser */
+	pgxml_parser_init();
+
+	/*
+	 * Check to see if document is a file or a literal.  Only look at the
+	 * first byte when the value actually contains one.
+	 */
+	if ((VARSIZE(doct) - VARHDRSZ) > 0 && VARDATA(doct)[0] == '<')
+	{
+		doctree = xmlParseMemory((char *) VARDATA(doct), VARSIZE(doct) - VARHDRSZ);
+	}
+	else
+	{
+		doctree = xmlParseFile(GET_STR(doct));
+	}
+
+	if (doctree == NULL)
+	{
+		xmlCleanupParser();
+		elog_error(ERROR, "Error parsing XML document", 0);
+
+		PG_RETURN_NULL();
+	}
+
+	/* Same for stylesheet */
+	if ((VARSIZE(ssheet) - VARHDRSZ) > 0 && VARDATA(ssheet)[0] == '<')
+	{
+		ssdoc = xmlParseMemory((char *) VARDATA(ssheet),
+							   VARSIZE(ssheet) - VARHDRSZ);
+		if (ssdoc == NULL)
+		{
+			xmlFreeDoc(doctree);
+			xmlCleanupParser();
+			elog_error(ERROR, "Error parsing stylesheet as XML document", 0);
+			PG_RETURN_NULL();
+		}
+
+		stylesheet = xsltParseStylesheetDoc(ssdoc);
+	}
+	else
+	{
+		stylesheet = xsltParseStylesheetFile(GET_STR(ssheet));
+	}
+
+	if (stylesheet == NULL)
+	{
+		/*
+		 * When compilation of an in-memory stylesheet fails, the parsed
+		 * document was not taken over by a stylesheet object and is still
+		 * ours to free.
+		 */
+		if (ssdoc != NULL)
+			xmlFreeDoc(ssdoc);
+		xmlFreeDoc(doctree);
+		xsltCleanupGlobals();
+		xmlCleanupParser();
+		elog_error(ERROR, "Failed to parse stylesheet", 0);
+		PG_RETURN_NULL();
+	}
+
+	restree = xsltApplyStylesheet(stylesheet, doctree, params);
+	if (restree == NULL)
+	{
+		/* The transformation failed; there is nothing to serialise */
+		xsltFreeStylesheet(stylesheet);
+		xmlFreeDoc(doctree);
+		xsltCleanupGlobals();
+		xmlCleanupParser();
+		elog_error(ERROR, "Failed to apply stylesheet", 0);
+		PG_RETURN_NULL();
+	}
+
+	resstat = xsltSaveResultToString(&resstr, &reslen, restree, stylesheet);
+
+	/* Copy the result out while the library state is still intact */
+	if (resstat >= 0)
+	{
+		tres = palloc(reslen + VARHDRSZ);
+		if (reslen > 0 && resstr != NULL)
+			memcpy(VARDATA(tres), resstr, reslen);
+		VARATT_SIZEP(tres) = reslen + VARHDRSZ;
+	}
+
+	/* xmlFree is routed to pfree, which must not be given NULL */
+	if (resstr != NULL)
+		xmlFree(resstr);
+
+	xsltFreeStylesheet(stylesheet);
+	xmlFreeDoc(restree);
+	xmlFreeDoc(doctree);
+
+	xsltCleanupGlobals();
+	xmlCleanupParser();
+
+	if (tres == NULL)
+	{
+		PG_RETURN_NULL();
+	}
+
+	PG_RETURN_TEXT_P(tres);
+}
+
+
+/*
+ * Split a "name=value,name=value,..." string into the NULL-terminated
+ * array of alternating name and value pointers passed to
+ * xsltApplyStylesheet().
+ *
+ * params   - output array with room for MAXPARAMS + 1 pointers
+ * paramstr - text value holding the parameter list
+ *
+ * The pointers refer into a private C-string copy of paramstr, which is
+ * modified in place.  At most MAXPARAMS entries (MAXPARAMS / 2 pairs) are
+ * stored; a trailing name with no '=' is ignored.  The array is always
+ * NULL-terminated, including when it is completely full.
+ */
+static void
+parse_params(const char **params, text *paramstr)
+{
+	char	   *pos;
+	char	   *sep;
+	int			nparams = 0;
+	char	   *nvsep = "=";
+	char	   *itsep = ",";
+
+	pos = GET_STR(paramstr);
+
+	/* Each pass stores one name and one value, so two free slots are needed */
+	while (nparams + 1 < MAXPARAMS)
+	{
+		/* Name */
+		sep = strstr(pos, nvsep);
+		if (sep == NULL)
+			break;				/* no '=': nothing more to store */
+		*sep = '\0';
+		params[nparams++] = pos;
+		pos = sep + 1;
+
+		/* Value */
+		params[nparams++] = pos;
+		sep = strstr(pos, itsep);
+		if (sep == NULL)
+			break;				/* last pair */
+		*sep = '\0';
+		pos = sep + 1;
+	}
+
+	/* nparams is at most MAXPARAMS, which is the array's final slot */
+	params[nparams] = NULL;
+}