Commit adca025c authored by Bruce Momjian

Thanks to the generous support of Torchbox (http://www.torchbox.com), I

have been able to significantly improve the contrib/xml XPath
integration code.

New features:

* XPath set-returning function allows multiple results from several
XPath queries to be used as a virtual table.
* Using libxslt, XSLT transformations (with and without parameters) are
supported. (Caution: This support allows generic URL fetching from
within the backend as well).

I've removed the old code so that it is all libxml based. Rather than
attach as a patch, I've put the tar.gz (10k!) at
http://www.azuli.co.uk/pgxml-1.0.tar.gz
(all files in archive are xml/....).

I think this is worth replacing the contrib version with, even though
the function names have changed (though the same functionality is
there), because it includes a SRF and some SPI usage, in addition to
linking to an external library. And it isn't a big module! Obviously, I
understand that people might prefer to move it elsewhere, or might have
reservations about replacing an existing contrib module with an
incompatible one. I'm open to suggestions.

John Gray
parent 19739718
# $PostgreSQL: pgsql/contrib/xml/Makefile,v 1.4 2003/11/29 19:51:36 pgsql Exp $ # This makefile will build the new XML and XSLT routines.
subdir = contrib/xml subdir = contrib/xml
top_builddir = ../.. top_builddir = ../../
include $(top_builddir)/src/Makefile.global include $(top_builddir)/src/Makefile.global
MODULE_big = pgxml_dom MODULE_big = pgxml
OBJS = pgxml_dom.o
SHLIB_LINK = -lxml2 # Remove xslt_proc.o from the following line if you don't have libxslt
DATA_built = pgxml_dom.sql OBJS = xpath.o xslt_proc.o
# Remove -lxslt from the following line if you don't have libxslt.
SHLIB_LINK = -lxml2 -lxslt
DATA_built = pgxml.sql
DOCS = README.pgxml DOCS = README.pgxml
include $(top_srcdir)/contrib/contrib-global.mk include $(top_builddir)contrib/contrib-global.mk
This diff is collapsed.
PGXML TODO List
===============
Some of these items still require much more thought! Since the first
release, the XPath support has improved (because I'm no longer using a
homemade algorithm!).
1. Performance considerations
At present each document is parsed to produce the DOM tree on every query.
Pros:
Easy
No persistent memory or storage allocation for parsed trees
(libxml docs suggest representation of a document might
be 4 times the size of the text)
Cons:
Slow/ CPU intensive to parse.
Makes it difficult for PLs to apply libxml manipulations to create
new documents or amend existing ones.
2. XQuery
I'm not sure if the addition of XQuery would be best as a function or
as a new front-end parser. This is one to think about, but with a
decent implementation of XPath, one of the prerequisites is covered.
3. DOM Interfaces
Expose more aspects of the DOM to user functions/ PLs. This would
allow a procedure in a PL to run some queries and then use exposed
interfaces to libxml to create an XML document out of the query
results. I accept the argument that this might be more properly
performed on the client side.
4. Returning sets of documents from XPath queries.
Although the current implementation allows you to amalgamate the
returned results into a single document, it's quite possible that
you'd like to use the returned set of nodes as a source for FROM.
Is there a good way to optimise/index the results of certain XPath
operations to make them faster?:
select docid, pgxml_xpath(document,'//site/location/text()','','') as location
where pgxml_xpath(document,'//site/name/text()','','') = 'Church Farm';
and with multiple element occurrences in a document?
select d.docid, pgxml_xpath(d.document,'//site/location/text()','','')
from docstore d,
pgxml_xpaths('docstore','document','//feature/type/text()','docid') ft
where ft.key = d.docid and ft.value ='Limekiln';
pgxml_xpaths params are relname, attrname, xpath, returnkey. It would
return a set of two-element tuples (key,value) consisting of the value of
returnkey, and the cdata value of the xpath. The XML document would be
defined by relname and attrname.
The pgxml_xpaths function could be the basis of a functional index,
which could speed up the above query very substantially, working
through the normal query planner mechanism.
5. Return type support.
Better support for returning e.g. numeric or boolean values. I need to
get to grips with the returned data from libxml first.
John Gray <jgray@azuli.co.uk> 16 August 2001
/********************************************************
* Interface code to parse an XML document using expat
********************************************************/
#include "postgres.h"
#include "fmgr.h"
#include "expat.h"
#include "pgxml.h"
/* Memory management - we make expat use standard pg MM */
XML_Memory_Handling_Suite mhs;
/* passthrough functions (palloc is a macro) */
/*
 * Allocation hook installed in the expat memory-handling suite.
 *
 * Simply forwards the request to palloc.  A real function is needed
 * because palloc itself is a macro and so has no address to store in
 * the suite.
 */
static void *
pgxml_palloc(size_t size)
{
	void	   *chunk = palloc(size);

	return chunk;
}
/*
 * Reallocation hook installed in the expat memory-handling suite.
 *
 * Forwards to repalloc and hands back whatever pointer it returns.
 */
static void *
pgxml_repalloc(void *ptr, size_t size)
{
	void	   *resized = repalloc(ptr, size);

	return resized;
}
/*
 * Release hook installed in the expat memory-handling suite.
 *
 * Forwards to pfree.  The function returns void, so the call is made as
 * a plain statement: ISO C does not allow "return <expression>;" inside
 * a void function, even when the expression itself has type void.
 */
static void
pgxml_pfree(void *ptr)
{
	pfree(ptr);
}
static void
pgxml_mhs_init()
{
mhs.malloc_fcn = pgxml_palloc;
mhs.realloc_fcn = pgxml_repalloc;
mhs.free_fcn = pgxml_pfree;
}
/*
 * Placeholder for installing parser handlers chosen by the user.
 *
 * Nothing is configured yet: how such settings would be supplied has
 * not been decided, so the body is intentionally empty.
 */
static void
pgxml_handler_init()
{
	/* intentionally empty - see header comment */
}
/* Returns true if document is well-formed */
PG_FUNCTION_INFO_V1(pgxml_parse);
Datum
pgxml_parse(PG_FUNCTION_ARGS)
{
/* called as pgxml_parse(document) */
XML_Parser p;
text *t = PG_GETARG_TEXT_P(0); /* document buffer */
int32 docsize = VARSIZE(t) - VARHDRSZ;
pgxml_mhs_init();
pgxml_handler_init();
p = XML_ParserCreate_MM(NULL, &mhs, NULL);
if (!p)
{
ereport(ERROR,
(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
errmsg("could not create expat parser")));
PG_RETURN_NULL(); /* seems appropriate if we couldn't parse */
}
if (!XML_Parse(p, (char *) VARDATA(t), docsize, 1))
{
/*
* elog(WARNING, "Parse error at line %d:%s",
* XML_GetCurrentLineNumber(p),
* XML_ErrorString(XML_GetErrorCode(p)));
*/
XML_ParserFree(p);
PG_RETURN_BOOL(false);
}
XML_ParserFree(p);
PG_RETURN_BOOL(true);
}
/* XPath handling functions */
/* XPath support here is for a very skeletal kind of XPath!
It was easy to program though... */
/* This first is the core function that builds a result set. The
actual functions called by the user manipulate that result set
in various ways.
*/
/*
 * Parse a document with expat and collect the text of every element
 * whose path matches pathstr.
 *
 * doc      - the XML document as a text value
 * pathstr  - the path to look for, as a text value (copied into a
 *            NUL-terminated buffer for the handlers)
 *
 * Returns a palloc'd XPath_Results whose resbuf field points at the
 * buffer holding all captured text; the caller must pfree both resbuf
 * and the struct.  Returns NULL when the document fails to parse, in
 * which case everything allocated here (including the result buffer)
 * has already been released.  Failure to create the parser raises an
 * ERROR, which does not return.
 */
static XPath_Results *
build_xpath_results(text *doc, text *pathstr)
{
	XPath_Results *xpr;
	pgxml_udata *udata;
	XML_Parser	p;
	char	   *res;
	int32		docsize = VARSIZE(doc) - VARHDRSZ;
	int32		pathlen = VARSIZE(pathstr) - VARHDRSZ;
	int			parsed_ok;

	xpr = (XPath_Results *) palloc(sizeof(XPath_Results));
	memset((void *) xpr, 0, sizeof(XPath_Results));
	xpr->rescount = 0;

	/* res is not the real return value, just the capture buffer */
	res = (char *) palloc(docsize);
	memset((void *) res, 0, docsize);
	xpr->resbuf = res;

	udata = (pgxml_udata *) palloc(sizeof(pgxml_udata));
	memset((void *) udata, 0, sizeof(pgxml_udata));
	udata->currentpath[0] = '\0';
	udata->textgrab = 0;

	/* NUL-terminated copy of the requested path */
	udata->path = (char *) palloc(pathlen + 1);
	memcpy(udata->path, VARDATA(pathstr), pathlen);
	udata->path[pathlen] = '\0';

	udata->resptr = res;
	udata->reslen = 0;
	udata->xpres = xpr;

	/* Now fire up the parser */
	pgxml_mhs_init();
	p = XML_ParserCreate_MM(NULL, &mhs, NULL);
	if (!p)
		ereport(ERROR,
				(errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
				 errmsg("could not create expat parser")));

	XML_SetUserData(p, (void *) udata);

	/* Set the handlers */
	XML_SetElementHandler(p, pgxml_starthandler, pgxml_endhandler);
	XML_SetCharacterDataHandler(p, pgxml_charhandler);

	parsed_ok = (XML_Parse(p, (char *) VARDATA(doc), docsize, 1) != 0);

	/* handler state and parser are no longer needed in either case */
	XML_ParserFree(p);
	pfree(udata->path);
	pfree(udata);

	if (!parsed_ok)
	{
		/* also drop the capture buffer, which the caller never sees */
		pfree(res);
		pfree(xpr);
		return NULL;
	}

	return xpr;
}
/*
 * pgxml_xpath(document, pathstr, index) - return one matched text value.
 *
 * Builds the result set for pathstr over document and returns the
 * result selected by the 1-based index as a text value.
 *
 * Returns SQL NULL when the document cannot be parsed or when index
 * does not select an existing result.  Indexes of zero or below are
 * rejected explicitly, since they would otherwise address memory in
 * front of the result arrays.  The result set is released on every
 * path that obtained one.
 */
PG_FUNCTION_INFO_V1(pgxml_xpath);
Datum
pgxml_xpath(PG_FUNCTION_ARGS)
{
	text	   *t = PG_GETARG_TEXT_P(0);	/* document buffer */
	text	   *t2 = PG_GETARG_TEXT_P(1);	/* path to match */
	int32		which = PG_GETARG_INT32(2); /* 1-based result number */
	XPath_Results *xpresults;
	text	   *restext = NULL;

	xpresults = build_xpath_results(t, t2);

	/* parse error (not well-formed, or parser failure) */
	if (xpresults == NULL)
		PG_RETURN_NULL();

	if (which >= 1 && which <= xpresults->rescount)
	{
		int32		ind = which - 1;
		int32		len = xpresults->reslens[ind];

		restext = (text *) palloc(len + VARHDRSZ);
		memcpy(VARDATA(restext), xpresults->results[ind], len);
		VARATT_SIZEP(restext) = len + VARHDRSZ;
	}

	pfree(xpresults->resbuf);
	pfree(xpresults);

	if (restext == NULL)
		PG_RETURN_NULL();

	PG_RETURN_TEXT_P(restext);
}
/*
 * Compare the path of the currently open elements with the requested
 * path and update the text-capture state accordingly.
 *
 * - If the requested path does not occur anywhere in the current path,
 *   any capture in progress is stopped and its text is recorded as a
 *   finished result.
 * - Otherwise capture is switched on when the requested path matches
 *   the END of the current path: an absolute request (leading '/') must
 *   equal the whole current path, a relative request must start right
 *   after a '/'.
 *
 * The end-anchored test is done on the tail of the current path rather
 * than on the first strstr hit, so a request such as "b" still matches
 * "/ab/b", where the first occurrence of "b" is not the one at the end.
 */
static void
pgxml_pathcompare(void *userData)
{
	size_t		curlen;
	size_t		pathlen;
	char	   *tail;

	if (strstr(UD->currentpath, UD->path) == NULL)
	{
		/* Should we have more logic here ? */
		if (UD->textgrab)
		{
			UD->textgrab = 0;
			pgxml_finalisegrabbedtext(userData);
		}
		return;
	}

	curlen = strlen(UD->currentpath);
	pathlen = strlen(UD->path);

	/* cannot happen after a successful strstr, but keep the math safe */
	if (pathlen > curlen)
		return;

	/* candidate position for a match anchored at the end */
	tail = UD->currentpath + (curlen - pathlen);
	if (strcmp(tail, UD->path) != 0)
		return;

	if ((UD->path)[0] == '/')
	{
		/* absolute path: must cover the whole current path */
		if (tail == UD->currentpath)
			UD->textgrab = 1;
	}
	else
	{
		/* relative path: must begin immediately after a separator */
		if (tail > UD->currentpath && tail[-1] == '/')
			UD->textgrab = 1;
	}
}
/*
 * expat start-element callback.
 *
 * Appends "/<name>" to the current path when the result still fits in
 * the fixed-size buffer, otherwise emits a warning and leaves the path
 * unchanged.  When no text capture is in progress the path is then
 * compared with the requested one; while capturing, nothing further is
 * done (the element itself is not reconstituted into the result text).
 * The attribute list is not examined.
 */
static void
pgxml_starthandler(void *userData, const XML_Char * name,
				   const XML_Char ** atts)
{
	size_t		namelen = strlen(name);
	size_t		curlen = strlen(UD->currentpath);

	if (namelen + curlen > MAXPATHLENGTH - 2)
		elog(WARNING, "path too long");
	else
	{
		/* separator, then the element name including its terminator */
		UD->currentpath[curlen] = '/';
		memcpy(UD->currentpath + curlen + 1, name, namelen + 1);
	}

	if (!UD->textgrab)
		pgxml_pathcompare(userData);
}
/*
 * expat end-element callback.
 *
 * Removes the last component from the current path when it equals the
 * element being closed; a mismatch only produces a warning and leaves
 * the path as it is.  A current path without any '/' is reported as an
 * internal error.  If text is being captured, the shortened path is
 * re-checked so that capture stops once the matched element is left.
 */
static void
pgxml_endhandler(void *userData, const XML_Char * name)
{
	char	   *lastsep = strrchr(UD->currentpath, '/');

	if (lastsep == NULL)
	{
		/* internal error */
		elog(ERROR, "did not find '/'");
		lastsep = UD->currentpath;
	}

	if (strcmp(name, lastsep + 1) == 0)
		lastsep[0] = '\0';		/* chop the closed element off the end */
	else
		elog(WARNING, "wanted [%s], got [%s]", lastsep, name);

	if (UD->textgrab)
		pgxml_pathcompare(userData);
}
/*
 * expat character-data callback.
 *
 * While capture is switched on, copies the len bytes at s to the
 * current write position of the result buffer and advances both the
 * write position and the length of the result being collected.  Does
 * nothing when capture is off or len is not positive.
 */
static void
pgxml_charhandler(void *userData, const XML_Char * s, int len)
{
	if (!UD->textgrab || len <= 0)
		return;

	memcpy(UD->resptr, s, len);
	UD->reslen += len;
	UD->resptr += len;
}
/* Should I be using PG list types here? */
static void
pgxml_finalisegrabbedtext(void *userData)
{
/* In res/reslen, we have a single result. */
UD->xpres->results[UD->xpres->rescount] = UD->resptr - UD->reslen;
UD->xpres->reslens[UD->xpres->rescount] = UD->reslen;
UD->reslen = 0;
UD->xpres->rescount++;
/*
* This effectively concatenates all the results together but we do
* know where one ends and the next begins
*/
}
/* Header for pg xml parser interface */
/* Memory passthroughs handed to expat (wrap palloc/repalloc/pfree) */
static void *pgxml_palloc(size_t size);
static void *pgxml_repalloc(void *ptr, size_t size);
static void pgxml_pfree(void *ptr);
/* One-time setup helpers called before a parser is created */
static void pgxml_mhs_init();
static void pgxml_handler_init();
/* SQL-callable entry points */
Datum pgxml_parse(PG_FUNCTION_ARGS);
Datum pgxml_xpath(PG_FUNCTION_ARGS);
/* expat callbacks; userData is always a pgxml_udata pointer (see UD) */
static void pgxml_starthandler(void *userData, const XML_Char * name,
const XML_Char ** atts);
static void pgxml_endhandler(void *userData, const XML_Char * name);
static void pgxml_charhandler(void *userData, const XML_Char * s, int len);
/* Path matching and result bookkeeping used by the callbacks */
static void pgxml_pathcompare(void *userData);
static void pgxml_finalisegrabbedtext(void *userData);
/* Size in bytes of the buffer holding the path of currently open elements */
#define MAXPATHLENGTH 512
/* Number of slots in the results[] and reslens[] arrays of XPath_Results */
#define MAXRESULTS 100
/*
 * Set of text results collected for one path query.  All results live
 * back to back in a single buffer; the arrays say where each one starts
 * and how long it is.
 */
typedef struct
{
int rescount;	/* number of results recorded so far */
char *results[MAXRESULTS];	/* start of each result inside resbuf */
int32 reslens[MAXRESULTS];	/* length in bytes of each result */
char *resbuf; /* pointer to the result buffer for pfree */
} XPath_Results;
/*
 * Per-parse state passed to the expat callbacks as their userData.
 */
typedef struct
{
char currentpath[MAXPATHLENGTH];	/* "/a/b/c" path of the open elements */
char *path;	/* NUL-terminated path being searched for */
int textgrab;	/* non-zero while character data is being captured */
char *resptr;	/* next write position in the result buffer */
int32 reslen;	/* bytes captured for the result in progress */
XPath_Results *xpres;	/* result set that finished results are added to */
} pgxml_udata;
/*
 * Shorthand used inside the callbacks: views the local void *userData
 * argument as a pgxml_udata pointer.  Only usable where a variable named
 * userData is in scope.
 */
#define UD ((pgxml_udata *) userData)
-- SQL for XML parser --SQL for XML parser
-- Adjust this setting to control where the objects get created. CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS bool
SET search_path TO public; AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS boolean CREATE OR REPLACE FUNCTION xpath_string(text,text) RETURNS text
AS 'MODULE_PATHNAME' LANGUAGE c STRICT; AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
CREATE OR REPLACE FUNCTION pgxml_xpath(text, text, text, text) RETURNS text CREATE OR REPLACE FUNCTION xpath_nodeset(text,text,text,text) RETURNS text
AS 'MODULE_PATHNAME' LANGUAGE c STRICT; AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
-- Scalar result functions implemented in the C module.
-- NOTE(review): arguments are presumably (document, xpath) - confirm
-- against xpath.c.
CREATE OR REPLACE FUNCTION xpath_number(text,text) RETURNS float4
AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
CREATE OR REPLACE FUNCTION xpath_bool(text,text) RETURNS boolean
AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
-- List function
-- Three-argument form is implemented in the C module.
CREATE OR REPLACE FUNCTION xpath_list(text,text,text) RETURNS text
AS 'MODULE_PATHNAME'
LANGUAGE 'c' WITH (isStrict);
-- Two-argument convenience form: calls the three-argument function with
-- ',' as its third argument (the doubled quotes in the body are escaped
-- single quotes).
CREATE OR REPLACE FUNCTION xpath_list(text,text) RETURNS text
AS 'SELECT xpath_list($1,$2,'','')' language 'SQL' WITH (isStrict);
-- Wrapper functions for nodeset where no tags needed.
-- Two-argument form passes empty strings for both remaining arguments.
CREATE OR REPLACE FUNCTION xpath_nodeset(text,text) RETURNS text AS
'SELECT xpath_nodeset($1,$2,'''','''')' language 'SQL' WITH (isStrict);
-- Three-argument form passes an empty string as the third argument and
-- forwards its own third argument as the fourth.
CREATE OR REPLACE FUNCTION xpath_nodeset(text,text,text) RETURNS text AS
'SELECT xpath_nodeset($1,$2,'''',$3)' language 'SQL' WITH (isStrict);
-- Table function
-- Returns a set of records, so callers must supply a column definition
-- list in the FROM clause.
CREATE OR REPLACE FUNCTION xpath_table(text,text,text,text,text) RETURNS setof record
AS 'MODULE_PATHNAME'
LANGUAGE 'c' WITH (isStrict);
-- XSLT functions
-- Delete from here to the end of the file if you are not compiling with
-- XSLT support.
-- Arguments: document, stylesheet, parameter string.
CREATE OR REPLACE FUNCTION xslt_process(text,text,text) RETURNS text
AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
-- the function checks for the correct argument count
-- (both signatures resolve to the same C function)
CREATE OR REPLACE FUNCTION xslt_process(text,text) RETURNS text
AS 'MODULE_PATHNAME' LANGUAGE 'c' WITH (isStrict);
/* Parser interface for DOM-based parser (libxml) rather than
stream-based SAX-type parser */
#include "postgres.h"
#include "fmgr.h"
/* libxml includes */
#include <libxml/xpath.h>
#include <libxml/tree.h>
#include <libxml/xmlmemory.h>
/* declarations */
/* Memory passthroughs registered with libxml (wrap the PostgreSQL allocator) */
static void *pgxml_palloc(size_t size);
static void *pgxml_repalloc(void *ptr, size_t size);
static void pgxml_pfree(void *ptr);
static char *pgxml_pstrdup(const char *string);
/* Installs the passthroughs and initialises the libxml parser */
static void pgxml_parser_init();
/* Serialises an XPath node set, optionally wrapped in tags */
static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset, xmlDocPtr doc,
xmlChar * toptagname, xmlChar * septagname,
int format);
/* Copies a text value into a NUL-terminated xmlChar string */
static xmlChar *pgxml_texttoxmlchar(text *textstring);
/* SQL-callable entry points */
Datum pgxml_parse(PG_FUNCTION_ARGS);
Datum pgxml_xpath(PG_FUNCTION_ARGS);
/* memory handling passthrough functions (e.g. palloc, pstrdup are
currently macros, and the others might become so...) */
/*
 * malloc replacement registered with libxml: forwards to palloc.
 * Exists as a function because palloc is currently a macro.
 */
static void *
pgxml_palloc(size_t size)
{
	void	   *block = palloc(size);

	return block;
}
/*
 * realloc replacement registered with libxml: forwards to repalloc and
 * returns the pointer it yields.
 */
static void *
pgxml_repalloc(void *ptr, size_t size)
{
	void	   *block = repalloc(ptr, size);

	return block;
}
/*
 * free replacement registered with libxml: forwards to pfree.
 *
 * Written as a plain call because ISO C forbids a return statement with
 * an expression in a function whose return type is void.
 */
static void
pgxml_pfree(void *ptr)
{
	pfree(ptr);
}
/*
 * strdup replacement registered with libxml: forwards to pstrdup, which
 * is currently a macro and so cannot be registered directly.
 */
static char *
pgxml_pstrdup(const char *string)
{
	char	   *copy = pstrdup(string);

	return copy;
}
static void
pgxml_parser_init()
{
/*
* This code should also set parser settings from user-supplied info.
* Quite how these settings are made is another matter :)
*/
xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup);
xmlInitParser();
}
/* Returns true if document is well-formed */
PG_FUNCTION_INFO_V1(pgxml_parse);
Datum
pgxml_parse(PG_FUNCTION_ARGS)
{
/* called as pgxml_parse(document) */
xmlDocPtr doctree;
text *t = PG_GETARG_TEXT_P(0); /* document buffer */
int32 docsize = VARSIZE(t) - VARHDRSZ;
pgxml_parser_init();
doctree = xmlParseMemory((char *) VARDATA(t), docsize);
if (doctree == NULL)
{
xmlCleanupParser();
PG_RETURN_BOOL(false); /* i.e. not well-formed */
}
xmlCleanupParser();
xmlFreeDoc(doctree);
PG_RETURN_BOOL(true);
}
/*
 * Turn an XPath node set into a single text representation.
 *
 * nodeset     - nodes to serialise; NULL is treated as an empty set
 * doc         - document the nodes belong to (passed to xmlNodeDump)
 * toptagname  - if non-NULL and non-empty, the whole output is wrapped
 *               in <toptagname> ... </toptagname>
 * septagname  - if non-NULL and non-empty, every node is wrapped in
 *               <septagname> ... </septagname>
 * format      - non-zero appends a newline after every node; the value
 *               2 additionally asks xmlNodeDump for formatted output
 *
 * Each node is dumped into an xmlBuffer; the return value is an
 * xmlStrdup'd copy of the buffer contents, which the caller must free.
 */
static xmlChar *
pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
				   xmlDocPtr doc,
				   xmlChar * toptagname,
				   xmlChar * septagname,
				   int format)
{
	xmlBufferPtr out = xmlBufferCreate();
	int			wrap_all = (toptagname != NULL) && (xmlStrlen(toptagname) > 0);
	int			wrap_each = (septagname != NULL) && (xmlStrlen(septagname) > 0);
	int			total = (nodeset != NULL) ? nodeset->nodeNr : 0;
	int			pretty = (format == 2);
	int			n;
	xmlChar    *text_copy;

	if (wrap_all)
	{
		xmlBufferWriteChar(out, "<");
		xmlBufferWriteCHAR(out, toptagname);
		xmlBufferWriteChar(out, ">");
	}

	for (n = 0; n < total; n++)
	{
		if (wrap_each)
		{
			xmlBufferWriteChar(out, "<");
			xmlBufferWriteCHAR(out, septagname);
			xmlBufferWriteChar(out, ">");
		}

		xmlNodeDump(out, doc, nodeset->nodeTab[n], 1, pretty);

		if (wrap_each)
		{
			xmlBufferWriteChar(out, "</");
			xmlBufferWriteCHAR(out, septagname);
			xmlBufferWriteChar(out, ">");
		}

		if (format)
			xmlBufferWriteChar(out, "\n");
	}

	if (wrap_all)
	{
		xmlBufferWriteChar(out, "</");
		xmlBufferWriteCHAR(out, toptagname);
		xmlBufferWriteChar(out, ">");
	}

	/* hand back a copy so the buffer itself can be released here */
	text_copy = xmlStrdup(out->content);
	xmlBufferFree(out);

	return text_copy;
}
/*
 * Copy the payload of a text value into a freshly palloc'd,
 * NUL-terminated xmlChar string.  The caller owns the returned buffer.
 */
static xmlChar *
pgxml_texttoxmlchar(text *textstring)
{
	int32		nbytes = VARSIZE(textstring) - VARHDRSZ;
	xmlChar    *copy = (xmlChar *) palloc(nbytes + 1);

	memcpy((char *) copy, VARDATA(textstring), nbytes);
	copy[nbytes] = '\0';

	return copy;
}
/*
 * pgxml_xpath(document, xpath, toptag, septag) - evaluate an XPath
 * expression against a document and return the result as text.
 *
 * document - XML text, parsed into a libxml tree on every call
 * xpath    - expression to evaluate, with the root element as context
 * toptag   - optional tag wrapped around a node-set result as a whole
 * septag   - optional tag wrapped around every node of a node set
 *
 * Node-set results are serialised with pgxmlNodeSetToText, string
 * results are returned as they are, and any other result type yields
 * the literal "<unsupported/>" together with a warning.
 *
 * Returns SQL NULL when the document is not well-formed, when the
 * expression does not compile (a warning is emitted), or when the
 * evaluation produces no result object.
 *
 * Everything obtained from libxml (tree, XPath context, result object,
 * result string) and the palloc'd argument copies are released on all
 * paths before xmlCleanupParser() is called.
 */
PG_FUNCTION_INFO_V1(pgxml_xpath);
Datum
pgxml_xpath(PG_FUNCTION_ARGS)
{
	xmlDocPtr	doctree;
	xmlXPathContextPtr ctxt = NULL;
	xmlXPathObjectPtr res = NULL;
	xmlXPathCompExprPtr comppath;
	xmlChar    *xpath,
			   *toptag,
			   *septag;
	xmlChar    *xpresstr = NULL;
	int32		docsize,
				ressize;
	text	   *t;
	text	   *xpres = NULL;

	t = PG_GETARG_TEXT_P(0);	/* document buffer */
	xpath = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(1));	/* XPath expression */
	toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(2));
	septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_P(3));

	docsize = VARSIZE(t) - VARHDRSZ;

	pgxml_parser_init();

	/* a NULL tree means the document is not well-formed */
	doctree = xmlParseMemory((char *) VARDATA(t), docsize);

	if (doctree != NULL)
		ctxt = xmlXPathNewContext(doctree);

	if (ctxt != NULL)
	{
		ctxt->node = xmlDocGetRootElement(doctree);

		/* compile the path */
		comppath = xmlXPathCompile(xpath);
		if (comppath == NULL)
			elog(WARNING, "XPath syntax error");
		else
		{
			/* Now evaluate the path expression. */
			res = xmlXPathCompiledEval(comppath, ctxt);
			xmlXPathFreeCompExpr(comppath);
		}
	}

	if (res != NULL)
	{
		/* how the result is rendered depends on its type */
		switch (res->type)
		{
			case XPATH_NODESET:
				xpresstr = pgxmlNodeSetToText(res->nodesetval,
											  doctree,
											  toptag, septag, 0);
				break;

			case XPATH_STRING:
				xpresstr = xmlStrdup(res->stringval);
				break;

			default:
				elog(WARNING, "Unsupported XQuery result: %d", res->type);
				xpresstr = xmlStrdup((const xmlChar *) "<unsupported/>");
				break;
		}

		if (xpresstr != NULL)
		{
			/* Now convert this result back to text */
			ressize = xmlStrlen(xpresstr);
			xpres = (text *) palloc(ressize + VARHDRSZ);
			memcpy(VARDATA(xpres), xpresstr, ressize);
			VARATT_SIZEP(xpres) = ressize + VARHDRSZ;

			xmlFree(xpresstr);
		}
	}

	/* Free various storage; libxml objects go before the library cleanup */
	if (res != NULL)
		xmlXPathFreeObject(res);
	if (ctxt != NULL)
		xmlXPathFreeContext(ctxt);
	if (doctree != NULL)
		xmlFreeDoc(doctree);

	pfree((void *) xpath);
	pfree((void *) toptag);
	pfree((void *) septag);

	xmlCleanupParser();

	if (xpres == NULL)
		PG_RETURN_NULL();

	PG_RETURN_TEXT_P(xpres);
}
-- SQL for XML parser
-- Registers the C functions of the module as SQL-callable functions.
-- Adjust this setting to control where the objects get created.
SET search_path TO public;
-- pgxml_parse(document): true if the document is well-formed.
CREATE OR REPLACE FUNCTION pgxml_parse(text) RETURNS boolean
AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
-- pgxml_xpath(document, xpath, toptag, septag): XPath result as text.
CREATE OR REPLACE FUNCTION pgxml_xpath(text, text, text, text) RETURNS text
AS 'MODULE_PATHNAME' LANGUAGE c STRICT;
This diff is collapsed.
/* XSLT processing functions (requiring libxslt) */
/* John Gray, for Torchbox 2003-04-01 */
#include "postgres.h"
#include "fmgr.h"
#include "executor/spi.h"
#include "funcapi.h"
#include "miscadmin.h"
/* libxml includes */
#include <libxml/xpath.h>
#include <libxml/tree.h>
#include <libxml/xmlmemory.h>
/* libxslt includes */
#include <libxslt/xslt.h>
#include <libxslt/xsltInternals.h>
#include <libxslt/transform.h>
#include <libxslt/xsltutils.h>
/* declarations to come from xpath.c */
/* NOTE(review): elog_error's exact semantics (level/force) are defined in
 * xpath.c, which is not visible here - confirm before relying on them. */
extern void elog_error(int level, char *explain, int force);
extern void pgxml_parser_init();
extern xmlChar *pgxml_texttoxmlchar(text *textstring);
/* Convert a text pointer into a C string by calling textout */
#define GET_STR(textp) DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp)))
/* local defs */
/* Splits a parameter string into the params array used by xslt_process */
static void parse_params(const char **params, text *paramstr);
Datum xslt_process(PG_FUNCTION_ARGS);
/* Upper bound on entries parse_params stores; the array has one extra
 * slot for the NULL terminator */
#define MAXPARAMS 20
PG_FUNCTION_INFO_V1(xslt_process);
Datum xslt_process(PG_FUNCTION_ARGS) {
const char *params[MAXPARAMS + 1]; /* +1 for the terminator */
xsltStylesheetPtr stylesheet = NULL;
xmlDocPtr doctree;
xmlDocPtr restree;
xmlDocPtr ssdoc = NULL;
xmlChar *resstr;
int resstat;
int reslen;
text *doct = PG_GETARG_TEXT_P(0);
text *ssheet = PG_GETARG_TEXT_P(1);
text *paramstr;
text *tres;
if (fcinfo->nargs == 3)
{
paramstr = PG_GETARG_TEXT_P(2);
parse_params(params,paramstr);
}
else /* No parameters */
{
params[0] = NULL;
}
/* Setup parser */
pgxml_parser_init();
/* Check to see if document is a file or a literal */
if (VARDATA(doct)[0] == '<')
{
doctree = xmlParseMemory((char *) VARDATA(doct), VARSIZE(doct)-VARHDRSZ);
}
else
{
doctree = xmlParseFile(GET_STR(doct));
}
if (doctree == NULL)
{
xmlCleanupParser();
elog_error(ERROR,"Error parsing XML document",0);
PG_RETURN_NULL();
}
/* Same for stylesheet */
if (VARDATA(ssheet)[0] == '<')
{
ssdoc = xmlParseMemory((char *) VARDATA(ssheet),
VARSIZE(ssheet)-VARHDRSZ);
if (ssdoc == NULL)
{
xmlFreeDoc(doctree);
xmlCleanupParser();
elog_error(ERROR,"Error parsing stylesheet as XML document",0);
PG_RETURN_NULL();
}
stylesheet = xsltParseStylesheetDoc(ssdoc);
}
else
{
stylesheet = xsltParseStylesheetFile(GET_STR(ssheet));
}
if (stylesheet == NULL)
{
xmlFreeDoc(doctree);
xsltCleanupGlobals();
xmlCleanupParser();
elog_error(ERROR,"Failed to parse stylesheet",0);
PG_RETURN_NULL();
}
restree = xsltApplyStylesheet(stylesheet, doctree, params);
resstat = xsltSaveResultToString(&resstr, &reslen, restree, stylesheet);
xsltFreeStylesheet(stylesheet);
xmlFreeDoc(restree);
xmlFreeDoc(doctree);
xsltCleanupGlobals();
xmlCleanupParser();
if (resstat < 0) {
PG_RETURN_NULL();
}
tres = palloc(reslen + VARHDRSZ);
memcpy(VARDATA(tres),resstr,reslen);
VARATT_SIZEP(tres) = reslen + VARHDRSZ;
PG_RETURN_TEXT_P(tres);
}
/*
 * Split a "name=value,name=value,..." string into a NULL-terminated
 * array of alternating name and value pointers.
 *
 * params   - output array; must have room for MAXPARAMS + 1 pointers.
 *            Entries point into a C-string copy of paramstr that is
 *            modified in place ('=' and ',' are overwritten with NULs).
 * paramstr - the parameter string as a text value
 *
 * Parsing stops at the first item that has no '=', or once MAXPARAMS
 * entries (MAXPARAMS / 2 pairs) have been stored.  The array is ALWAYS
 * terminated with NULL, including when the maximum number of pairs is
 * reached; in that case the terminator goes into the extra slot.
 */
static void
parse_params(const char **params, text *paramstr)
{
	char	   *pos = GET_STR(paramstr);
	int			i = 0;

	while (i + 1 < MAXPARAMS)
	{
		char	   *eq = strchr(pos, '=');

		/* an item without '=' ends the list */
		if (eq == NULL)
			break;

		*eq = '\0';
		params[i++] = pos;		/* name */
		params[i++] = eq + 1;	/* value */

		/* find the end of the value; no ',' means this was the last pair */
		pos = strchr(eq + 1, ',');
		if (pos == NULL)
			break;

		*pos = '\0';
		pos++;
	}

	/* i never exceeds MAXPARAMS, so this stays inside the array */
	params[i] = NULL;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment