Commit 936c4af1 authored by Tom Lane's avatar Tom Lane

Fix up memory management problems in contrib/xml2.

Get rid of the code that attempted to funnel libxml2's memory allocations
into palloc.   We already knew from experience with the core xml datatype
that trying to do this is simply not reliable.  Unlike the core code, I
did not bother adding a lot of PG_TRY/PG_CATCH logic to try to ensure that
everything is cleaned up on error exit.  Hence, we might leak some memory
if one of these functions fails partway through.  Given the deprecated
status of this contrib module and the fact that errors partway through
the functions shouldn't be too common, it doesn't seem worth worrying about.

Also fix a separate bug in xpath_table, that it did the wrong things
if given a result tuple descriptor with less than 2 columns.  While
such a case isn't very useful in practice, we shouldn't fail or stomp
memory when it occurs.

Add some simple regression tests based on all the reported crash cases
that I have on hand.

This should be back-patched, but let's see if the buildfarm likes it first.
parent 7d7db18a
# $PostgreSQL: pgsql/contrib/xml2/Makefile,v 1.12 2008/05/08 16:49:37 tgl Exp $ # $PostgreSQL: pgsql/contrib/xml2/Makefile,v 1.13 2010/02/28 21:31:57 tgl Exp $
MODULE_big = pgxml MODULE_big = pgxml
...@@ -8,6 +8,7 @@ SHLIB_LINK += $(filter -lxslt, $(LIBS)) $(filter -lxml2, $(LIBS)) ...@@ -8,6 +8,7 @@ SHLIB_LINK += $(filter -lxslt, $(LIBS)) $(filter -lxml2, $(LIBS))
DATA_built = pgxml.sql DATA_built = pgxml.sql
DATA = uninstall_pgxml.sql DATA = uninstall_pgxml.sql
REGRESS = xml2
ifdef USE_PGXS ifdef USE_PGXS
PG_CONFIG = pg_config PG_CONFIG = pg_config
......
--
-- first, define the functions. Turn off echoing so that expected file
-- does not depend on contents of pgxml.sql.
--
SET client_min_messages = warning;
\set ECHO none
RESET client_min_messages;
select query_to_xml('select 1 as x',true,false,'');
query_to_xml
---------------------------------------------------------------
<table xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">+
+
<row> +
<x>1</x> +
</row> +
+
</table> +
(1 row)
select xslt_process( query_to_xml('select x from generate_series(1,5) as
x',true,false,'')::text,
$$<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" indent="yes" />
<xsl:template match="*">
<xsl:copy>
<xsl:copy-of select="@*" />
<xsl:apply-templates />
</xsl:copy>
</xsl:template>
<xsl:template match="comment()|processing-instruction()">
<xsl:copy />
</xsl:template>
</xsl:stylesheet>
$$::text);
xslt_process
---------------------------------------------------------------
<?xml version="1.0"?> +
<table xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">+
+
<row> +
<x>1</x> +
</row> +
+
<row> +
<x>2</x> +
</row> +
+
<row> +
<x>3</x> +
</row> +
+
<row> +
<x>4</x> +
</row> +
+
<row> +
<x>5</x> +
</row> +
+
</table> +
(1 row)
CREATE TABLE xpath_test (id integer NOT NULL, t xml);
INSERT INTO xpath_test VALUES (1, '<doc><int>1</int></doc>');
SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
as t(id int4);
id
----
(0 rows)
SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
as t(id int4, doc int4);
id | doc
----+-----
1 | 1
(1 row)
DROP TABLE xpath_test;
CREATE TABLE xpath_test (id integer NOT NULL, t text);
INSERT INTO xpath_test VALUES (1, '<doc><int>1</int></doc>');
SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
as t(id int4);
id
----
(0 rows)
SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
as t(id int4, doc int4);
id | doc
----+-----
1 | 1
(1 row)
create table articles (article_id integer, article_xml xml, date_entered date);
insert into articles (article_id, article_xml, date_entered)
values (2, '<article><author>test</author><pages>37</pages></article>', now());
SELECT * FROM
xpath_table('article_id',
'article_xml',
'articles',
'/article/author|/article/pages|/article/title',
'date_entered > ''2003-01-01'' ')
AS t(article_id integer, author text, page_count integer, title text);
article_id | author | page_count | title
------------+--------+------------+-------
2 | test | 37 |
(1 row)
-- this used to fail when invoked a second time
select xslt_process('<aaa/>',$$<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>$$)::xml;
xslt_process
--------------
<aaa/> +
(1 row)
select xslt_process('<aaa/>',$$<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>$$)::xml;
xslt_process
--------------
<aaa/> +
(1 row)
create table t1 (id integer, xml_data xml);
insert into t1 (id, xml_data)
values
(1, '<attributes><attribute name="attr_1">Some
Value</attribute></attributes>');
create index idx_xpath on t1 ( xpath_string
('/attributes/attribute[@name="attr_1"]/text()', xml_data::text));
--
-- first, define the functions. Turn off echoing so that expected file
-- does not depend on contents of pgxml.sql.
--
SET client_min_messages = warning;
\set ECHO none
\i pgxml.sql
\set ECHO all
RESET client_min_messages;
select query_to_xml('select 1 as x',true,false,'');
select xslt_process( query_to_xml('select x from generate_series(1,5) as
x',true,false,'')::text,
$$<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" indent="yes" />
<xsl:template match="*">
<xsl:copy>
<xsl:copy-of select="@*" />
<xsl:apply-templates />
</xsl:copy>
</xsl:template>
<xsl:template match="comment()|processing-instruction()">
<xsl:copy />
</xsl:template>
</xsl:stylesheet>
$$::text);
CREATE TABLE xpath_test (id integer NOT NULL, t xml);
INSERT INTO xpath_test VALUES (1, '<doc><int>1</int></doc>');
SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
as t(id int4);
SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
as t(id int4, doc int4);
DROP TABLE xpath_test;
CREATE TABLE xpath_test (id integer NOT NULL, t text);
INSERT INTO xpath_test VALUES (1, '<doc><int>1</int></doc>');
SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
as t(id int4);
SELECT * FROM xpath_table('id', 't', 'xpath_test', '/doc/int', 'true')
as t(id int4, doc int4);
create table articles (article_id integer, article_xml xml, date_entered date);
insert into articles (article_id, article_xml, date_entered)
values (2, '<article><author>test</author><pages>37</pages></article>', now());
SELECT * FROM
xpath_table('article_id',
'article_xml',
'articles',
'/article/author|/article/pages|/article/title',
'date_entered > ''2003-01-01'' ')
AS t(article_id integer, author text, page_count integer, title text);
-- this used to fail when invoked a second time
select xslt_process('<aaa/>',$$<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>$$)::xml;
select xslt_process('<aaa/>',$$<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>$$)::xml;
create table t1 (id integer, xml_data xml);
insert into t1 (id, xml_data)
values
(1, '<attributes><attribute name="attr_1">Some
Value</attribute></attributes>');
create index idx_xpath on t1 ( xpath_string
('/attributes/attribute[@name="attr_1"]/text()', xml_data::text));
/* /*
* $PostgreSQL: pgsql/contrib/xml2/xpath.c,v 1.26 2010/02/28 19:51:37 tgl Exp $ * $PostgreSQL: pgsql/contrib/xml2/xpath.c,v 1.27 2010/02/28 21:31:57 tgl Exp $
* *
* Parser interface for DOM-based parser (libxml) rather than * Parser interface for DOM-based parser (libxml) rather than
* stream-based SAX-type parser * stream-based SAX-type parser
...@@ -42,10 +42,6 @@ void pgxml_parser_init(void); ...@@ -42,10 +42,6 @@ void pgxml_parser_init(void);
/* local declarations */ /* local declarations */
static void *pgxml_palloc(size_t size);
static void *pgxml_repalloc(void *ptr, size_t size);
static void pgxml_pfree(void *ptr);
static char *pgxml_pstrdup(const char *string);
static void pgxml_errorHandler(void *ctxt, const char *msg,...); static void pgxml_errorHandler(void *ctxt, const char *msg,...);
static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset, static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
...@@ -59,41 +55,10 @@ static xmlChar *pgxml_texttoxmlchar(text *textstring); ...@@ -59,41 +55,10 @@ static xmlChar *pgxml_texttoxmlchar(text *textstring);
static xmlXPathObjectPtr pgxml_xpath(text *document, xmlChar *xpath); static xmlXPathObjectPtr pgxml_xpath(text *document, xmlChar *xpath);
/* Global variables */ /* Global variables */
static char *pgxml_errorMsg = NULL; /* overall error message */ static char *pgxml_errorMsg = NULL; /* overall error message */
/* memory handling passthrough functions (e.g. palloc, pstrdup are
currently macros, and the others might become so...) */
static void *
pgxml_palloc(size_t size)
{
/* elog(DEBUG1,"Alloc %d in CMC %p",size,CurrentMemoryContext); */
return palloc(size);
}
static void *
pgxml_repalloc(void *ptr, size_t size)
{
/* elog(DEBUG1,"ReAlloc in CMC %p",CurrentMemoryContext);*/
return repalloc(ptr, size);
}
static void
pgxml_pfree(void *ptr)
{
/* elog(DEBUG1,"Free in CMC %p",CurrentMemoryContext); */
pfree(ptr);
}
static char *
pgxml_pstrdup(const char *string)
{
return pstrdup(string);
}
/* /*
* The error handling function. This formats an error message and sets * The error handling function. This formats an error message and sets
* a flag - an ereport will be issued prior to return * a flag - an ereport will be issued prior to return
...@@ -157,9 +122,6 @@ pgxml_parser_init(void) ...@@ -157,9 +122,6 @@ pgxml_parser_init(void)
pgxml_errorMsg = NULL; pgxml_errorMsg = NULL;
xmlSetGenericErrorFunc(NULL, pgxml_errorHandler); xmlSetGenericErrorFunc(NULL, pgxml_errorHandler);
/* Replace libxml memory handling (DANGEROUS) */
xmlMemSetup(pgxml_pfree, pgxml_palloc, pgxml_repalloc, pgxml_pstrdup);
/* Initialize libxml */ /* Initialize libxml */
xmlInitParser(); xmlInitParser();
...@@ -627,7 +589,7 @@ xpath_table(PG_FUNCTION_ARGS) ...@@ -627,7 +589,7 @@ xpath_table(PG_FUNCTION_ARGS)
int j; int j;
int rownr; /* For issuing multiple rows from one original int rownr; /* For issuing multiple rows from one original
* document */ * document */
int had_values; /* To determine end of nodeset results */ bool had_values; /* To determine end of nodeset results */
StringInfoData query_buf; StringInfoData query_buf;
/* We only have a valid tuple description in table function mode */ /* We only have a valid tuple description in table function mode */
...@@ -654,7 +616,6 @@ xpath_table(PG_FUNCTION_ARGS) ...@@ -654,7 +616,6 @@ xpath_table(PG_FUNCTION_ARGS)
* The tuplestore must exist in a higher context than this function call * The tuplestore must exist in a higher context than this function call
* (per_query_ctx is used) * (per_query_ctx is used)
*/ */
per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
oldcontext = MemoryContextSwitchTo(per_query_ctx); oldcontext = MemoryContextSwitchTo(per_query_ctx);
...@@ -671,6 +632,12 @@ xpath_table(PG_FUNCTION_ARGS) ...@@ -671,6 +632,12 @@ xpath_table(PG_FUNCTION_ARGS)
/* get the requested return tuple description */ /* get the requested return tuple description */
ret_tupdesc = CreateTupleDescCopy(rsinfo->expectedDesc); ret_tupdesc = CreateTupleDescCopy(rsinfo->expectedDesc);
/* must have at least one output column (for the pkey) */
if (ret_tupdesc->natts < 1)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("xpath_table must have at least one output column")));
/* /*
* At the moment we assume that the returned attributes make sense for the * At the moment we assume that the returned attributes make sense for the
* XPath specififed (i.e. we trust the caller). It's not fatal if they get * XPath specififed (i.e. we trust the caller). It's not fatal if they get
...@@ -686,26 +653,27 @@ xpath_table(PG_FUNCTION_ARGS) ...@@ -686,26 +653,27 @@ xpath_table(PG_FUNCTION_ARGS)
rsinfo->setDesc = ret_tupdesc; rsinfo->setDesc = ret_tupdesc;
values = (char **) palloc(ret_tupdesc->natts * sizeof(char *)); values = (char **) palloc(ret_tupdesc->natts * sizeof(char *));
xpaths = (xmlChar **) palloc(ret_tupdesc->natts * sizeof(xmlChar *)); xpaths = (xmlChar **) palloc(ret_tupdesc->natts * sizeof(xmlChar *));
/* Split XPaths. xpathset is a writable CString. */ /*
* Split XPaths. xpathset is a writable CString.
/* Note that we stop splitting once we've done all needed for tupdesc */ *
* Note that we stop splitting once we've done all needed for tupdesc
*/
numpaths = 0; numpaths = 0;
pos = xpathset; pos = xpathset;
do while (numpaths < (ret_tupdesc->natts - 1))
{ {
xpaths[numpaths] = (xmlChar *) pos; xpaths[numpaths++] = (xmlChar *) pos;
pos = strstr(pos, pathsep); pos = strstr(pos, pathsep);
if (pos != NULL) if (pos != NULL)
{ {
*pos = '\0'; *pos = '\0';
pos++; pos++;
} }
numpaths++; else
} while ((pos != NULL) && (numpaths < (ret_tupdesc->natts - 1))); break;
}
/* Now build query */ /* Now build query */
initStringInfo(&query_buf); initStringInfo(&query_buf);
...@@ -800,7 +768,7 @@ xpath_table(PG_FUNCTION_ARGS) ...@@ -800,7 +768,7 @@ xpath_table(PG_FUNCTION_ARGS)
do do
{ {
/* Now evaluate the set of xpaths. */ /* Now evaluate the set of xpaths. */
had_values = 0; had_values = false;
for (j = 0; j < numpaths; j++) for (j = 0; j < numpaths; j++)
{ {
ctxt = xmlXPathNewContext(doctree); ctxt = xmlXPathNewContext(doctree);
...@@ -813,10 +781,7 @@ xpath_table(PG_FUNCTION_ARGS) ...@@ -813,10 +781,7 @@ xpath_table(PG_FUNCTION_ARGS)
{ {
xmlCleanupParser(); xmlCleanupParser();
xmlFreeDoc(doctree); xmlFreeDoc(doctree);
elog_error("XPath Syntax Error", true); elog_error("XPath Syntax Error", true);
PG_RETURN_NULL(); /* Keep compiler happy */
} }
/* Now evaluate the path expression. */ /* Now evaluate the path expression. */
...@@ -829,11 +794,12 @@ xpath_table(PG_FUNCTION_ARGS) ...@@ -829,11 +794,12 @@ xpath_table(PG_FUNCTION_ARGS)
{ {
case XPATH_NODESET: case XPATH_NODESET:
/* We see if this nodeset has enough nodes */ /* We see if this nodeset has enough nodes */
if ((res->nodesetval != NULL) && (rownr < res->nodesetval->nodeNr)) if (res->nodesetval != NULL &&
rownr < res->nodesetval->nodeNr)
{ {
resstr = resstr =
xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]); xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
had_values = 1; had_values = true;
} }
else else
resstr = NULL; resstr = NULL;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment