Commit 4e1f9860 authored by Bruce Momjian

I found the libpq function PQunescapeBytea a little slow. It was taking a
minute and a half to decode a 500Kb file on a fairly fast machine. I think the
culprit is sscanf.

I attach a patch that replaces the function with one used to perform the same
task in pyPgSQL (a Python interface to PostgreSQL). This code was written by
Billy Allie, author of pyPgSQL. I've changed a few variable names to match
those in the original code and removed a bit of Pythonness.

Billy has kindly looked at the code and points out that it is slightly
stricter than the original implementation and if it encounters an invalid
bytea such as '\12C' it drops the escaping '\' and outputs '12C'.

The code is licensed by the author under a BSD license.

I've performed limited testing of the function by putting JPEGs into
PostgreSQL, extracting them using the new function and diffing
against the original files.

The new function is significantly faster on my machine with the JPEGs being
decoded in less than a second. I attach a modified libpq example program that
I used for my testing.

Ben Lamb.
parent 0abe7431
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/interfaces/libpq/fe-exec.c,v 1.137 2003/06/08 17:43:00 tgl Exp $ * $Header: /cvsroot/pgsql/src/interfaces/libpq/fe-exec.c,v 1.138 2003/06/12 01:17:19 momjian Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1690,6 +1690,8 @@ PQescapeBytea(const unsigned char *bintext, size_t binlen, size_t *bytealen) ...@@ -1690,6 +1690,8 @@ PQescapeBytea(const unsigned char *bintext, size_t binlen, size_t *bytealen)
return result; return result;
} }
/* Numeric value of the digit character CH ('0' -> 0 ... '9' -> 9). */
#define VAL(CH) ((CH) - '0')

/*
 *		PQunescapeBytea - converts the null terminated string representation
 *		of a bytea, strtext, into binary, filling a newly malloc'd buffer.
 *		It returns a pointer to the buffer, or NULL on error (strtext is
 *		NULL, or memory could not be allocated), and stores the number of
 *		decoded bytes in *retbuflen.  The pointer may subsequently be used
 *		as an argument to the function free(3).  It is the reverse of
 *		PQescapeBytea.
 *
 *		The following transformations are made:
 *		\\   == ASCII 92 == a single backslash
 *		\ooo == a byte whose value = ooo (ooo is an octal number in the
 *				range 000..377)
 *		\x   == x (x is any character not matched by the above
 *				transformations; in particular \' yields ')
 *
 *		A backslash that does not introduce one of the sequences above is
 *		simply dropped: the invalid input '\12C' decodes to '12C', and a
 *		trailing lone backslash produces no output.
 */
unsigned char *
PQunescapeBytea(const unsigned char *strtext, size_t *retbuflen)
{
	size_t		strtextlen,
				buflen;
	unsigned char *buffer,
			   *tmpbuf;
	size_t		i,
				j;
	int			byte;

	if (strtext == NULL)
		return NULL;

	strtextlen = strlen((const char *) strtext);

	/*
	 * The decoded data is never longer than the escaped text.  The +1 keeps
	 * the request non-zero so that an empty input does not hit the
	 * implementation-defined behaviour of malloc(0).
	 */
	buffer = (unsigned char *) malloc(strtextlen + 1);
	if (buffer == NULL)
		return NULL;

	for (i = j = 0; i < strtextlen;)
	{
		if (strtext[i] != '\\')
		{
			/* ordinary character: copy through unchanged */
			buffer[j++] = strtext[i++];
			continue;
		}

		/* step over the backslash and look at what it introduces */
		i++;

		if (strtext[i] == '\\')
		{
			buffer[j++] = strtext[i++];
		}
		else if (strtext[i] >= '0' && strtext[i] <= '3' &&
				 strtext[i + 1] >= '0' && strtext[i + 1] <= '7' &&
				 strtext[i + 2] >= '0' && strtext[i + 2] <= '7')
		{
			/*
			 * Exactly three octal digits with a value that fits in one
			 * byte.  The && chain stops at the terminating NUL, so the
			 * look-ahead never reads past the end of strtext.
			 */
			byte = VAL(strtext[i++]);
			byte = (byte << 3) + VAL(strtext[i++]);
			byte = (byte << 3) + VAL(strtext[i++]);
			buffer[j++] = (unsigned char) byte;
		}

		/*
		 * Otherwise the backslash is dropped; the character after it (if
		 * any) is handled by the next iteration.
		 */
	}

	buflen = j;					/* buflen is the length of the unquoted data */

	/*
	 * Shrink the buffer to what is needed.  The +1 avoids realloc(ptr, 0),
	 * which may free the block and return NULL; the failure path below
	 * would then free the same block a second time.
	 */
	tmpbuf = (unsigned char *) realloc(buffer, buflen + 1);
	if (tmpbuf == NULL)
	{
		free(buffer);
		return NULL;
	}

	*retbuflen = buflen;
	return tmpbuf;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment