Commit c7d1a8d4 authored by Tom Lane

Fix some corner-case bugs in _sendSQLLine's parsing of SQL commands

... found in a pg_dump archive.  It had problems with dollar-quote tags
broken across bufferload boundaries (this may explain bug report from
Rod Taylor), also with dollar-quote literals of the form $a$a$...,
and was also confused about the rules for backslash in double-quoted
identifiers (i.e., they're not special).  Also put in placeholder support
for E'...' literals --- this will need more work later.
No related merge requests found
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_archiver.h,v 1.66 2005/07/27 12:44:10 neilc Exp $ * $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_archiver.h,v 1.67 2005/09/11 04:10:25 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -136,22 +136,24 @@ typedef struct _outputContext ...@@ -136,22 +136,24 @@ typedef struct _outputContext
typedef enum typedef enum
{ {
SQL_SCAN = 0, SQL_SCAN = 0, /* normal */
SQL_IN_SQL_COMMENT, SQL_IN_SQL_COMMENT, /* -- comment */
SQL_IN_EXT_COMMENT, SQL_IN_EXT_COMMENT, /* slash-star comment */
SQL_IN_QUOTE, SQL_IN_SINGLE_QUOTE, /* '...' literal */
SQL_IN_DOLLARTAG, SQL_IN_E_QUOTE, /* E'...' literal */
SQL_IN_DOLLARQUOTE SQL_IN_DOUBLE_QUOTE, /* "..." identifier */
SQL_IN_DOLLAR_TAG, /* possible dollar-quote starting tag */
SQL_IN_DOLLAR_QUOTE /* body of dollar quote */
} sqlparseState; } sqlparseState;
typedef struct typedef struct
{ {
int backSlash; sqlparseState state; /* see above */
sqlparseState state; char lastChar; /* preceding char, or '\0' initially */
char lastChar; bool backSlash; /* next char is backslash quoted? */
char quoteChar; int braceDepth; /* parenthesis nesting depth */
int braceDepth; PQExpBuffer tagBuf; /* dollar quote tag (NULL if not created) */
PQExpBuffer tagBuf; int minTagEndPos; /* first possible end position of $-quote */
} sqlparseInfo; } sqlparseInfo;
typedef enum typedef enum
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* Implements the basic DB functions used by the archiver. * Implements the basic DB functions used by the archiver.
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_db.c,v 1.64 2005/07/27 05:14:12 neilc Exp $ * $PostgreSQL: pgsql/src/bin/pg_dump/pg_backup_db.c,v 1.65 2005/09/11 04:10:25 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -36,8 +36,8 @@ static void notice_processor(void *arg, const char *message); ...@@ -36,8 +36,8 @@ static void notice_processor(void *arg, const char *message);
static char *_sendSQLLine(ArchiveHandle *AH, char *qry, char *eos); static char *_sendSQLLine(ArchiveHandle *AH, char *qry, char *eos);
static char *_sendCopyLine(ArchiveHandle *AH, char *qry, char *eos); static char *_sendCopyLine(ArchiveHandle *AH, char *qry, char *eos);
static int _isIdentChar(unsigned char c); static bool _isIdentChar(unsigned char c);
static int _isDQChar(unsigned char c, int atStart); static bool _isDQChar(unsigned char c, bool atStart);
#define DB_MAX_ERR_STMT 128 #define DB_MAX_ERR_STMT 128
...@@ -410,215 +410,187 @@ _sendCopyLine(ArchiveHandle *AH, char *qry, char *eos) ...@@ -410,215 +410,187 @@ _sendCopyLine(ArchiveHandle *AH, char *qry, char *eos)
} }
/* /*
* Used by ExecuteSqlCommandBuf to send one buffered line of SQL (not data for the copy command). * Used by ExecuteSqlCommandBuf to send one buffered line of SQL
* (not data for the copy command).
*/ */
static char * static char *
_sendSQLLine(ArchiveHandle *AH, char *qry, char *eos) _sendSQLLine(ArchiveHandle *AH, char *qry, char *eos)
{ {
int pos = 0; /* Current position */
char *sqlPtr;
int consumed;
int startDT = 0;
/* /*
* The following is a mini state machine to assess the end of an SQL * The following is a mini state machine to assess the end of an SQL
* statement. It really only needs to parse good SQL, or at least * statement. It really only needs to parse good SQL, or at least
* that's the theory... End-of-statement is assumed to be an unquoted, * that's the theory... End-of-statement is assumed to be an unquoted,
* un commented semi-colon. * un-commented semi-colon that's not within any parentheses.
*/ *
* Note: the input can be split into bufferloads at arbitrary boundaries.
/* * Therefore all state must be kept in AH->sqlparse, not in local
* fprintf(stderr, "Buffer at start is: '%s'\n\n", AH->sqlBuf->data); * variables of this routine. We assume that AH->sqlparse was
* filled with zeroes when created.
*/ */
for (; qry < eos; qry++)
for (pos = 0; pos < (eos - qry); pos++)
{ {
appendPQExpBufferChar(AH->sqlBuf, qry[pos]); switch (AH->sqlparse.state)
/* fprintf(stderr, " %c",qry[pos]); */
/* Loop until character consumed */
do
{ {
/* case SQL_SCAN: /* Default state == 0, set in _allocAH */
* If a character needs to be scanned in a different state, if (*qry == ';' && AH->sqlparse.braceDepth == 0)
* consumed can be set to 0 to avoid advancing. Care must be {
* taken to ensure internal state is not damaged.
*/
consumed = 1;
switch (AH->sqlparse.state)
{
case SQL_SCAN: /* Default state == 0, set in _allocAH */
if (qry[pos] == ';' && AH->sqlparse.braceDepth == 0)
{
/*
* We've got the end of a statement. Send It &
* reset the buffer.
*/
/*
* fprintf(stderr, " sending: '%s'\n\n",
* AH->sqlBuf->data);
*/
ExecuteSqlCommand(AH, AH->sqlBuf, "could not execute query");
resetPQExpBuffer(AH->sqlBuf);
AH->sqlparse.lastChar = '\0';
/*
* Remove any following newlines - so that
* embedded COPY commands don't get a starting
* newline.
*/
pos++;
for (; pos < (eos - qry) && qry[pos] == '\n'; pos++);
/* We've got our line, so exit */
return qry + pos;
}
else
{
/*
* Look for normal boring quote chars, or
* dollar-quotes. We make the assumption that
* $-quotes will not have an ident character
* before them in all pg_dump output.
*/
if (qry[pos] == '"'
|| qry[pos] == '\''
|| (qry[pos] == '$' && _isIdentChar(AH->sqlparse.lastChar) == 0)
)
{
/* fprintf(stderr,"[startquote]\n"); */
AH->sqlparse.state = SQL_IN_QUOTE;
AH->sqlparse.quoteChar = qry[pos];
AH->sqlparse.backSlash = 0;
if (qry[pos] == '$')
{
/* override the state */
AH->sqlparse.state = SQL_IN_DOLLARTAG;
/* Used for checking first char of tag */
startDT = 1;
/* We store the tag for later comparison. */
AH->sqlparse.tagBuf = createPQExpBuffer();
/* Get leading $ */
appendPQExpBufferChar(AH->sqlparse.tagBuf, qry[pos]);
}
}
else if (qry[pos] == '-' && AH->sqlparse.lastChar == '-')
AH->sqlparse.state = SQL_IN_SQL_COMMENT;
else if (qry[pos] == '*' && AH->sqlparse.lastChar == '/')
AH->sqlparse.state = SQL_IN_EXT_COMMENT;
else if (qry[pos] == '(')
AH->sqlparse.braceDepth++;
else if (qry[pos] == ')')
AH->sqlparse.braceDepth--;
AH->sqlparse.lastChar = qry[pos];
}
break;
case SQL_IN_DOLLARTAG:
/* /*
* Like a quote, we look for a closing char *but* we * We've found the end of a statement. Send it and
* only allow a very limited set of contained chars, * reset the buffer.
* and no escape chars. If invalid chars are found, we
* abort tag processing.
*/ */
appendPQExpBufferChar(AH->sqlBuf, ';'); /* inessential */
if (qry[pos] == '$') ExecuteSqlCommand(AH, AH->sqlBuf,
{ "could not execute query");
/* fprintf(stderr,"[endquote]\n"); */ resetPQExpBuffer(AH->sqlBuf);
/* Get trailing $ */ AH->sqlparse.lastChar = '\0';
appendPQExpBufferChar(AH->sqlparse.tagBuf, qry[pos]);
AH->sqlparse.state = SQL_IN_DOLLARQUOTE;
}
else
{
if (_isDQChar(qry[pos], startDT))
{
/* Valid, so add */
appendPQExpBufferChar(AH->sqlparse.tagBuf, qry[pos]);
}
else
{
/*
* Jump back to 'scan' state, we're not really
* in a tag, and valid tag chars do not
* include the various chars we look for in
* this state machine, so it's safe to just
* jump from this state back to SCAN. We set
* consumed = 0 so that this char gets
* rescanned in new state.
*/
destroyPQExpBuffer(AH->sqlparse.tagBuf);
AH->sqlparse.state = SQL_SCAN;
consumed = 0;
}
}
startDT = 0;
break;
case SQL_IN_DOLLARQUOTE:
/* /*
* Comparing the entire string backwards each time is * Remove any following newlines - so that
* NOT efficient, but dollar quotes in pg_dump are * embedded COPY commands don't get a starting newline.
* small and the code is a lot simpler.
*/ */
sqlPtr = AH->sqlBuf->data + AH->sqlBuf->len - AH->sqlparse.tagBuf->len; qry++;
while (qry < eos && *qry == '\n')
if (strncmp(AH->sqlparse.tagBuf->data, sqlPtr, AH->sqlparse.tagBuf->len) == 0) qry++;
{
/* End of $-quote */ /* We've finished one line, so exit */
AH->sqlparse.state = SQL_SCAN; return qry;
destroyPQExpBuffer(AH->sqlparse.tagBuf); }
} else if (*qry == '\'')
break; {
if (AH->sqlparse.lastChar == 'E')
case SQL_IN_SQL_COMMENT: AH->sqlparse.state = SQL_IN_E_QUOTE;
if (qry[pos] == '\n')
AH->sqlparse.state = SQL_SCAN;
break;
case SQL_IN_EXT_COMMENT:
if (AH->sqlparse.lastChar == '*' && qry[pos] == '/')
AH->sqlparse.state = SQL_SCAN;
break;
case SQL_IN_QUOTE:
if (!AH->sqlparse.backSlash && AH->sqlparse.quoteChar == qry[pos])
{
/* fprintf(stderr,"[endquote]\n"); */
AH->sqlparse.state = SQL_SCAN;
}
else else
{ AH->sqlparse.state = SQL_IN_SINGLE_QUOTE;
if (qry[pos] == '\\') AH->sqlparse.backSlash = false;
{ }
if (AH->sqlparse.lastChar == '\\') else if (*qry == '"')
AH->sqlparse.backSlash = !AH->sqlparse.backSlash; {
else AH->sqlparse.state = SQL_IN_DOUBLE_QUOTE;
AH->sqlparse.backSlash = 1; }
} /*
else * Look for dollar-quotes. We make the assumption that
AH->sqlparse.backSlash = 0; * $-quotes will not have an ident character just
} * before them in pg_dump output. XXX is this
break; * good enough?
*/
} else if (*qry == '$' && !_isIdentChar(AH->sqlparse.lastChar))
{
} while (consumed == 0); AH->sqlparse.state = SQL_IN_DOLLAR_TAG;
/* initialize separate buffer with possible tag */
if (AH->sqlparse.tagBuf == NULL)
AH->sqlparse.tagBuf = createPQExpBuffer();
else
resetPQExpBuffer(AH->sqlparse.tagBuf);
appendPQExpBufferChar(AH->sqlparse.tagBuf, *qry);
}
else if (*qry == '-' && AH->sqlparse.lastChar == '-')
AH->sqlparse.state = SQL_IN_SQL_COMMENT;
else if (*qry == '*' && AH->sqlparse.lastChar == '/')
AH->sqlparse.state = SQL_IN_EXT_COMMENT;
else if (*qry == '(')
AH->sqlparse.braceDepth++;
else if (*qry == ')')
AH->sqlparse.braceDepth--;
break;
case SQL_IN_SQL_COMMENT:
if (*qry == '\n')
AH->sqlparse.state = SQL_SCAN;
break;
case SQL_IN_EXT_COMMENT:
/*
* This isn't fully correct, because we don't account for
* nested slash-stars, but pg_dump never emits such.
*/
if (AH->sqlparse.lastChar == '*' && *qry == '/')
AH->sqlparse.state = SQL_SCAN;
break;
case SQL_IN_SINGLE_QUOTE:
/* We needn't handle '' specially */
if (*qry == '\'' && !AH->sqlparse.backSlash)
AH->sqlparse.state = SQL_SCAN;
else if (*qry == '\\')
AH->sqlparse.backSlash = !AH->sqlparse.backSlash;
else
AH->sqlparse.backSlash = false;
break;
case SQL_IN_E_QUOTE:
/*
* Eventually we will need to handle '' specially, because
* after E'...''... we should still be in E_QUOTE state.
*
* XXX problem: how do we tell whether the dump was made
* by a version that thinks backslashes aren't special
* in non-E literals??
*/
if (*qry == '\'' && !AH->sqlparse.backSlash)
AH->sqlparse.state = SQL_SCAN;
else if (*qry == '\\')
AH->sqlparse.backSlash = !AH->sqlparse.backSlash;
else
AH->sqlparse.backSlash = false;
break;
case SQL_IN_DOUBLE_QUOTE:
/* We needn't handle "" specially */
if (*qry == '"')
AH->sqlparse.state = SQL_SCAN;
break;
case SQL_IN_DOLLAR_TAG:
if (*qry == '$')
{
/* Do not add the closing $ to tagBuf */
AH->sqlparse.state = SQL_IN_DOLLAR_QUOTE;
AH->sqlparse.minTagEndPos = AH->sqlBuf->len + AH->sqlparse.tagBuf->len + 1;
}
else if (_isDQChar(*qry, (AH->sqlparse.tagBuf->len == 1)))
{
/* Valid, so add to tag */
appendPQExpBufferChar(AH->sqlparse.tagBuf, *qry);
}
else
{
/*
* Ooops, we're not really in a dollar-tag. Valid tag
* chars do not include the various chars we look for
* in this state machine, so it's safe to just jump
* from this state back to SCAN. We have to back up
* the qry pointer so that the current character gets
* rescanned in SCAN state; and then "continue" so that
* the bottom-of-loop actions aren't done yet.
*/
AH->sqlparse.state = SQL_SCAN;
qry--;
continue;
}
break;
case SQL_IN_DOLLAR_QUOTE:
/*
* If we are at a $, see whether what precedes it matches
* tagBuf. (Remember that the trailing $ of the tag was
* not added to tagBuf.) However, don't compare until we
* have enough data to be a possible match --- this is
* needed to avoid false match on '$a$a$...'
*/
if (*qry == '$' &&
AH->sqlBuf->len >= AH->sqlparse.minTagEndPos &&
strcmp(AH->sqlparse.tagBuf->data,
AH->sqlBuf->data + AH->sqlBuf->len - AH->sqlparse.tagBuf->len) == 0)
AH->sqlparse.state = SQL_SCAN;
break;
}
AH->sqlparse.lastChar = qry[pos]; appendPQExpBufferChar(AH->sqlBuf, *qry);
/* fprintf(stderr, "\n"); */ AH->sqlparse.lastChar = *qry;
} }
/* /*
* If we get here, we've processed entire string with no complete SQL * If we get here, we've processed entire bufferload with no complete SQL
* stmt * stmt
*/ */
return eos; return eos;
...@@ -673,7 +645,7 @@ CommitTransaction(ArchiveHandle *AH) ...@@ -673,7 +645,7 @@ CommitTransaction(ArchiveHandle *AH)
destroyPQExpBuffer(qry); destroyPQExpBuffer(qry);
} }
static int static bool
_isIdentChar(unsigned char c) _isIdentChar(unsigned char c)
{ {
if ((c >= 'a' && c <= 'z') if ((c >= 'a' && c <= 'z')
...@@ -684,22 +656,22 @@ _isIdentChar(unsigned char c) ...@@ -684,22 +656,22 @@ _isIdentChar(unsigned char c)
|| (c >= (unsigned char) '\200') /* no need to check <= || (c >= (unsigned char) '\200') /* no need to check <=
* \377 */ * \377 */
) )
return 1; return true;
else else
return 0; return false;
} }
static int static bool
_isDQChar(unsigned char c, int atStart) _isDQChar(unsigned char c, bool atStart)
{ {
if ((c >= 'a' && c <= 'z') if ((c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z') || (c >= 'A' && c <= 'Z')
|| (c == '_') || (c == '_')
|| (atStart == 0 && c >= '0' && c <= '9') || (!atStart && c >= '0' && c <= '9')
|| (c >= (unsigned char) '\200') /* no need to check <= || (c >= (unsigned char) '\200') /* no need to check <=
* \377 */ * \377 */
) )
return 1; return true;
else else
return 0; return false;
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment