Commit 42f94f56 authored by Tom Lane

Fix incautious handling of possibly-miscoded strings in client code.

An incorrectly-encoded multibyte character near the end of a string
could cause various processing loops to run past the string's
terminating NUL, with results ranging from no detectable issue to
a program crash, depending on what happens to be in the following
memory.

This isn't an issue in the server, because we take care to verify
the encoding of strings before doing any interesting processing
on them.  However, that lack of care leaked into client-side code
which shouldn't assume that anyone has validated the encoding of
its input.

Although this is certainly a bug worth fixing, the PG security team
elected not to regard it as a security issue, primarily because
any untrusted text should be sanitized by PQescapeLiteral or
the like before being incorporated into a SQL or psql command.
(If an app fails to do so, the same technique can be used to
cause SQL injection, with probably much more dire consequences
than a mere client-program crash.)  Those functions were already
made proof against this class of problem, cf CVE-2006-2313.

To fix, invent PQmblenBounded() which is like PQmblen() except it
won't return more than the number of bytes remaining in the string.
In HEAD we can make this a new libpq function, as PQmblen() is.
It seems imprudent to change libpq's API in stable branches though,
so in the back branches define PQmblenBounded as a macro in the files
that need it.  (Note that just changing PQmblen's behavior would not
be a good idea; notably, it would completely break the escaping
functions' defense against this exact problem.  So we just want a
version for those callers that don't have any better way of handling
this issue.)

Per private report from houjingyi.  Back-patch to all supported branches.
parent 68a6d8a8
...@@ -1846,7 +1846,7 @@ skip_white_space(const char *query) ...@@ -1846,7 +1846,7 @@ skip_white_space(const char *query)
while (*query) while (*query)
{ {
int mblen = PQmblen(query, pset.encoding); int mblen = PQmblenBounded(query, pset.encoding);
/* /*
* Note: we assume the encoding is a superset of ASCII, so that for * Note: we assume the encoding is a superset of ASCII, so that for
...@@ -1883,7 +1883,7 @@ skip_white_space(const char *query) ...@@ -1883,7 +1883,7 @@ skip_white_space(const char *query)
query++; query++;
break; break;
} }
query += PQmblen(query, pset.encoding); query += PQmblenBounded(query, pset.encoding);
} }
} }
else if (cnestlevel > 0) else if (cnestlevel > 0)
...@@ -1918,7 +1918,7 @@ command_no_begin(const char *query) ...@@ -1918,7 +1918,7 @@ command_no_begin(const char *query)
*/ */
wordlen = 0; wordlen = 0;
while (isalpha((unsigned char) query[wordlen])) while (isalpha((unsigned char) query[wordlen]))
wordlen += PQmblen(&query[wordlen], pset.encoding); wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
/* /*
* Transaction control commands. These should include every keyword that * Transaction control commands. These should include every keyword that
...@@ -1949,7 +1949,7 @@ command_no_begin(const char *query) ...@@ -1949,7 +1949,7 @@ command_no_begin(const char *query)
wordlen = 0; wordlen = 0;
while (isalpha((unsigned char) query[wordlen])) while (isalpha((unsigned char) query[wordlen]))
wordlen += PQmblen(&query[wordlen], pset.encoding); wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
if (wordlen == 11 && pg_strncasecmp(query, "transaction", 11) == 0) if (wordlen == 11 && pg_strncasecmp(query, "transaction", 11) == 0)
return true; return true;
...@@ -1983,7 +1983,7 @@ command_no_begin(const char *query) ...@@ -1983,7 +1983,7 @@ command_no_begin(const char *query)
wordlen = 0; wordlen = 0;
while (isalpha((unsigned char) query[wordlen])) while (isalpha((unsigned char) query[wordlen]))
wordlen += PQmblen(&query[wordlen], pset.encoding); wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
if (wordlen == 8 && pg_strncasecmp(query, "database", 8) == 0) if (wordlen == 8 && pg_strncasecmp(query, "database", 8) == 0)
return true; return true;
...@@ -1999,7 +1999,7 @@ command_no_begin(const char *query) ...@@ -1999,7 +1999,7 @@ command_no_begin(const char *query)
wordlen = 0; wordlen = 0;
while (isalpha((unsigned char) query[wordlen])) while (isalpha((unsigned char) query[wordlen]))
wordlen += PQmblen(&query[wordlen], pset.encoding); wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
} }
if (wordlen == 5 && pg_strncasecmp(query, "index", 5) == 0) if (wordlen == 5 && pg_strncasecmp(query, "index", 5) == 0)
...@@ -2010,7 +2010,7 @@ command_no_begin(const char *query) ...@@ -2010,7 +2010,7 @@ command_no_begin(const char *query)
wordlen = 0; wordlen = 0;
while (isalpha((unsigned char) query[wordlen])) while (isalpha((unsigned char) query[wordlen]))
wordlen += PQmblen(&query[wordlen], pset.encoding); wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
if (wordlen == 12 && pg_strncasecmp(query, "concurrently", 12) == 0) if (wordlen == 12 && pg_strncasecmp(query, "concurrently", 12) == 0)
return true; return true;
...@@ -2027,7 +2027,7 @@ command_no_begin(const char *query) ...@@ -2027,7 +2027,7 @@ command_no_begin(const char *query)
wordlen = 0; wordlen = 0;
while (isalpha((unsigned char) query[wordlen])) while (isalpha((unsigned char) query[wordlen]))
wordlen += PQmblen(&query[wordlen], pset.encoding); wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
/* ALTER SYSTEM isn't allowed in xacts */ /* ALTER SYSTEM isn't allowed in xacts */
if (wordlen == 6 && pg_strncasecmp(query, "system", 6) == 0) if (wordlen == 6 && pg_strncasecmp(query, "system", 6) == 0)
...@@ -2050,7 +2050,7 @@ command_no_begin(const char *query) ...@@ -2050,7 +2050,7 @@ command_no_begin(const char *query)
wordlen = 0; wordlen = 0;
while (isalpha((unsigned char) query[wordlen])) while (isalpha((unsigned char) query[wordlen]))
wordlen += PQmblen(&query[wordlen], pset.encoding); wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
if (wordlen == 8 && pg_strncasecmp(query, "database", 8) == 0) if (wordlen == 8 && pg_strncasecmp(query, "database", 8) == 0)
return true; return true;
...@@ -2065,7 +2065,7 @@ command_no_begin(const char *query) ...@@ -2065,7 +2065,7 @@ command_no_begin(const char *query)
query = skip_white_space(query); query = skip_white_space(query);
wordlen = 0; wordlen = 0;
while (isalpha((unsigned char) query[wordlen])) while (isalpha((unsigned char) query[wordlen]))
wordlen += PQmblen(&query[wordlen], pset.encoding); wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
/* /*
* REINDEX [ TABLE | INDEX ] CONCURRENTLY are not allowed in * REINDEX [ TABLE | INDEX ] CONCURRENTLY are not allowed in
...@@ -2084,7 +2084,7 @@ command_no_begin(const char *query) ...@@ -2084,7 +2084,7 @@ command_no_begin(const char *query)
wordlen = 0; wordlen = 0;
while (isalpha((unsigned char) query[wordlen])) while (isalpha((unsigned char) query[wordlen]))
wordlen += PQmblen(&query[wordlen], pset.encoding); wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
if (wordlen == 12 && pg_strncasecmp(query, "concurrently", 12) == 0) if (wordlen == 12 && pg_strncasecmp(query, "concurrently", 12) == 0)
return true; return true;
...@@ -2104,7 +2104,7 @@ command_no_begin(const char *query) ...@@ -2104,7 +2104,7 @@ command_no_begin(const char *query)
wordlen = 0; wordlen = 0;
while (isalpha((unsigned char) query[wordlen])) while (isalpha((unsigned char) query[wordlen]))
wordlen += PQmblen(&query[wordlen], pset.encoding); wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
if (wordlen == 3 && pg_strncasecmp(query, "all", 3) == 0) if (wordlen == 3 && pg_strncasecmp(query, "all", 3) == 0)
return true; return true;
...@@ -2140,7 +2140,7 @@ is_select_command(const char *query) ...@@ -2140,7 +2140,7 @@ is_select_command(const char *query)
*/ */
wordlen = 0; wordlen = 0;
while (isalpha((unsigned char) query[wordlen])) while (isalpha((unsigned char) query[wordlen]))
wordlen += PQmblen(&query[wordlen], pset.encoding); wordlen += PQmblenBounded(&query[wordlen], pset.encoding);
if (wordlen == 6 && pg_strncasecmp(query, "select", 6) == 0) if (wordlen == 6 && pg_strncasecmp(query, "select", 6) == 0)
return true; return true;
......
...@@ -753,7 +753,7 @@ dequote_downcase_identifier(char *str, bool downcase, int encoding) ...@@ -753,7 +753,7 @@ dequote_downcase_identifier(char *str, bool downcase, int encoding)
{ {
if (downcase && !inquotes) if (downcase && !inquotes)
*cp = pg_tolower((unsigned char) *cp); *cp = pg_tolower((unsigned char) *cp);
cp += PQmblen(cp, encoding); cp += PQmblenBounded(cp, encoding);
} }
} }
} }
......
...@@ -143,7 +143,7 @@ strtokx(const char *s, ...@@ -143,7 +143,7 @@ strtokx(const char *s,
/* okay, we have a quoted token, now scan for the closer */ /* okay, we have a quoted token, now scan for the closer */
char thisquote = *p++; char thisquote = *p++;
for (; *p; p += PQmblen(p, encoding)) for (; *p; p += PQmblenBounded(p, encoding))
{ {
if (*p == escape && p[1] != '\0') if (*p == escape && p[1] != '\0')
p++; /* process escaped anything */ p++; /* process escaped anything */
...@@ -262,7 +262,7 @@ strip_quotes(char *source, char quote, char escape, int encoding) ...@@ -262,7 +262,7 @@ strip_quotes(char *source, char quote, char escape, int encoding)
else if (c == escape && src[1] != '\0') else if (c == escape && src[1] != '\0')
src++; /* process escaped character */ src++; /* process escaped character */
i = PQmblen(src, encoding); i = PQmblenBounded(src, encoding);
while (i--) while (i--)
*dst++ = *src++; *dst++ = *src++;
} }
...@@ -324,7 +324,7 @@ quote_if_needed(const char *source, const char *entails_quote, ...@@ -324,7 +324,7 @@ quote_if_needed(const char *source, const char *entails_quote,
else if (strchr(entails_quote, c)) else if (strchr(entails_quote, c))
need_quotes = true; need_quotes = true;
i = PQmblen(src, encoding); i = PQmblenBounded(src, encoding);
while (i--) while (i--)
*dst++ = *src++; *dst++ = *src++;
} }
......
...@@ -4397,7 +4397,7 @@ _complete_from_query(const char *simple_query, ...@@ -4397,7 +4397,7 @@ _complete_from_query(const char *simple_query,
while (*pstr) while (*pstr)
{ {
char_length++; char_length++;
pstr += PQmblen(pstr, pset.encoding); pstr += PQmblenBounded(pstr, pset.encoding);
} }
/* Free any prior result */ /* Free any prior result */
......
...@@ -52,7 +52,7 @@ splitTableColumnsSpec(const char *spec, int encoding, ...@@ -52,7 +52,7 @@ splitTableColumnsSpec(const char *spec, int encoding,
cp++; cp++;
} }
else else
cp += PQmblen(cp, encoding); cp += PQmblenBounded(cp, encoding);
} }
*table = pnstrdup(spec, cp - spec); *table = pnstrdup(spec, cp - spec);
*columns = cp; *columns = cp;
......
...@@ -740,7 +740,7 @@ json_lex_string(JsonLexContext *lex) ...@@ -740,7 +740,7 @@ json_lex_string(JsonLexContext *lex)
ch = (ch * 16) + (*s - 'A') + 10; ch = (ch * 16) + (*s - 'A') + 10;
else else
{ {
lex->token_terminator = s + pg_encoding_mblen(lex->input_encoding, s); lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s);
return JSON_UNICODE_ESCAPE_FORMAT; return JSON_UNICODE_ESCAPE_FORMAT;
} }
} }
...@@ -846,7 +846,7 @@ json_lex_string(JsonLexContext *lex) ...@@ -846,7 +846,7 @@ json_lex_string(JsonLexContext *lex)
default: default:
/* Not a valid string escape, so signal error. */ /* Not a valid string escape, so signal error. */
lex->token_start = s; lex->token_start = s;
lex->token_terminator = s + pg_encoding_mblen(lex->input_encoding, s); lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s);
return JSON_ESCAPING_INVALID; return JSON_ESCAPING_INVALID;
} }
} }
...@@ -860,7 +860,7 @@ json_lex_string(JsonLexContext *lex) ...@@ -860,7 +860,7 @@ json_lex_string(JsonLexContext *lex)
* shown it's not a performance win. * shown it's not a performance win.
*/ */
lex->token_start = s; lex->token_start = s;
lex->token_terminator = s + pg_encoding_mblen(lex->input_encoding, s); lex->token_terminator = s + pg_encoding_mblen_bounded(lex->input_encoding, s);
return JSON_ESCAPING_INVALID; return JSON_ESCAPING_INVALID;
} }
......
...@@ -1911,6 +1911,11 @@ const pg_wchar_tbl pg_wchar_table[] = { ...@@ -1911,6 +1911,11 @@ const pg_wchar_tbl pg_wchar_table[] = {
/* /*
* Returns the byte length of a multibyte character. * Returns the byte length of a multibyte character.
*
* Caution: when dealing with text that is not certainly valid in the
* specified encoding, the result may exceed the actual remaining
* string length. Callers that are not prepared to deal with that
* should use pg_encoding_mblen_bounded() instead.
*/ */
int int
pg_encoding_mblen(int encoding, const char *mbstr) pg_encoding_mblen(int encoding, const char *mbstr)
...@@ -1920,6 +1925,16 @@ pg_encoding_mblen(int encoding, const char *mbstr) ...@@ -1920,6 +1925,16 @@ pg_encoding_mblen(int encoding, const char *mbstr)
pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr)); pg_wchar_table[PG_SQL_ASCII].mblen((const unsigned char *) mbstr));
} }
/*
 * Bounded variant of pg_encoding_mblen(): reports the byte length of the
 * multibyte character starting at mbstr, clamped so that it never extends
 * past the string's terminating NUL.
 *
 * The nominal length is whatever pg_encoding_mblen() reports for the given
 * encoding; strnlen() then stops early if a NUL byte appears within that
 * many bytes.  Consequently the result is 0 when mbstr already points at
 * the terminator.
 */
int
pg_encoding_mblen_bounded(int encoding, const char *mbstr)
{
	int			nominal_len = pg_encoding_mblen(encoding, mbstr);

	return strnlen(mbstr, nominal_len);
}
/* /*
* Returns the display length of a multibyte character. * Returns the display length of a multibyte character.
*/ */
......
...@@ -3636,6 +3636,9 @@ strlen_max_width(unsigned char *str, int *target_width, int encoding) ...@@ -3636,6 +3636,9 @@ strlen_max_width(unsigned char *str, int *target_width, int encoding)
curr_width += char_width; curr_width += char_width;
str += PQmblen((char *) str, encoding); str += PQmblen((char *) str, encoding);
if (str > end) /* Don't overrun invalid string */
str = end;
} }
*target_width = curr_width; *target_width = curr_width;
......
...@@ -1072,12 +1072,9 @@ patternToSQLRegex(int encoding, PQExpBuffer dbnamebuf, PQExpBuffer schemabuf, ...@@ -1072,12 +1072,9 @@ patternToSQLRegex(int encoding, PQExpBuffer dbnamebuf, PQExpBuffer schemabuf,
appendPQExpBufferChar(curbuf, '\\'); appendPQExpBufferChar(curbuf, '\\');
else if (ch == '[' && cp[1] == ']') else if (ch == '[' && cp[1] == ']')
appendPQExpBufferChar(curbuf, '\\'); appendPQExpBufferChar(curbuf, '\\');
i = PQmblen(cp, encoding); i = PQmblenBounded(cp, encoding);
while (i-- && *cp) while (i--)
{ appendPQExpBufferChar(curbuf, *cp++);
appendPQExpBufferChar(curbuf, *cp);
cp++;
}
} }
} }
appendPQExpBufferStr(curbuf, ")$"); appendPQExpBufferStr(curbuf, ")$");
......
...@@ -574,6 +574,7 @@ extern int pg_valid_server_encoding_id(int encoding); ...@@ -574,6 +574,7 @@ extern int pg_valid_server_encoding_id(int encoding);
* earlier in this file are also available from libpgcommon. * earlier in this file are also available from libpgcommon.
*/ */
extern int pg_encoding_mblen(int encoding, const char *mbstr); extern int pg_encoding_mblen(int encoding, const char *mbstr);
extern int pg_encoding_mblen_bounded(int encoding, const char *mbstr);
extern int pg_encoding_dsplen(int encoding, const char *mbstr); extern int pg_encoding_dsplen(int encoding, const char *mbstr);
extern int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len); extern int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len);
extern int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len); extern int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len);
......
...@@ -184,3 +184,4 @@ PQexitPipelineMode 181 ...@@ -184,3 +184,4 @@ PQexitPipelineMode 181
PQpipelineSync 182 PQpipelineSync 182
PQpipelineStatus 183 PQpipelineStatus 183
PQtraceSetFlags 184 PQtraceSetFlags 184
PQmblenBounded 185
...@@ -1180,8 +1180,13 @@ pqSocketPoll(int sock, int forRead, int forWrite, time_t end_time) ...@@ -1180,8 +1180,13 @@ pqSocketPoll(int sock, int forRead, int forWrite, time_t end_time)
*/ */
/* /*
* returns the byte length of the character beginning at s, using the * Returns the byte length of the character beginning at s, using the
* specified encoding. * specified encoding.
*
* Caution: when dealing with text that is not certainly valid in the
* specified encoding, the result may exceed the actual remaining
* string length. Callers that are not prepared to deal with that
* should use PQmblenBounded() instead.
*/ */
int int
PQmblen(const char *s, int encoding) PQmblen(const char *s, int encoding)
...@@ -1190,7 +1195,17 @@ PQmblen(const char *s, int encoding) ...@@ -1190,7 +1195,17 @@ PQmblen(const char *s, int encoding)
} }
/* /*
* returns the display length of the character beginning at s, using the * Returns the byte length of the character beginning at s, using the
* specified encoding; but not more than the distance to end of string.
*/
int
PQmblenBounded(const char *s, int encoding)
{
	/*
	 * Length reported by pg_encoding_mblen() for the character at s; note
	 * that it takes its arguments in the opposite order from this function.
	 */
	int			claimed_len = pg_encoding_mblen(encoding, s);

	/* Stop at the terminating NUL if it comes before claimed_len bytes. */
	return strnlen(s, claimed_len);
}
/*
* Returns the display length of the character beginning at s, using the
* specified encoding. * specified encoding.
*/ */
int int
......
...@@ -365,7 +365,7 @@ do_field(const PQprintOpt *po, const PGresult *res, ...@@ -365,7 +365,7 @@ do_field(const PQprintOpt *po, const PGresult *res,
/* Detect whether field contains non-numeric data */ /* Detect whether field contains non-numeric data */
char ch = '0'; char ch = '0';
for (p = pval; *p; p += PQmblen(p, res->client_encoding)) for (p = pval; *p; p += PQmblenBounded(p, res->client_encoding))
{ {
ch = *p; ch = *p;
if (!((ch >= '0' && ch <= '9') || if (!((ch >= '0' && ch <= '9') ||
......
...@@ -1296,7 +1296,7 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding) ...@@ -1296,7 +1296,7 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
if (w <= 0) if (w <= 0)
w = 1; w = 1;
scroffset += w; scroffset += w;
qoffset += pg_encoding_mblen(encoding, &wquery[qoffset]); qoffset += PQmblenBounded(&wquery[qoffset], encoding);
} }
else else
{ {
...@@ -1364,7 +1364,7 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding) ...@@ -1364,7 +1364,7 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding)
* width. * width.
*/ */
scroffset = 0; scroffset = 0;
for (; i < msg->len; i += pg_encoding_mblen(encoding, &msg->data[i])) for (; i < msg->len; i += PQmblenBounded(&msg->data[i], encoding))
{ {
int w = pg_encoding_dsplen(encoding, &msg->data[i]); int w = pg_encoding_dsplen(encoding, &msg->data[i]);
......
...@@ -625,6 +625,9 @@ extern int PQlibVersion(void); ...@@ -625,6 +625,9 @@ extern int PQlibVersion(void);
/* Determine length of multibyte encoded char at *s */ /* Determine length of multibyte encoded char at *s */
extern int PQmblen(const char *s, int encoding); extern int PQmblen(const char *s, int encoding);
/* Same, but not more than the distance to the end of string s */
extern int PQmblenBounded(const char *s, int encoding);
/* Determine display length of multibyte encoded char at *s */ /* Determine display length of multibyte encoded char at *s */
extern int PQdsplen(const char *s, int encoding); extern int PQdsplen(const char *s, int encoding);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment