Commit 16e3ad5d authored by Tom Lane's avatar Tom Lane

Avoid using %c printf format for potentially non-ASCII characters.

Since %c only passes a C "char" to printf, it's incapable of dealing
with multibyte characters.  Passing just the first byte of such a
character leads to an output string that is visibly not correctly
encoded, resulting in undesirable behavior such as encoding conversion
failures while sending error messages to clients.

We've lived with this issue for a long time because it was inconvenient
to avoid in a portable fashion.  However, now that we always use our own
snprintf code, it's reasonable to use the %.*s format to print just one
possibly-multibyte character in a string.  (We previously avoided that
obvious-looking answer in order to work around glibc's bug #6530, cf
commits 54cd4f04 and ed437e2b.)

Hence, run around and fix a bunch of places that used %c to report
a character found in a user-supplied string.  For simplicity, I did
not touch places that were emitting non-user-facing debug messages,
or reporting catalog data that should always be ASCII.  (It's also
unclear how useful this approach could be in frontend code, where
it's less certain that we know what encoding we're dealing with.)

In passing, improve a couple of poorly-written error messages in
pageinspect/heapfuncs.c.

This is a longstanding issue, but I'm hesitant to back-patch because
of the impact on translatable message strings.  In any case this fix
would not work reliably before v12.

Tom Lane and Quan Zongliang

Discussion: https://postgr.es/m/a120087c-4c88-d9d4-1ec5-808d7a7f133d@gmail.com
parent 78c88767
...@@ -80,7 +80,9 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped) ...@@ -80,7 +80,9 @@ get_val(HSParser *state, bool ignoreeq, bool *escaped)
} }
else if (*(state->ptr) == '=' && !ignoreeq) else if (*(state->ptr) == '=' && !ignoreeq)
{ {
elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin)); elog(ERROR, "Syntax error near \"%.*s\" at position %d",
pg_mblen(state->ptr), state->ptr,
(int32) (state->ptr - state->begin));
} }
else if (*(state->ptr) == '\\') else if (*(state->ptr) == '\\')
{ {
...@@ -219,7 +221,9 @@ parse_hstore(HSParser *state) ...@@ -219,7 +221,9 @@ parse_hstore(HSParser *state)
} }
else if (!isspace((unsigned char) *(state->ptr))) else if (!isspace((unsigned char) *(state->ptr)))
{ {
elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin)); elog(ERROR, "Syntax error near \"%.*s\" at position %d",
pg_mblen(state->ptr), state->ptr,
(int32) (state->ptr - state->begin));
} }
} }
else if (st == WGT) else if (st == WGT)
...@@ -234,7 +238,9 @@ parse_hstore(HSParser *state) ...@@ -234,7 +238,9 @@ parse_hstore(HSParser *state)
} }
else else
{ {
elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin)); elog(ERROR, "Syntax error near \"%.*s\" at position %d",
pg_mblen(state->ptr), state->ptr,
(int32) (state->ptr - state->begin));
} }
} }
else if (st == WVAL) else if (st == WVAL)
...@@ -267,7 +273,9 @@ parse_hstore(HSParser *state) ...@@ -267,7 +273,9 @@ parse_hstore(HSParser *state)
} }
else if (!isspace((unsigned char) *(state->ptr))) else if (!isspace((unsigned char) *(state->ptr)))
{ {
elog(ERROR, "Syntax error near '%c' at position %d", *(state->ptr), (int32) (state->ptr - state->begin)); elog(ERROR, "Syntax error near \"%.*s\" at position %d",
pg_mblen(state->ptr), state->ptr,
(int32) (state->ptr - state->begin));
} }
} }
else else
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "catalog/pg_am_d.h" #include "catalog/pg_am_d.h"
#include "catalog/pg_type.h" #include "catalog/pg_type.h"
#include "funcapi.h" #include "funcapi.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "pageinspect.h" #include "pageinspect.h"
#include "port/pg_bitutils.h" #include "port/pg_bitutils.h"
...@@ -99,7 +100,8 @@ text_to_bits(char *str, int len) ...@@ -99,7 +100,8 @@ text_to_bits(char *str, int len)
else else
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED), (errcode(ERRCODE_DATA_CORRUPTED),
errmsg("illegal character '%c' in t_bits string", str[off]))); errmsg("invalid character \"%.*s\" in t_bits string",
pg_mblen(str + off), str + off)));
if (off % 8 == 7) if (off % 8 == 7)
bits[off / 8] = byte; bits[off / 8] = byte;
...@@ -460,14 +462,13 @@ tuple_data_split(PG_FUNCTION_ARGS) ...@@ -460,14 +462,13 @@ tuple_data_split(PG_FUNCTION_ARGS)
if (!t_bits_str) if (!t_bits_str)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED), (errcode(ERRCODE_DATA_CORRUPTED),
errmsg("argument of t_bits is null, but it is expected to be null and %d character long", errmsg("t_bits string must not be NULL")));
bits_len)));
bits_str_len = strlen(t_bits_str); bits_str_len = strlen(t_bits_str);
if (bits_len != bits_str_len) if (bits_len != bits_str_len)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED), (errcode(ERRCODE_DATA_CORRUPTED),
errmsg("unexpected length of t_bits %u, expected %d", errmsg("unexpected length of t_bits string: %u, expected %u",
bits_str_len, bits_len))); bits_str_len, bits_len)));
/* do the conversion */ /* do the conversion */
...@@ -478,7 +479,7 @@ tuple_data_split(PG_FUNCTION_ARGS) ...@@ -478,7 +479,7 @@ tuple_data_split(PG_FUNCTION_ARGS)
if (t_bits_str) if (t_bits_str)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED), (errcode(ERRCODE_DATA_CORRUPTED),
errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes length", errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes long",
strlen(t_bits_str)))); strlen(t_bits_str))));
} }
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include <ctype.h> #include <ctype.h>
#include "mb/pg_wchar.h"
#include "utils/builtins.h" #include "utils/builtins.h"
#include "utils/memutils.h" #include "utils/memutils.h"
...@@ -171,17 +172,19 @@ hex_encode(const char *src, size_t len, char *dst) ...@@ -171,17 +172,19 @@ hex_encode(const char *src, size_t len, char *dst)
} }
static inline char static inline char
get_hex(char c) get_hex(const char *cp)
{ {
unsigned char c = (unsigned char) *cp;
int res = -1; int res = -1;
if (c > 0 && c < 127) if (c < 127)
res = hexlookup[(unsigned char) c]; res = hexlookup[c];
if (res < 0) if (res < 0)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid hexadecimal digit: \"%c\"", c))); errmsg("invalid hexadecimal digit: \"%.*s\"",
pg_mblen(cp), cp)));
return (char) res; return (char) res;
} }
...@@ -205,13 +208,15 @@ hex_decode(const char *src, size_t len, char *dst) ...@@ -205,13 +208,15 @@ hex_decode(const char *src, size_t len, char *dst)
s++; s++;
continue; continue;
} }
v1 = get_hex(*s++) << 4; v1 = get_hex(s) << 4;
s++;
if (s >= srcend) if (s >= srcend)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid hexadecimal data: odd number of digits"))); errmsg("invalid hexadecimal data: odd number of digits")));
v2 = get_hex(*s++); v2 = get_hex(s);
s++;
*p++ = v1 | v2; *p++ = v1 | v2;
} }
...@@ -338,7 +343,8 @@ pg_base64_decode(const char *src, size_t len, char *dst) ...@@ -338,7 +343,8 @@ pg_base64_decode(const char *src, size_t len, char *dst)
if (b < 0) if (b < 0)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid symbol \"%c\" while decoding base64 sequence", (int) c))); errmsg("invalid symbol \"%.*s\" found while decoding base64 sequence",
pg_mblen(s - 1), s - 1)));
} }
/* add it to buffer */ /* add it to buffer */
buf = (buf << 6) + b; buf = (buf << 6) + b;
......
...@@ -526,8 +526,8 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern, ...@@ -526,8 +526,8 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg("invalid input syntax for type %s", "jsonpath"), errmsg("invalid input syntax for type %s", "jsonpath"),
errdetail("unrecognized flag character \"%c\" in LIKE_REGEX predicate", errdetail("unrecognized flag character \"%.*s\" in LIKE_REGEX predicate",
flags->val[i]))); pg_mblen(flags->val + i), flags->val + i)));
break; break;
} }
} }
......
...@@ -423,8 +423,8 @@ parse_re_flags(pg_re_flags *flags, text *opts) ...@@ -423,8 +423,8 @@ parse_re_flags(pg_re_flags *flags, text *opts)
default: default:
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid regular expression option: \"%c\"", errmsg("invalid regular expression option: \"%.*s\"",
opt_p[i]))); pg_mblen(opt_p + i), opt_p + i)));
break; break;
} }
} }
......
...@@ -230,8 +230,8 @@ bit_in(PG_FUNCTION_ARGS) ...@@ -230,8 +230,8 @@ bit_in(PG_FUNCTION_ARGS)
else if (*sp != '0') else if (*sp != '0')
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("\"%c\" is not a valid binary digit", errmsg("\"%.*s\" is not a valid binary digit",
*sp))); pg_mblen(sp), sp)));
x >>= 1; x >>= 1;
if (x == 0) if (x == 0)
...@@ -255,8 +255,8 @@ bit_in(PG_FUNCTION_ARGS) ...@@ -255,8 +255,8 @@ bit_in(PG_FUNCTION_ARGS)
else else
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("\"%c\" is not a valid hexadecimal digit", errmsg("\"%.*s\" is not a valid hexadecimal digit",
*sp))); pg_mblen(sp), sp)));
if (bc) if (bc)
{ {
...@@ -531,8 +531,8 @@ varbit_in(PG_FUNCTION_ARGS) ...@@ -531,8 +531,8 @@ varbit_in(PG_FUNCTION_ARGS)
else if (*sp != '0') else if (*sp != '0')
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("\"%c\" is not a valid binary digit", errmsg("\"%.*s\" is not a valid binary digit",
*sp))); pg_mblen(sp), sp)));
x >>= 1; x >>= 1;
if (x == 0) if (x == 0)
...@@ -556,8 +556,8 @@ varbit_in(PG_FUNCTION_ARGS) ...@@ -556,8 +556,8 @@ varbit_in(PG_FUNCTION_ARGS)
else else
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("\"%c\" is not a valid hexadecimal digit", errmsg("\"%.*s\" is not a valid hexadecimal digit",
*sp))); pg_mblen(sp), sp)));
if (bc) if (bc)
{ {
......
...@@ -5586,8 +5586,8 @@ text_format(PG_FUNCTION_ARGS) ...@@ -5586,8 +5586,8 @@ text_format(PG_FUNCTION_ARGS)
if (strchr("sIL", *cp) == NULL) if (strchr("sIL", *cp) == NULL)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized format() type specifier \"%c\"", errmsg("unrecognized format() type specifier \"%.*s\"",
*cp), pg_mblen(cp), cp),
errhint("For a single \"%%\" use \"%%%%\"."))); errhint("For a single \"%%\" use \"%%%%\".")));
/* If indirect width was specified, get its value */ /* If indirect width was specified, get its value */
...@@ -5707,8 +5707,8 @@ text_format(PG_FUNCTION_ARGS) ...@@ -5707,8 +5707,8 @@ text_format(PG_FUNCTION_ARGS)
/* should not get here, because of previous check */ /* should not get here, because of previous check */
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unrecognized format() type specifier \"%c\"", errmsg("unrecognized format() type specifier \"%.*s\"",
*cp), pg_mblen(cp), cp),
errhint("For a single \"%%\" use \"%%%%\"."))); errhint("For a single \"%%\" use \"%%%%\".")));
break; break;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment