Commit a868931f authored by Tom Lane

Fix insufficiently-paranoid GB18030 encoding verifier.

The previous coding effectively only verified that the second byte of a
multibyte character was in the expected range; moreover, it wasn't careful
to make sure that the second byte even exists in the buffer before touching
it.  The latter seems unlikely to cause any real problems in the field
(in particular, it could never be a problem with null-terminated input),
but it's still a bug.

Since GB18030 is not a supported backend encoding, the only thing we'd
really be doing with GB18030 text is converting it to UTF8 in LocalToUtf,
which would fail anyway on any invalid character for lack of a match in
its lookup table.  So the only user-visible consequence of this change
should be that you'll get "invalid byte sequence for encoding" rather than
"character has no equivalent" for malformed GB18030 input.  However,
impending changes to the GB18030 conversion code will require these tighter
up-front checks to avoid producing bogus results.
parent aff27e33
...@@ -1070,9 +1070,9 @@ pg_uhc_dsplen(const unsigned char *s) ...@@ -1070,9 +1070,9 @@ pg_uhc_dsplen(const unsigned char *s)
} }
/* /*
* * GB18030 * GB18030
* * Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp> * Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp>
* */ */
static int static int
pg_gb18030_mblen(const unsigned char *s) pg_gb18030_mblen(const unsigned char *s)
{ {
...@@ -1080,15 +1080,10 @@ pg_gb18030_mblen(const unsigned char *s) ...@@ -1080,15 +1080,10 @@ pg_gb18030_mblen(const unsigned char *s)
if (!IS_HIGHBIT_SET(*s)) if (!IS_HIGHBIT_SET(*s))
len = 1; /* ASCII */ len = 1; /* ASCII */
else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
len = 4;
else else
{ len = 2;
if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) || (*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
len = 2;
else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
len = 4;
else
len = 2;
}
return len; return len;
} }
...@@ -1403,21 +1398,32 @@ pg_uhc_verifier(const unsigned char *s, int len) ...@@ -1403,21 +1398,32 @@ pg_uhc_verifier(const unsigned char *s, int len)
static int static int
pg_gb18030_verifier(const unsigned char *s, int len) pg_gb18030_verifier(const unsigned char *s, int len)
{ {
int l, int l;
mbl;
l = mbl = pg_gb18030_mblen(s);
if (len < l)
return -1;
while (--l > 0) if (!IS_HIGHBIT_SET(*s))
l = 1; /* ASCII */
else if (len >= 4 && *(s + 1) >= 0x30 && *(s + 1) <= 0x39)
{ {
if (*++s == '\0') /* Should be 4-byte, validate remaining bytes */
return -1; if (*s >= 0x81 && *s <= 0xfe &&
*(s + 2) >= 0x81 && *(s + 2) <= 0xfe &&
*(s + 3) >= 0x30 && *(s + 3) <= 0x39)
l = 4;
else
l = -1;
} }
else if (len >= 2 && *s >= 0x81 && *s <= 0xfe)
return mbl; {
/* Should be 2-byte, validate */
if ((*(s + 1) >= 0x40 && *(s + 1) <= 0x7e) ||
(*(s + 1) >= 0x80 && *(s + 1) <= 0xfe))
l = 2;
else
l = -1;
}
else
l = -1;
return l;
} }
static int static int
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment