Commit b80e1063 authored by Heikki Linnakangas

Add mbverifystr() functions specific to each encoding.

This makes the pg_verify_mbstr() function faster by allowing more efficient
encoding-specific implementations. All the implementations included in
this commit are pretty naive: they just call the same encoding-specific
verifychar functions that were used previously, but that already gives a
performance boost because the tight character-at-a-time loop is simpler.

Reviewed-by: John Naylor
Discussion: https://www.postgresql.org/message-id/e7861509-3960-538a-9025-b75a61188e01@iki.fi
parent a3367aa3
...@@ -682,7 +682,7 @@ read_extension_script_file(const ExtensionControlFile *control, ...@@ -682,7 +682,7 @@ read_extension_script_file(const ExtensionControlFile *control,
src_encoding = control->encoding; src_encoding = control->encoding;
/* make sure that source string is valid in the expected encoding */ /* make sure that source string is valid in the expected encoding */
pg_verify_mbstr_len(src_encoding, src_str, len, false); (void) pg_verify_mbstr(src_encoding, src_str, len, false);
/* /*
* Convert the encoding to the database encoding. read_whole_file * Convert the encoding to the database encoding. read_whole_file
......
...@@ -653,7 +653,7 @@ LocalToUtf(const unsigned char *iso, int len, ...@@ -653,7 +653,7 @@ LocalToUtf(const unsigned char *iso, int len,
continue; continue;
} }
l = pg_encoding_verifymb(encoding, (const char *) iso, len); l = pg_encoding_verifymbchar(encoding, (const char *) iso, len);
if (l < 0) if (l < 0)
break; break;
......
...@@ -87,7 +87,7 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len) ...@@ -87,7 +87,7 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
continue; continue;
} }
l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len); l = pg_encoding_verifymbchar(PG_EUC_JIS_2004, (const char *) euc, len);
if (l < 0) if (l < 0)
report_invalid_encoding(PG_EUC_JIS_2004, report_invalid_encoding(PG_EUC_JIS_2004,
...@@ -238,7 +238,7 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len ...@@ -238,7 +238,7 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
continue; continue;
} }
l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len); l = pg_encoding_verifymbchar(PG_SHIFT_JIS_2004, (const char *) sjis, len);
if (l < 0 || l > len) if (l < 0 || l > len)
report_invalid_encoding(PG_SHIFT_JIS_2004, report_invalid_encoding(PG_SHIFT_JIS_2004,
......
...@@ -291,7 +291,7 @@ mic2sjis(const unsigned char *mic, unsigned char *p, int len) ...@@ -291,7 +291,7 @@ mic2sjis(const unsigned char *mic, unsigned char *p, int len)
len--; len--;
continue; continue;
} }
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len); l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
if (l < 0) if (l < 0)
report_invalid_encoding(PG_MULE_INTERNAL, report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len); (const char *) mic, len);
...@@ -381,7 +381,7 @@ euc_jp2mic(const unsigned char *euc, unsigned char *p, int len) ...@@ -381,7 +381,7 @@ euc_jp2mic(const unsigned char *euc, unsigned char *p, int len)
len--; len--;
continue; continue;
} }
l = pg_encoding_verifymb(PG_EUC_JP, (const char *) euc, len); l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
if (l < 0) if (l < 0)
report_invalid_encoding(PG_EUC_JP, report_invalid_encoding(PG_EUC_JP,
(const char *) euc, len); (const char *) euc, len);
...@@ -431,7 +431,7 @@ mic2euc_jp(const unsigned char *mic, unsigned char *p, int len) ...@@ -431,7 +431,7 @@ mic2euc_jp(const unsigned char *mic, unsigned char *p, int len)
len--; len--;
continue; continue;
} }
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len); l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
if (l < 0) if (l < 0)
report_invalid_encoding(PG_MULE_INTERNAL, report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len); (const char *) mic, len);
...@@ -485,7 +485,7 @@ euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len) ...@@ -485,7 +485,7 @@ euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len)
len--; len--;
continue; continue;
} }
l = pg_encoding_verifymb(PG_EUC_JP, (const char *) euc, len); l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
if (l < 0) if (l < 0)
report_invalid_encoding(PG_EUC_JP, report_invalid_encoding(PG_EUC_JP,
(const char *) euc, len); (const char *) euc, len);
...@@ -580,7 +580,7 @@ sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len) ...@@ -580,7 +580,7 @@ sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len)
len--; len--;
continue; continue;
} }
l = pg_encoding_verifymb(PG_SJIS, (const char *) sjis, len); l = pg_encoding_verifymbchar(PG_SJIS, (const char *) sjis, len);
if (l < 0) if (l < 0)
report_invalid_encoding(PG_SJIS, report_invalid_encoding(PG_SJIS,
(const char *) sjis, len); (const char *) sjis, len);
......
...@@ -76,7 +76,7 @@ euc_kr2mic(const unsigned char *euc, unsigned char *p, int len) ...@@ -76,7 +76,7 @@ euc_kr2mic(const unsigned char *euc, unsigned char *p, int len)
c1 = *euc; c1 = *euc;
if (IS_HIGHBIT_SET(c1)) if (IS_HIGHBIT_SET(c1))
{ {
l = pg_encoding_verifymb(PG_EUC_KR, (const char *) euc, len); l = pg_encoding_verifymbchar(PG_EUC_KR, (const char *) euc, len);
if (l != 2) if (l != 2)
report_invalid_encoding(PG_EUC_KR, report_invalid_encoding(PG_EUC_KR,
(const char *) euc, len); (const char *) euc, len);
...@@ -122,7 +122,7 @@ mic2euc_kr(const unsigned char *mic, unsigned char *p, int len) ...@@ -122,7 +122,7 @@ mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
len--; len--;
continue; continue;
} }
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len); l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
if (l < 0) if (l < 0)
report_invalid_encoding(PG_MULE_INTERNAL, report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len); (const char *) mic, len);
......
...@@ -148,7 +148,7 @@ euc_tw2mic(const unsigned char *euc, unsigned char *p, int len) ...@@ -148,7 +148,7 @@ euc_tw2mic(const unsigned char *euc, unsigned char *p, int len)
c1 = *euc; c1 = *euc;
if (IS_HIGHBIT_SET(c1)) if (IS_HIGHBIT_SET(c1))
{ {
l = pg_encoding_verifymb(PG_EUC_TW, (const char *) euc, len); l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
if (l < 0) if (l < 0)
report_invalid_encoding(PG_EUC_TW, report_invalid_encoding(PG_EUC_TW,
(const char *) euc, len); (const char *) euc, len);
...@@ -213,7 +213,7 @@ mic2euc_tw(const unsigned char *mic, unsigned char *p, int len) ...@@ -213,7 +213,7 @@ mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
len--; len--;
continue; continue;
} }
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len); l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
if (l < 0) if (l < 0)
report_invalid_encoding(PG_MULE_INTERNAL, report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len); (const char *) mic, len);
...@@ -272,7 +272,7 @@ big52mic(const unsigned char *big5, unsigned char *p, int len) ...@@ -272,7 +272,7 @@ big52mic(const unsigned char *big5, unsigned char *p, int len)
len--; len--;
continue; continue;
} }
l = pg_encoding_verifymb(PG_BIG5, (const char *) big5, len); l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
if (l < 0) if (l < 0)
report_invalid_encoding(PG_BIG5, report_invalid_encoding(PG_BIG5,
(const char *) big5, len); (const char *) big5, len);
...@@ -321,7 +321,7 @@ mic2big5(const unsigned char *mic, unsigned char *p, int len) ...@@ -321,7 +321,7 @@ mic2big5(const unsigned char *mic, unsigned char *p, int len)
len--; len--;
continue; continue;
} }
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len); l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
if (l < 0) if (l < 0)
report_invalid_encoding(PG_MULE_INTERNAL, report_invalid_encoding(PG_MULE_INTERNAL,
(const char *) mic, len); (const char *) mic, len);
......
...@@ -519,7 +519,7 @@ pg_convert(PG_FUNCTION_ARGS) ...@@ -519,7 +519,7 @@ pg_convert(PG_FUNCTION_ARGS)
/* make sure that source string is valid */ /* make sure that source string is valid */
len = VARSIZE_ANY_EXHDR(string); len = VARSIZE_ANY_EXHDR(string);
src_str = VARDATA_ANY(string); src_str = VARDATA_ANY(string);
pg_verify_mbstr_len(src_encoding, src_str, len, false); (void) pg_verify_mbstr(src_encoding, src_str, len, false);
/* perform conversion */ /* perform conversion */
dest_str = (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, src_str), dest_str = (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, src_str),
...@@ -1215,10 +1215,10 @@ static bool ...@@ -1215,10 +1215,10 @@ static bool
pg_generic_charinc(unsigned char *charptr, int len) pg_generic_charinc(unsigned char *charptr, int len)
{ {
unsigned char *lastbyte = charptr + len - 1; unsigned char *lastbyte = charptr + len - 1;
mbverifier mbverify; mbchar_verifier mbverify;
/* We can just invoke the character verifier directly. */ /* We can just invoke the character verifier directly. */
mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverify; mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverifychar;
while (*lastbyte < (unsigned char) 255) while (*lastbyte < (unsigned char) 255)
{ {
...@@ -1445,8 +1445,7 @@ pg_database_encoding_max_length(void) ...@@ -1445,8 +1445,7 @@ pg_database_encoding_max_length(void)
bool bool
pg_verifymbstr(const char *mbstr, int len, bool noError) pg_verifymbstr(const char *mbstr, int len, bool noError)
{ {
return return pg_verify_mbstr(GetDatabaseEncoding(), mbstr, len, noError);
pg_verify_mbstr_len(GetDatabaseEncoding(), mbstr, len, noError) >= 0;
} }
/* /*
...@@ -1456,7 +1455,18 @@ pg_verifymbstr(const char *mbstr, int len, bool noError) ...@@ -1456,7 +1455,18 @@ pg_verifymbstr(const char *mbstr, int len, bool noError)
bool bool
pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError) pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
{ {
return pg_verify_mbstr_len(encoding, mbstr, len, noError) >= 0; int oklen;
Assert(PG_VALID_ENCODING(encoding));
oklen = pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len);
if (oklen != len)
{
if (noError)
return false;
report_invalid_encoding(encoding, mbstr + oklen, len - oklen);
}
return true;
} }
/* /*
...@@ -1469,11 +1479,14 @@ pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError) ...@@ -1469,11 +1479,14 @@ pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
* If OK, return length of string in the encoding. * If OK, return length of string in the encoding.
* If a problem is found, return -1 when noError is * If a problem is found, return -1 when noError is
* true; when noError is false, ereport() a descriptive message. * true; when noError is false, ereport() a descriptive message.
*
* Note: We cannot use the faster encoding-specific mbverifystr() function
* here, because we need to count the number of characters in the string.
*/ */
int int
pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError) pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
{ {
mbverifier mbverify; mbchar_verifier mbverifychar;
int mb_len; int mb_len;
Assert(PG_VALID_ENCODING(encoding)); Assert(PG_VALID_ENCODING(encoding));
...@@ -1493,7 +1506,7 @@ pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError) ...@@ -1493,7 +1506,7 @@ pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
} }
/* fetch function pointer just once */ /* fetch function pointer just once */
mbverify = pg_wchar_table[encoding].mbverify; mbverifychar = pg_wchar_table[encoding].mbverifychar;
mb_len = 0; mb_len = 0;
...@@ -1516,7 +1529,7 @@ pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError) ...@@ -1516,7 +1529,7 @@ pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
report_invalid_encoding(encoding, mbstr, len); report_invalid_encoding(encoding, mbstr, len);
} }
l = (*mbverify) ((const unsigned char *) mbstr, len); l = (*mbverifychar) ((const unsigned char *) mbstr, len);
if (l < 0) if (l < 0)
{ {
......
This diff is collapsed.
...@@ -371,7 +371,9 @@ typedef int (*mbdisplaylen_converter) (const unsigned char *mbstr); ...@@ -371,7 +371,9 @@ typedef int (*mbdisplaylen_converter) (const unsigned char *mbstr);
typedef bool (*mbcharacter_incrementer) (unsigned char *mbstr, int len); typedef bool (*mbcharacter_incrementer) (unsigned char *mbstr, int len);
typedef int (*mbverifier) (const unsigned char *mbstr, int len); typedef int (*mbchar_verifier) (const unsigned char *mbstr, int len);
typedef int (*mbstr_verifier) (const unsigned char *mbstr, int len);
typedef struct typedef struct
{ {
...@@ -381,7 +383,8 @@ typedef struct ...@@ -381,7 +383,8 @@ typedef struct
* to a multibyte */ * to a multibyte */
mblen_converter mblen; /* get byte length of a char */ mblen_converter mblen; /* get byte length of a char */
mbdisplaylen_converter dsplen; /* get display width of a char */ mbdisplaylen_converter dsplen; /* get display width of a char */
mbverifier mbverify; /* verify multibyte sequence */ mbchar_verifier mbverifychar; /* verify multibyte character */
mbstr_verifier mbverifystr; /* verify multibyte string */
int maxmblen; /* max bytes for a char in this encoding */ int maxmblen; /* max bytes for a char in this encoding */
} pg_wchar_tbl; } pg_wchar_tbl;
...@@ -554,7 +557,8 @@ extern int pg_valid_server_encoding_id(int encoding); ...@@ -554,7 +557,8 @@ extern int pg_valid_server_encoding_id(int encoding);
*/ */
extern int pg_encoding_mblen(int encoding, const char *mbstr); extern int pg_encoding_mblen(int encoding, const char *mbstr);
extern int pg_encoding_dsplen(int encoding, const char *mbstr); extern int pg_encoding_dsplen(int encoding, const char *mbstr);
extern int pg_encoding_verifymb(int encoding, const char *mbstr, int len); extern int pg_encoding_verifymbchar(int encoding, const char *mbstr, int len);
extern int pg_encoding_verifymbstr(int encoding, const char *mbstr, int len);
extern int pg_encoding_max_length(int encoding); extern int pg_encoding_max_length(int encoding);
extern int pg_valid_client_encoding(const char *name); extern int pg_valid_client_encoding(const char *name);
extern int pg_valid_server_encoding(const char *name); extern int pg_valid_server_encoding(const char *name);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment