Commit 93e8ff87 authored by Michael Paquier

Refactor logic to check for ASCII-only characters in string

The same logic was present for collation commands, SASLprep and
pgcrypto, so this removes some code.

Author: Michael Paquier
Reviewed-by: Stephen Frost, Heikki Linnakangas
Discussion: https://postgr.es/m/X9womIn6rne6Gud2@paquier.xyz
parent 4e1ee79e
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include "postgres.h" #include "postgres.h"
#include "catalog/pg_type.h" #include "catalog/pg_type.h"
#include "common/string.h"
#include "funcapi.h" #include "funcapi.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "mb/pg_wchar.h" #include "mb/pg_wchar.h"
...@@ -92,19 +93,6 @@ convert_to_utf8(text *src) ...@@ -92,19 +93,6 @@ convert_to_utf8(text *src)
return convert_charset(src, GetDatabaseEncoding(), PG_UTF8); return convert_charset(src, GetDatabaseEncoding(), PG_UTF8);
} }
/*
 * string_is_ascii
 *
 * Scan the NUL-terminated string "str" and report whether it is free of
 * non-ASCII bytes.  Returns false at the first byte for which
 * IS_HIGHBIT_SET() is true, and true otherwise (including for an empty
 * string).
 */
static bool
string_is_ascii(const char *str)
{
	while (*str)
	{
		if (IS_HIGHBIT_SET(*str))
			return false;
		str++;
	}

	return true;
}
static void static void
clear_and_pfree(text *p) clear_and_pfree(text *p)
{ {
...@@ -814,7 +802,7 @@ parse_key_value_arrays(ArrayType *key_array, ArrayType *val_array, ...@@ -814,7 +802,7 @@ parse_key_value_arrays(ArrayType *key_array, ArrayType *val_array,
v = TextDatumGetCString(key_datums[i]); v = TextDatumGetCString(key_datums[i]);
if (!string_is_ascii(v)) if (!pg_is_ascii(v))
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("header key must not contain non-ASCII characters"))); errmsg("header key must not contain non-ASCII characters")));
...@@ -836,7 +824,7 @@ parse_key_value_arrays(ArrayType *key_array, ArrayType *val_array, ...@@ -836,7 +824,7 @@ parse_key_value_arrays(ArrayType *key_array, ArrayType *val_array,
v = TextDatumGetCString(val_datums[i]); v = TextDatumGetCString(val_datums[i]);
if (!string_is_ascii(v)) if (!pg_is_ascii(v))
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("header value must not contain non-ASCII characters"))); errmsg("header value must not contain non-ASCII characters")));
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "commands/comment.h" #include "commands/comment.h"
#include "commands/dbcommands.h" #include "commands/dbcommands.h"
#include "commands/defrem.h" #include "commands/defrem.h"
#include "common/string.h"
#include "mb/pg_wchar.h" #include "mb/pg_wchar.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "utils/acl.h" #include "utils/acl.h"
...@@ -286,23 +287,6 @@ pg_collation_actual_version(PG_FUNCTION_ARGS) ...@@ -286,23 +287,6 @@ pg_collation_actual_version(PG_FUNCTION_ARGS)
#define READ_LOCALE_A_OUTPUT #define READ_LOCALE_A_OUTPUT
#endif #endif
#if defined(READ_LOCALE_A_OUTPUT) || defined(USE_ICU)
/*
 * is_all_ascii
 *
 * Returns true if the NUL-terminated string "str" is pure ASCII, that is,
 * if IS_HIGHBIT_SET() is false for every byte before the terminator.
 * An empty string is considered pure ASCII.
 */
static bool
is_all_ascii(const char *str)
{
	const char *cur;

	for (cur = str; *cur; cur++)
	{
		if (IS_HIGHBIT_SET(*cur))
			return false;
	}

	return true;
}
#endif /* READ_LOCALE_A_OUTPUT || USE_ICU */
#ifdef READ_LOCALE_A_OUTPUT #ifdef READ_LOCALE_A_OUTPUT
/* /*
* "Normalize" a libc locale name, stripping off encoding tags such as * "Normalize" a libc locale name, stripping off encoding tags such as
...@@ -396,7 +380,7 @@ get_icu_locale_comment(const char *localename) ...@@ -396,7 +380,7 @@ get_icu_locale_comment(const char *localename)
if (U_FAILURE(status)) if (U_FAILURE(status))
return NULL; /* no good reason to raise an error */ return NULL; /* no good reason to raise an error */
/* Check for non-ASCII comment (can't use is_all_ascii for this) */ /* Check for non-ASCII comment (can't use pg_is_ascii for this) */
for (i = 0; i < len_uchar; i++) for (i = 0; i < len_uchar; i++)
{ {
if (displayname[i] > 127) if (displayname[i] > 127)
...@@ -477,7 +461,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) ...@@ -477,7 +461,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
* interpret the non-ASCII characters. We can't do much with * interpret the non-ASCII characters. We can't do much with
* those, so we filter them out. * those, so we filter them out.
*/ */
if (!is_all_ascii(localebuf)) if (!pg_is_ascii(localebuf))
{ {
elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf); elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf);
continue; continue;
...@@ -623,7 +607,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS) ...@@ -623,7 +607,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
* Be paranoid about not allowing any non-ASCII strings into * Be paranoid about not allowing any non-ASCII strings into
* pg_collation * pg_collation
*/ */
if (!is_all_ascii(langtag) || !is_all_ascii(collcollate)) if (!pg_is_ascii(langtag) || !pg_is_ascii(collcollate))
continue; continue;
collid = CollationCreate(psprintf("%s-x-icu", langtag), collid = CollationCreate(psprintf("%s-x-icu", langtag),
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#endif #endif
#include "common/saslprep.h" #include "common/saslprep.h"
#include "common/string.h"
#include "common/unicode_norm.h" #include "common/unicode_norm.h"
#include "mb/pg_wchar.h" #include "mb/pg_wchar.h"
...@@ -47,7 +48,6 @@ ...@@ -47,7 +48,6 @@
static int codepoint_range_cmp(const void *a, const void *b); static int codepoint_range_cmp(const void *a, const void *b);
static bool is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize); static bool is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize);
static int pg_utf8_string_len(const char *source); static int pg_utf8_string_len(const char *source);
static bool pg_is_ascii_string(const char *p);
/* /*
* Stringprep Mapping Tables. * Stringprep Mapping Tables.
...@@ -1019,21 +1019,6 @@ pg_utf8_string_len(const char *source) ...@@ -1019,21 +1019,6 @@ pg_utf8_string_len(const char *source)
return num_chars; return num_chars;
} }
/*
 * pg_is_ascii_string
 *
 * Walk the NUL-terminated string "p" byte by byte; the string is pure
 * ASCII when no byte makes IS_HIGHBIT_SET() true.  Returns true in that
 * case (also for an empty string), false otherwise.
 */
static bool
pg_is_ascii_string(const char *p)
{
	for (; *p; p++)
	{
		if (IS_HIGHBIT_SET(*p))
			return false;
	}

	return true;
}
/* /*
* pg_saslprep - Normalize a password with SASLprep. * pg_saslprep - Normalize a password with SASLprep.
...@@ -1076,7 +1061,7 @@ pg_saslprep(const char *input, char **output) ...@@ -1076,7 +1061,7 @@ pg_saslprep(const char *input, char **output)
* Quick check if the input is pure ASCII. An ASCII string requires no * Quick check if the input is pure ASCII. An ASCII string requires no
* further processing. * further processing.
*/ */
if (pg_is_ascii_string(input)) if (pg_is_ascii(input))
{ {
*output = STRDUP(input); *output = STRDUP(input);
if (!(*output)) if (!(*output))
......
...@@ -92,6 +92,22 @@ pg_clean_ascii(char *str) ...@@ -92,6 +92,22 @@ pg_clean_ascii(char *str)
} }
/*
 * pg_is_ascii -- Check if string is made only of ASCII characters
 *
 * "str" must be NUL-terminated.  The scan stops at the first byte for
 * which IS_HIGHBIT_SET() is true and the result is then false; if the
 * terminator is reached first (which includes the empty string), the
 * result is true.
 */
bool
pg_is_ascii(const char *str)
{
	bool		only_ascii = true;
	const char *ptr = str;

	while (only_ascii && *ptr)
	{
		if (IS_HIGHBIT_SET(*ptr))
			only_ascii = false;
		else
			ptr++;
	}

	return only_ascii;
}
/* /*
* pg_strip_crlf -- Remove any trailing newline and carriage return * pg_strip_crlf -- Remove any trailing newline and carriage return
* *
......
...@@ -18,6 +18,7 @@ extern int strtoint(const char *pg_restrict str, char **pg_restrict endptr, ...@@ -18,6 +18,7 @@ extern int strtoint(const char *pg_restrict str, char **pg_restrict endptr,
int base); int base);
extern void pg_clean_ascii(char *str); extern void pg_clean_ascii(char *str);
extern int pg_strip_crlf(char *str); extern int pg_strip_crlf(char *str);
extern bool pg_is_ascii(const char *str);
/* functions in src/common/pg_get_line.c */ /* functions in src/common/pg_get_line.c */
extern char *pg_get_line(FILE *stream); extern char *pg_get_line(FILE *stream);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment