Commit 86eaf208 authored by Andres Freund's avatar Andres Freund

Hand code string to integer conversion for performance.

As benchmarks show, using libc's string-to-integer conversion is
pretty slow. At least part of the reason for that is that strtol[l]
have to be more generic than what largely is required inside pg.

This patch considerably speeds up int2/int4 input (int8 was already
using hand-rolled code).

Most of the existing pg_atoi callers have been converted. But as one
requires pg_atoi's custom delimiter functionality, and as it seems
likely that there are external pg_atoi users, it seems sensible to just
keep pg_atoi around.

Author: Andres Freund
Reviewed-By: Robert Haas
Discussion: https://postgr.es/m/20171208214437.qgn6zdltyq5hmjpk@alap3.anarazel.de
parent 3522d0ea
...@@ -306,7 +306,7 @@ check_foreign_key(PG_FUNCTION_ARGS) ...@@ -306,7 +306,7 @@ check_foreign_key(PG_FUNCTION_ARGS)
/* internal error */ /* internal error */
elog(ERROR, "check_foreign_key: too short %d (< 5) list of arguments", nargs); elog(ERROR, "check_foreign_key: too short %d (< 5) list of arguments", nargs);
nrefs = pg_atoi(args[0], sizeof(int), 0); nrefs = pg_strtoint32(args[0]);
if (nrefs < 1) if (nrefs < 1)
/* internal error */ /* internal error */
elog(ERROR, "check_foreign_key: %d (< 1) number of references specified", nrefs); elog(ERROR, "check_foreign_key: %d (< 1) number of references specified", nrefs);
......
...@@ -709,7 +709,7 @@ BETTER: could not open file %s (I/O failure) ...@@ -709,7 +709,7 @@ BETTER: could not open file %s (I/O failure)
not helpful information. If the error text doesn't make as much sense not helpful information. If the error text doesn't make as much sense
without the function name, reword it. without the function name, reword it.
<programlisting> <programlisting>
BAD: pg_atoi: error in "z": cannot parse "z" BAD: pg_strtoint32: error in "z": cannot parse "z"
BETTER: invalid input syntax for integer: "z" BETTER: invalid input syntax for integer: "z"
</programlisting> </programlisting>
</para> </para>
......
...@@ -286,10 +286,10 @@ pq_parse_errornotice(StringInfo msg, ErrorData *edata) ...@@ -286,10 +286,10 @@ pq_parse_errornotice(StringInfo msg, ErrorData *edata)
edata->hint = pstrdup(value); edata->hint = pstrdup(value);
break; break;
case PG_DIAG_STATEMENT_POSITION: case PG_DIAG_STATEMENT_POSITION:
edata->cursorpos = pg_atoi(value, sizeof(int), '\0'); edata->cursorpos = pg_strtoint32(value);
break; break;
case PG_DIAG_INTERNAL_POSITION: case PG_DIAG_INTERNAL_POSITION:
edata->internalpos = pg_atoi(value, sizeof(int), '\0'); edata->internalpos = pg_strtoint32(value);
break; break;
case PG_DIAG_INTERNAL_QUERY: case PG_DIAG_INTERNAL_QUERY:
edata->internalquery = pstrdup(value); edata->internalquery = pstrdup(value);
...@@ -316,7 +316,7 @@ pq_parse_errornotice(StringInfo msg, ErrorData *edata) ...@@ -316,7 +316,7 @@ pq_parse_errornotice(StringInfo msg, ErrorData *edata)
edata->filename = pstrdup(value); edata->filename = pstrdup(value);
break; break;
case PG_DIAG_SOURCE_LINE: case PG_DIAG_SOURCE_LINE:
edata->lineno = pg_atoi(value, sizeof(int), '\0'); edata->lineno = pg_strtoint32(value);
break; break;
case PG_DIAG_SOURCE_FUNCTION: case PG_DIAG_SOURCE_FUNCTION:
edata->funcname = pstrdup(value); edata->funcname = pstrdup(value);
......
...@@ -345,7 +345,7 @@ libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli, ...@@ -345,7 +345,7 @@ libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli,
ntuples, nfields, 3, 1))); ntuples, nfields, 3, 1)));
} }
primary_sysid = pstrdup(PQgetvalue(res, 0, 0)); primary_sysid = pstrdup(PQgetvalue(res, 0, 0));
*primary_tli = pg_atoi(PQgetvalue(res, 0, 1), 4, 0); *primary_tli = pg_strtoint32(PQgetvalue(res, 0, 1));
PQclear(res); PQclear(res);
*server_version = PQserverVersion(conn->streamConn); *server_version = PQserverVersion(conn->streamConn);
...@@ -480,7 +480,7 @@ libpqrcv_endstreaming(WalReceiverConn *conn, TimeLineID *next_tli) ...@@ -480,7 +480,7 @@ libpqrcv_endstreaming(WalReceiverConn *conn, TimeLineID *next_tli)
if (PQnfields(res) < 2 || PQntuples(res) != 1) if (PQnfields(res) < 2 || PQntuples(res) != 1)
ereport(ERROR, ereport(ERROR,
(errmsg("unexpected result set after end-of-streaming"))); (errmsg("unexpected result set after end-of-streaming")));
*next_tli = pg_atoi(PQgetvalue(res, 0, 0), sizeof(uint32), 0); *next_tli = pg_strtoint32(PQgetvalue(res, 0, 0));
PQclear(res); PQclear(res);
/* the result set should be followed by CommandComplete */ /* the result set should be followed by CommandComplete */
......
...@@ -2460,13 +2460,13 @@ prsd_headline(PG_FUNCTION_ARGS) ...@@ -2460,13 +2460,13 @@ prsd_headline(PG_FUNCTION_ARGS)
char *val = defGetString(defel); char *val = defGetString(defel);
if (pg_strcasecmp(defel->defname, "MaxWords") == 0) if (pg_strcasecmp(defel->defname, "MaxWords") == 0)
max_words = pg_atoi(val, sizeof(int32), 0); max_words = pg_strtoint32(val);
else if (pg_strcasecmp(defel->defname, "MinWords") == 0) else if (pg_strcasecmp(defel->defname, "MinWords") == 0)
min_words = pg_atoi(val, sizeof(int32), 0); min_words = pg_strtoint32(val);
else if (pg_strcasecmp(defel->defname, "ShortWord") == 0) else if (pg_strcasecmp(defel->defname, "ShortWord") == 0)
shortword = pg_atoi(val, sizeof(int32), 0); shortword = pg_strtoint32(val);
else if (pg_strcasecmp(defel->defname, "MaxFragments") == 0) else if (pg_strcasecmp(defel->defname, "MaxFragments") == 0)
max_fragments = pg_atoi(val, sizeof(int32), 0); max_fragments = pg_strtoint32(val);
else if (pg_strcasecmp(defel->defname, "StartSel") == 0) else if (pg_strcasecmp(defel->defname, "StartSel") == 0)
prs->startsel = pstrdup(val); prs->startsel = pstrdup(val);
else if (pg_strcasecmp(defel->defname, "StopSel") == 0) else if (pg_strcasecmp(defel->defname, "StopSel") == 0)
......
...@@ -226,8 +226,7 @@ ArrayGetIntegerTypmods(ArrayType *arr, int *n) ...@@ -226,8 +226,7 @@ ArrayGetIntegerTypmods(ArrayType *arr, int *n)
result = (int32 *) palloc(*n * sizeof(int32)); result = (int32 *) palloc(*n * sizeof(int32));
for (i = 0; i < *n; i++) for (i = 0; i < *n; i++)
result[i] = pg_atoi(DatumGetCString(elem_values[i]), result[i] = pg_strtoint32(DatumGetCString(elem_values[i]));
sizeof(int32), '\0');
pfree(elem_values); pfree(elem_values);
......
...@@ -60,7 +60,7 @@ int2in(PG_FUNCTION_ARGS) ...@@ -60,7 +60,7 @@ int2in(PG_FUNCTION_ARGS)
{ {
char *num = PG_GETARG_CSTRING(0); char *num = PG_GETARG_CSTRING(0);
PG_RETURN_INT16(pg_atoi(num, sizeof(int16), '\0')); PG_RETURN_INT16(pg_strtoint16(num));
} }
/* /*
...@@ -265,7 +265,7 @@ int4in(PG_FUNCTION_ARGS) ...@@ -265,7 +265,7 @@ int4in(PG_FUNCTION_ARGS)
{ {
char *num = PG_GETARG_CSTRING(0); char *num = PG_GETARG_CSTRING(0);
PG_RETURN_INT32(pg_atoi(num, sizeof(int32), '\0')); PG_RETURN_INT32(pg_strtoint32(num));
} }
/* /*
......
...@@ -101,6 +101,7 @@ scanint8(const char *str, bool errorOK, int64 *result) ...@@ -101,6 +101,7 @@ scanint8(const char *str, bool errorOK, int64 *result)
if (!neg) if (!neg)
{ {
/* could fail if input is most negative number */
if (unlikely(tmp == PG_INT64_MIN)) if (unlikely(tmp == PG_INT64_MIN))
goto out_of_range; goto out_of_range;
tmp = -tmp; tmp = -tmp;
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include <limits.h> #include <limits.h>
#include <ctype.h> #include <ctype.h>
#include "common/int.h"
#include "utils/builtins.h" #include "utils/builtins.h"
/* /*
...@@ -108,6 +109,154 @@ pg_atoi(const char *s, int size, int c) ...@@ -108,6 +109,154 @@ pg_atoi(const char *s, int size, int c)
return (int32) l; return (int32) l;
} }
/*
 * Convert input string to a signed 16 bit integer.
 *
 * Accepted format: optional leading whitespace, an optional single '+' or
 * '-' sign, one or more decimal digits, optional trailing whitespace.
 * Anything else is reported via ereport() as invalid input syntax; a value
 * that does not fit into int16 is reported via ereport() as out of range.
 * Both messages quote the complete original input string.
 *
 * NB: Accumulate input as a negative number, to deal with two's complement
 * representation of the most negative number, which can't be represented as a
 * positive number.
 */
int16
pg_strtoint16(const char *s)
{
	const char *ptr = s;
	int16		tmp = 0;
	bool		neg = false;

	/* skip leading spaces */
	while (likely(*ptr) && isspace((unsigned char) *ptr))
		ptr++;

	/* handle sign */
	if (*ptr == '-')
	{
		ptr++;
		neg = true;
	}
	else if (*ptr == '+')
		ptr++;

	/* require at least one digit */
	if (unlikely(!isdigit((unsigned char) *ptr)))
		goto invalid_syntax;

	/* process digits */
	while (*ptr && isdigit((unsigned char) *ptr))
	{
		int8		digit = (*ptr++ - '0');

		/*
		 * tmp = tmp * 10 - digit; a true result from either helper is
		 * treated as overflow of the int16 accumulator.
		 */
		if (unlikely(pg_mul_s16_overflow(tmp, 10, &tmp)) ||
			unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
			goto out_of_range;
	}

	/* allow trailing whitespace, but not other trailing chars */
	while (*ptr != '\0' && isspace((unsigned char) *ptr))
		ptr++;

	if (unlikely(*ptr != '\0'))
		goto invalid_syntax;

	if (!neg)
	{
		/* could fail if input is most negative number */
		if (unlikely(tmp == PG_INT16_MIN))
			goto out_of_range;
		tmp = -tmp;
	}

	return tmp;

out_of_range:
	ereport(ERROR,
			(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
			 errmsg("value \"%s\" is out of range for type %s",
					s, "smallint")));

invalid_syntax:
	ereport(ERROR,
			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
			 errmsg("invalid input syntax for type %s: \"%s\"",
					"smallint", s)));

	/*
	 * ereport(ERROR) is expected not to return, but not every compiler can
	 * tell; without this a non-void function appears to fall off its end.
	 */
	return 0;					/* keep compiler quiet */
}
/*
 * Convert input string to a signed 32 bit integer.
 *
 * Accepted format: optional leading whitespace, an optional single '+' or
 * '-' sign, one or more decimal digits, optional trailing whitespace.
 * Anything else is reported via ereport() as invalid input syntax; a value
 * that does not fit into int32 is reported via ereport() as out of range.
 * Both messages quote the complete original input string.
 *
 * NB: Accumulate input as a negative number, to deal with two's complement
 * representation of the most negative number, which can't be represented as a
 * positive number.
 */
int32
pg_strtoint32(const char *s)
{
	const char *ptr = s;
	int32		tmp = 0;
	bool		neg = false;

	/* skip leading spaces */
	while (likely(*ptr) && isspace((unsigned char) *ptr))
		ptr++;

	/* handle sign */
	if (*ptr == '-')
	{
		ptr++;
		neg = true;
	}
	else if (*ptr == '+')
		ptr++;

	/* require at least one digit */
	if (unlikely(!isdigit((unsigned char) *ptr)))
		goto invalid_syntax;

	/* process digits */
	while (*ptr && isdigit((unsigned char) *ptr))
	{
		int8		digit = (*ptr++ - '0');

		/*
		 * tmp = tmp * 10 - digit; a true result from either helper is
		 * treated as overflow of the int32 accumulator.
		 */
		if (unlikely(pg_mul_s32_overflow(tmp, 10, &tmp)) ||
			unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
			goto out_of_range;
	}

	/* allow trailing whitespace, but not other trailing chars */
	while (*ptr != '\0' && isspace((unsigned char) *ptr))
		ptr++;

	if (unlikely(*ptr != '\0'))
		goto invalid_syntax;

	if (!neg)
	{
		/* could fail if input is most negative number */
		if (unlikely(tmp == PG_INT32_MIN))
			goto out_of_range;
		tmp = -tmp;
	}

	return tmp;

out_of_range:
	ereport(ERROR,
			(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
			 errmsg("value \"%s\" is out of range for type %s",
					s, "integer")));

invalid_syntax:
	ereport(ERROR,
			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
			 errmsg("invalid input syntax for type %s: \"%s\"",
					"integer", s)));

	/*
	 * ereport(ERROR) is expected not to return, but not every compiler can
	 * tell; without this a non-void function appears to fall off its end.
	 */
	return 0;					/* keep compiler quiet */
}
/* /*
* pg_itoa: converts a signed 16-bit integer to its string representation * pg_itoa: converts a signed 16-bit integer to its string representation
* *
......
...@@ -5155,8 +5155,8 @@ text_format(PG_FUNCTION_ARGS) ...@@ -5155,8 +5155,8 @@ text_format(PG_FUNCTION_ARGS)
str = OutputFunctionCall(&typoutputinfo_width, value); str = OutputFunctionCall(&typoutputinfo_width, value);
/* pg_atoi will complain about bad data or overflow */ /* pg_strtoint32 will complain about bad data or overflow */
width = pg_atoi(str, sizeof(int), '\0'); width = pg_strtoint32(str);
pfree(str); pfree(str);
} }
......
...@@ -43,6 +43,8 @@ extern int namestrcmp(Name name, const char *str); ...@@ -43,6 +43,8 @@ extern int namestrcmp(Name name, const char *str);
/* numutils.c */ /* numutils.c */
extern int32 pg_atoi(const char *s, int size, int c); extern int32 pg_atoi(const char *s, int size, int c);
extern int16 pg_strtoint16(const char *s);
extern int32 pg_strtoint32(const char *s);
extern void pg_itoa(int16 i, char *a); extern void pg_itoa(int16 i, char *a);
extern void pg_ltoa(int32 l, char *a); extern void pg_ltoa(int32 l, char *a);
extern void pg_lltoa(int64 ll, char *a); extern void pg_lltoa(int64 ll, char *a);
......
...@@ -6,7 +6,7 @@ INSERT INTO INT2_TBL(f1) VALUES ('0 '); ...@@ -6,7 +6,7 @@ INSERT INTO INT2_TBL(f1) VALUES ('0 ');
INSERT INTO INT2_TBL(f1) VALUES (' 1234 '); INSERT INTO INT2_TBL(f1) VALUES (' 1234 ');
INSERT INTO INT2_TBL(f1) VALUES (' -1234'); INSERT INTO INT2_TBL(f1) VALUES (' -1234');
INSERT INTO INT2_TBL(f1) VALUES ('34.5'); INSERT INTO INT2_TBL(f1) VALUES ('34.5');
ERROR: invalid input syntax for type integer: "34.5" ERROR: invalid input syntax for type smallint: "34.5"
LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('34.5'); LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('34.5');
^ ^
-- largest and smallest values -- largest and smallest values
...@@ -18,27 +18,27 @@ ERROR: value "100000" is out of range for type smallint ...@@ -18,27 +18,27 @@ ERROR: value "100000" is out of range for type smallint
LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('100000'); LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('100000');
^ ^
INSERT INTO INT2_TBL(f1) VALUES ('asdf'); INSERT INTO INT2_TBL(f1) VALUES ('asdf');
ERROR: invalid input syntax for type integer: "asdf" ERROR: invalid input syntax for type smallint: "asdf"
LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('asdf'); LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('asdf');
^ ^
INSERT INTO INT2_TBL(f1) VALUES (' '); INSERT INTO INT2_TBL(f1) VALUES (' ');
ERROR: invalid input syntax for type integer: " " ERROR: invalid input syntax for type smallint: " "
LINE 1: INSERT INTO INT2_TBL(f1) VALUES (' '); LINE 1: INSERT INTO INT2_TBL(f1) VALUES (' ');
^ ^
INSERT INTO INT2_TBL(f1) VALUES ('- 1234'); INSERT INTO INT2_TBL(f1) VALUES ('- 1234');
ERROR: invalid input syntax for type integer: "- 1234" ERROR: invalid input syntax for type smallint: "- 1234"
LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('- 1234'); LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('- 1234');
^ ^
INSERT INTO INT2_TBL(f1) VALUES ('4 444'); INSERT INTO INT2_TBL(f1) VALUES ('4 444');
ERROR: invalid input syntax for type integer: "4 444" ERROR: invalid input syntax for type smallint: "4 444"
LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('4 444'); LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('4 444');
^ ^
INSERT INTO INT2_TBL(f1) VALUES ('123 dt'); INSERT INTO INT2_TBL(f1) VALUES ('123 dt');
ERROR: invalid input syntax for type integer: "123 dt" ERROR: invalid input syntax for type smallint: "123 dt"
LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('123 dt'); LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('123 dt');
^ ^
INSERT INTO INT2_TBL(f1) VALUES (''); INSERT INTO INT2_TBL(f1) VALUES ('');
ERROR: invalid input syntax for type integer: "" ERROR: invalid input syntax for type smallint: ""
LINE 1: INSERT INTO INT2_TBL(f1) VALUES (''); LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('');
^ ^
SELECT '' AS five, * FROM INT2_TBL; SELECT '' AS five, * FROM INT2_TBL;
......
...@@ -975,7 +975,7 @@ ROLLBACK TO SAVEPOINT settings; ...@@ -975,7 +975,7 @@ ROLLBACK TO SAVEPOINT settings;
SAVEPOINT settings; SAVEPOINT settings;
SET LOCAL force_parallel_mode = 1; SET LOCAL force_parallel_mode = 1;
select stringu1::int2 from tenk1 where unique1 = 1; select stringu1::int2 from tenk1 where unique1 = 1;
ERROR: invalid input syntax for type integer: "BAAAAA" ERROR: invalid input syntax for type smallint: "BAAAAA"
CONTEXT: parallel worker CONTEXT: parallel worker
ROLLBACK TO SAVEPOINT settings; ROLLBACK TO SAVEPOINT settings;
-- test interaction with set-returning functions -- test interaction with set-returning functions
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment