Commit 86eaf208 authored by Andres Freund

Hand code string to integer conversion for performance.

As benchmarks show, using libc's string-to-integer conversion is
pretty slow. At least part of the reason for that is that strtol[l]
have to be more generic than what largely is required inside pg.

This patch considerably speeds up int2/int4 input (int8 was already
using hand-rolled code).

Most of the existing pg_atoi callers have been converted. But as one
requires pg_atoi's custom delimiter functionality, and as it seems
likely that there are external pg_atoi users, it seems sensible to just
keep pg_atoi around.

Author: Andres Freund
Reviewed-By: Robert Haas
Discussion: https://postgr.es/m/20171208214437.qgn6zdltyq5hmjpk@alap3.anarazel.de
parent 3522d0ea
......@@ -306,7 +306,7 @@ check_foreign_key(PG_FUNCTION_ARGS)
/* internal error */
elog(ERROR, "check_foreign_key: too short %d (< 5) list of arguments", nargs);
nrefs = pg_atoi(args[0], sizeof(int), 0);
nrefs = pg_strtoint32(args[0]);
if (nrefs < 1)
/* internal error */
elog(ERROR, "check_foreign_key: %d (< 1) number of references specified", nrefs);
......
......@@ -709,7 +709,7 @@ BETTER: could not open file %s (I/O failure)
not helpful information. If the error text doesn't make as much sense
without the function name, reword it.
<programlisting>
BAD: pg_atoi: error in "z": cannot parse "z"
BAD: pg_strtoint32: error in "z": cannot parse "z"
BETTER: invalid input syntax for integer: "z"
</programlisting>
</para>
......
......@@ -286,10 +286,10 @@ pq_parse_errornotice(StringInfo msg, ErrorData *edata)
edata->hint = pstrdup(value);
break;
case PG_DIAG_STATEMENT_POSITION:
edata->cursorpos = pg_atoi(value, sizeof(int), '\0');
edata->cursorpos = pg_strtoint32(value);
break;
case PG_DIAG_INTERNAL_POSITION:
edata->internalpos = pg_atoi(value, sizeof(int), '\0');
edata->internalpos = pg_strtoint32(value);
break;
case PG_DIAG_INTERNAL_QUERY:
edata->internalquery = pstrdup(value);
......@@ -316,7 +316,7 @@ pq_parse_errornotice(StringInfo msg, ErrorData *edata)
edata->filename = pstrdup(value);
break;
case PG_DIAG_SOURCE_LINE:
edata->lineno = pg_atoi(value, sizeof(int), '\0');
edata->lineno = pg_strtoint32(value);
break;
case PG_DIAG_SOURCE_FUNCTION:
edata->funcname = pstrdup(value);
......
......@@ -345,7 +345,7 @@ libpqrcv_identify_system(WalReceiverConn *conn, TimeLineID *primary_tli,
ntuples, nfields, 3, 1)));
}
primary_sysid = pstrdup(PQgetvalue(res, 0, 0));
*primary_tli = pg_atoi(PQgetvalue(res, 0, 1), 4, 0);
*primary_tli = pg_strtoint32(PQgetvalue(res, 0, 1));
PQclear(res);
*server_version = PQserverVersion(conn->streamConn);
......@@ -480,7 +480,7 @@ libpqrcv_endstreaming(WalReceiverConn *conn, TimeLineID *next_tli)
if (PQnfields(res) < 2 || PQntuples(res) != 1)
ereport(ERROR,
(errmsg("unexpected result set after end-of-streaming")));
*next_tli = pg_atoi(PQgetvalue(res, 0, 0), sizeof(uint32), 0);
*next_tli = pg_strtoint32(PQgetvalue(res, 0, 0));
PQclear(res);
/* the result set should be followed by CommandComplete */
......
......@@ -2460,13 +2460,13 @@ prsd_headline(PG_FUNCTION_ARGS)
char *val = defGetString(defel);
if (pg_strcasecmp(defel->defname, "MaxWords") == 0)
max_words = pg_atoi(val, sizeof(int32), 0);
max_words = pg_strtoint32(val);
else if (pg_strcasecmp(defel->defname, "MinWords") == 0)
min_words = pg_atoi(val, sizeof(int32), 0);
min_words = pg_strtoint32(val);
else if (pg_strcasecmp(defel->defname, "ShortWord") == 0)
shortword = pg_atoi(val, sizeof(int32), 0);
shortword = pg_strtoint32(val);
else if (pg_strcasecmp(defel->defname, "MaxFragments") == 0)
max_fragments = pg_atoi(val, sizeof(int32), 0);
max_fragments = pg_strtoint32(val);
else if (pg_strcasecmp(defel->defname, "StartSel") == 0)
prs->startsel = pstrdup(val);
else if (pg_strcasecmp(defel->defname, "StopSel") == 0)
......
......@@ -226,8 +226,7 @@ ArrayGetIntegerTypmods(ArrayType *arr, int *n)
result = (int32 *) palloc(*n * sizeof(int32));
for (i = 0; i < *n; i++)
result[i] = pg_atoi(DatumGetCString(elem_values[i]),
sizeof(int32), '\0');
result[i] = pg_strtoint32(DatumGetCString(elem_values[i]));
pfree(elem_values);
......
......@@ -60,7 +60,7 @@ int2in(PG_FUNCTION_ARGS)
{
char *num = PG_GETARG_CSTRING(0);
PG_RETURN_INT16(pg_atoi(num, sizeof(int16), '\0'));
PG_RETURN_INT16(pg_strtoint16(num));
}
/*
......@@ -265,7 +265,7 @@ int4in(PG_FUNCTION_ARGS)
{
char *num = PG_GETARG_CSTRING(0);
PG_RETURN_INT32(pg_atoi(num, sizeof(int32), '\0'));
PG_RETURN_INT32(pg_strtoint32(num));
}
/*
......
......@@ -101,6 +101,7 @@ scanint8(const char *str, bool errorOK, int64 *result)
if (!neg)
{
/* could fail if input is most negative number */
if (unlikely(tmp == PG_INT64_MIN))
goto out_of_range;
tmp = -tmp;
......
......@@ -18,6 +18,7 @@
#include <limits.h>
#include <ctype.h>
#include "common/int.h"
#include "utils/builtins.h"
/*
......@@ -108,6 +109,154 @@ pg_atoi(const char *s, int size, int c)
return (int32) l;
}
/*
 * Convert input string to a signed 16 bit integer.
 *
 * Accepted format: optional leading whitespace, an optional single '+' or
 * '-' sign, one or more decimal digits, optional trailing whitespace.
 * Anything else (including whitespace between sign and digits, or an empty
 * string) is rejected.
 *
 * s is a NUL-terminated string; it is only read, and is quoted verbatim in
 * any error message.
 *
 * Returns the parsed value.  Will throw ereport() upon bad input format
 * (ERRCODE_INVALID_TEXT_REPRESENTATION) or overflow
 * (ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE).
 *
 * NB: Accumulate input as a negative number, to deal with two's complement
 * representation of the most negative number, which can't be represented as a
 * positive number.
 */
int16
pg_strtoint16(const char *s)
{
	const char *ptr = s;
	int16		tmp = 0;
	bool		neg = false;

	/* skip leading spaces */
	while (likely(*ptr) && isspace((unsigned char) *ptr))
		ptr++;

	/* handle sign */
	if (*ptr == '-')
	{
		ptr++;
		neg = true;
	}
	else if (*ptr == '+')
		ptr++;

	/* require at least one digit */
	if (unlikely(!isdigit((unsigned char) *ptr)))
		goto invalid_syntax;

	/* process digits */
	while (*ptr && isdigit((unsigned char) *ptr))
	{
		int8		digit = (*ptr++ - '0');

		/* tmp = tmp * 10 - digit, bailing out as soon as either step overflows */
		if (unlikely(pg_mul_s16_overflow(tmp, 10, &tmp)) ||
			unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
			goto out_of_range;
	}

	/* allow trailing whitespace, but not other trailing chars */
	while (*ptr != '\0' && isspace((unsigned char) *ptr))
		ptr++;

	if (unlikely(*ptr != '\0'))
		goto invalid_syntax;

	/* tmp holds the negated magnitude; flip it for non-negative input */
	if (!neg)
	{
		/* could fail if input is most negative number */
		if (unlikely(tmp == PG_INT16_MIN))
			goto out_of_range;
		tmp = -tmp;
	}

	return tmp;

out_of_range:
	ereport(ERROR,
			(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
			 errmsg("value \"%s\" is out of range for type %s",
					s, "smallint")));

invalid_syntax:
	ereport(ERROR,
			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
			 errmsg("invalid input syntax for type %s: \"%s\"",
					"smallint", s)));

	/*
	 * ereport(ERROR, ...) is not expected to return, but not every compiler
	 * can tell; without this, control would appear to fall off the end of a
	 * non-void function.
	 */
	return 0;					/* keep compiler quiet */
}
/*
 * Convert input string to a signed 32 bit integer.
 *
 * Accepted format: optional leading whitespace, an optional single '+' or
 * '-' sign, one or more decimal digits, optional trailing whitespace.
 * Anything else (including whitespace between sign and digits, or an empty
 * string) is rejected.
 *
 * s is a NUL-terminated string; it is only read, and is quoted verbatim in
 * any error message.
 *
 * Returns the parsed value.  Will throw ereport() upon bad input format
 * (ERRCODE_INVALID_TEXT_REPRESENTATION) or overflow
 * (ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE).
 *
 * NB: Accumulate input as a negative number, to deal with two's complement
 * representation of the most negative number, which can't be represented as a
 * positive number.
 */
int32
pg_strtoint32(const char *s)
{
	const char *ptr = s;
	int32		tmp = 0;
	bool		neg = false;

	/* skip leading spaces */
	while (likely(*ptr) && isspace((unsigned char) *ptr))
		ptr++;

	/* handle sign */
	if (*ptr == '-')
	{
		ptr++;
		neg = true;
	}
	else if (*ptr == '+')
		ptr++;

	/* require at least one digit */
	if (unlikely(!isdigit((unsigned char) *ptr)))
		goto invalid_syntax;

	/* process digits */
	while (*ptr && isdigit((unsigned char) *ptr))
	{
		int8		digit = (*ptr++ - '0');

		/* tmp = tmp * 10 - digit, bailing out as soon as either step overflows */
		if (unlikely(pg_mul_s32_overflow(tmp, 10, &tmp)) ||
			unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
			goto out_of_range;
	}

	/* allow trailing whitespace, but not other trailing chars */
	while (*ptr != '\0' && isspace((unsigned char) *ptr))
		ptr++;

	if (unlikely(*ptr != '\0'))
		goto invalid_syntax;

	/* tmp holds the negated magnitude; flip it for non-negative input */
	if (!neg)
	{
		/* could fail if input is most negative number */
		if (unlikely(tmp == PG_INT32_MIN))
			goto out_of_range;
		tmp = -tmp;
	}

	return tmp;

out_of_range:
	ereport(ERROR,
			(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
			 errmsg("value \"%s\" is out of range for type %s",
					s, "integer")));

invalid_syntax:
	ereport(ERROR,
			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
			 errmsg("invalid input syntax for type %s: \"%s\"",
					"integer", s)));

	/*
	 * ereport(ERROR, ...) is not expected to return, but not every compiler
	 * can tell; without this, control would appear to fall off the end of a
	 * non-void function.
	 */
	return 0;					/* keep compiler quiet */
}
/*
* pg_itoa: converts a signed 16-bit integer to its string representation
*
......
......@@ -5155,8 +5155,8 @@ text_format(PG_FUNCTION_ARGS)
str = OutputFunctionCall(&typoutputinfo_width, value);
/* pg_atoi will complain about bad data or overflow */
width = pg_atoi(str, sizeof(int), '\0');
/* pg_strtoint32 will complain about bad data or overflow */
width = pg_strtoint32(str);
pfree(str);
}
......
......@@ -43,6 +43,8 @@ extern int namestrcmp(Name name, const char *str);
/* numutils.c */
/*
 * Older string-to-integer parser, kept for callers that need its extra
 * arguments.  NOTE(review): callers pass a byte width (e.g. sizeof(int32))
 * as "size" and a terminator/delimiter character as "c" -- confirm against
 * the definition.
 */
extern int32 pg_atoi(const char *s, int size, int c);
/*
 * Hand-coded string-to-integer parsers: allow leading/trailing whitespace
 * and throw ereport() on bad input format or overflow.
 */
extern int16 pg_strtoint16(const char *s);
extern int32 pg_strtoint32(const char *s);
/* integer-to-string conversions; output is written into the buffer "a" */
extern void pg_itoa(int16 i, char *a);
extern void pg_ltoa(int32 l, char *a);
extern void pg_lltoa(int64 ll, char *a);
......
......@@ -6,7 +6,7 @@ INSERT INTO INT2_TBL(f1) VALUES ('0 ');
INSERT INTO INT2_TBL(f1) VALUES (' 1234 ');
INSERT INTO INT2_TBL(f1) VALUES (' -1234');
INSERT INTO INT2_TBL(f1) VALUES ('34.5');
ERROR: invalid input syntax for type integer: "34.5"
ERROR: invalid input syntax for type smallint: "34.5"
LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('34.5');
^
-- largest and smallest values
......@@ -18,27 +18,27 @@ ERROR: value "100000" is out of range for type smallint
LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('100000');
^
INSERT INTO INT2_TBL(f1) VALUES ('asdf');
ERROR: invalid input syntax for type integer: "asdf"
ERROR: invalid input syntax for type smallint: "asdf"
LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('asdf');
^
INSERT INTO INT2_TBL(f1) VALUES (' ');
ERROR: invalid input syntax for type integer: " "
ERROR: invalid input syntax for type smallint: " "
LINE 1: INSERT INTO INT2_TBL(f1) VALUES (' ');
^
INSERT INTO INT2_TBL(f1) VALUES ('- 1234');
ERROR: invalid input syntax for type integer: "- 1234"
ERROR: invalid input syntax for type smallint: "- 1234"
LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('- 1234');
^
INSERT INTO INT2_TBL(f1) VALUES ('4 444');
ERROR: invalid input syntax for type integer: "4 444"
ERROR: invalid input syntax for type smallint: "4 444"
LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('4 444');
^
INSERT INTO INT2_TBL(f1) VALUES ('123 dt');
ERROR: invalid input syntax for type integer: "123 dt"
ERROR: invalid input syntax for type smallint: "123 dt"
LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('123 dt');
^
INSERT INTO INT2_TBL(f1) VALUES ('');
ERROR: invalid input syntax for type integer: ""
ERROR: invalid input syntax for type smallint: ""
LINE 1: INSERT INTO INT2_TBL(f1) VALUES ('');
^
SELECT '' AS five, * FROM INT2_TBL;
......
......@@ -975,7 +975,7 @@ ROLLBACK TO SAVEPOINT settings;
SAVEPOINT settings;
SET LOCAL force_parallel_mode = 1;
select stringu1::int2 from tenk1 where unique1 = 1;
ERROR: invalid input syntax for type integer: "BAAAAA"
ERROR: invalid input syntax for type smallint: "BAAAAA"
CONTEXT: parallel worker
ROLLBACK TO SAVEPOINT settings;
-- test interaction with set-returning functions
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment