Correct overflow handling in pgbench.

This patch attempts, although it's quite possible there are a few holes, to properly detect and reported signed integer overflows in pgbench. Author: Fabien Coelho Reviewed-By: Andres Freund Discussion: https://postgr.es/m/20171212052943.k2hlckfkeft3eiio@alap3.anarazel.de

Correct overflow handling in pgbench.
This patch attempts, although it's quite possible there are a few holes, to properly detect and reported signed integer overflows in pgbench. Author: Fabien Coelho Reviewed-By: Andres Freund Discussion: https://postgr.es/m/20171212052943.k2hlckfkeft3eiio@alap3.anarazel.de
92a0342a · Andres Freund · 78ea8b5d · 92a0342a · 92a0342a · 92a0342a
Commit 92a0342a authored Sep 27, 2018 by Andres Freund
7 changed files
--- a/doc/src/sgml/ref/pgbench.sgml
+++ b/doc/src/sgml/ref/pgbench.sgml
@@ -989,6 +989,13 @@ pgbench <optional> <replaceable>options</replaceable> </optional> <replaceable>d
      are <literal>FALSE</literal>.
     </para>
+     <para>
+      Too large or small integer and double constants, as well as
+      integer arithmetic operators (<literal>+</literal>,
+      <literal>-</literal>, <literal>*</literal> and <literal>/</literal>)
+      raise errors on overflows.
+     </para>
     <para>
      When no final <token>ELSE</token> clause is provided to a
      <token>CASE</token>, the default value is <literal>NULL</literal>.

--- a/src/bin/pgbench/exprparse.y
+++ b/src/bin/pgbench/exprparse.y
@@ -61,7 +61,8 @@ static PgBenchExpr *make_case(yyscan_t yyscanner, PgBenchExprList *when_then_lis
 %type <bval> BOOLEAN_CONST
 %type <str> VARIABLE FUNCTION
-%token NULL_CONST INTEGER_CONST DOUBLE_CONST BOOLEAN_CONST VARIABLE FUNCTION
+%token NULL_CONST INTEGER_CONST MAXINT_PLUS_ONE_CONST DOUBLE_CONST
+%token BOOLEAN_CONST VARIABLE FUNCTION
 %token AND_OP OR_OP NOT_OP NE_OP LE_OP GE_OP LS_OP RS_OP IS_OP
 %token CASE_KW WHEN_KW THEN_KW ELSE_KW END_KW
@@ -90,6 +91,9 @@ expr: '(' expr ')'			{ $$ = $2; }
 	/* unary minus "-x" implemented as "0 - x" */
 	| '-' expr %prec UNARY	{ $$ = make_op(yyscanner, "-",
 										   make_integer_constant(0), $2); }
+	/* special PG_INT64_MIN handling, only after a unary minus */
+	| '-' MAXINT_PLUS_ONE_CONST %prec UNARY
+							{ $$ = make_integer_constant(PG_INT64_MIN); }
 	/* binary ones complement "~x" implemented as 0xffff... xor x" */
 	| '~' expr				{ $$ = make_op(yyscanner, "#",
 										   make_integer_constant(~INT64CONST(0)), $2); }

--- a/src/bin/pgbench/exprscan.l
+++ b/src/bin/pgbench/exprscan.l
@@ -195,16 +195,31 @@ notnull			[Nn][Oo][Tt][Nn][Uu][Ll][Ll]
 					yylval->bval = false;
 					return BOOLEAN_CONST;
 				}
+"9223372036854775808" {
+					/*
+					 * Special handling for PG_INT64_MIN, which can't
+					 * accurately be represented here, as the minus sign is
+					 * lexed separately and INT64_MIN can't be represented as
+					 * a positive integer.
+					 */
+					return MAXINT_PLUS_ONE_CONST;
+				}
 {digit}+		{
-					yylval->ival = strtoint64(yytext);
+					if (!strtoint64(yytext, true, &yylval->ival))
+						expr_yyerror_more(yyscanner, "bigint constant overflow",
+										  strdup(yytext));
 					return INTEGER_CONST;
 				}
 {digit}+(\.{digit}*)?([eE][-+]?{digit}+)?	{
-					yylval->dval = atof(yytext);
+					if (!strtodouble(yytext, true, &yylval->dval))
+						expr_yyerror_more(yyscanner, "double constant overflow",
+										  strdup(yytext));
 					return DOUBLE_CONST;
 				}
 \.{digit}+([eE][-+]?{digit}+)?	{
-					yylval->dval = atof(yytext);
+					if (!strtodouble(yytext, true, &yylval->dval))
+						expr_yyerror_more(yyscanner, "double constant overflow",
+										  strdup(yytext));
 					return DOUBLE_CONST;
 				}
 {alpha}{alnum}*	{

--- a/src/bin/pgbench/pgbench.c
+++ b/src/bin/pgbench/pgbench.c
@@ -32,8 +32,8 @@
 #endif
 #include "postgres_fe.h"
+#include "common/int.h"
 #include "fe_utils/conditional.h"
 #include "getopt_long.h"
 #include "libpq-fe.h"
 #include "portability/instr_time.h"
@@ -662,19 +662,27 @@ is_an_int(const char *str)
 /*
 * strtoint64 -- convert a string to 64-bit integer
 *
- * This function is a modified version of scanint8() from
+ * This function is a slightly modified version of scanint8() from
 * src/backend/utils/adt/int8.c.
+ *
+ * The function returns whether the conversion worked, and if so
+ * "*result" is set to the result.
+ *
+ * If not errorOK, an error message is also printed out on errors.
 */
-int64
+bool
-strtoint64(const char *str)
+strtoint64(const char *str, bool errorOK, int64 *result)
 {
 	const char *ptr = str;
-	int64		result = 0;
+	int64		tmp = 0;
-	int			sign = 1;
+	bool		neg = false;
 	/*
 	 * Do our own scan, rather than relying on sscanf which might be broken
 	 * for long long.
+	 *
+	 * As INT64_MIN can't be stored as a positive 64 bit integer, accumulate
+	 * value as a negative number.
 	 */
 	/* skip leading spaces */
@@ -685,46 +693,80 @@ strtoint64(const char *str)
 	if (*ptr == '-')
 	{
 		ptr++;
+		neg = true;
-		/*
-		 * Do an explicit check for INT64_MIN.  Ugly though this is, it's
-		 * cleaner than trying to get the loop below to handle it portably.
-		 */
-		if (strncmp(ptr, "9223372036854775808", 19) == 0)
-		{
-			result = PG_INT64_MIN;
-			ptr += 19;
-			goto gotdigits;
-		}
-		sign = -1;
 	}
 	else if (*ptr == '+')
 		ptr++;
 	/* require at least one digit */
-	if (!isdigit((unsigned char) *ptr))
+	if (unlikely(!isdigit((unsigned char) *ptr)))
-		fprintf(stderr, "invalid input syntax for integer: \"%s\"\n", str);
+		goto invalid_syntax;
 	/* process digits */
 	while (*ptr && isdigit((unsigned char) *ptr))
 	{
-		int64		tmp = result * 10 + (*ptr++ - '0');
+		int8		digit = (*ptr++ - '0');
-		if ((tmp / 10) != result)	/* overflow? */
+		if (unlikely(pg_mul_s64_overflow(tmp, 10, &tmp)) ||
-			fprintf(stderr, "value \"%s\" is out of range for type bigint\n", str);
+			unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
-		result = tmp;
+			goto out_of_range;
 	}
-gotdigits:
 	/* allow trailing whitespace, but not other trailing chars */
 	while (*ptr != '\0' && isspace((unsigned char) *ptr))
 		ptr++;
-	if (*ptr != '\0')
+	if (unlikely(*ptr != '\0'))
-		fprintf(stderr, "invalid input syntax for integer: \"%s\"\n", str);
+		goto invalid_syntax;
+	if (!neg)
+	{
+		if (unlikely(tmp == PG_INT64_MIN))
+			goto out_of_range;
+		tmp = -tmp;
+	}
-	return ((sign < 0) ? -result : result);
+	*result = tmp;
+	return true;
+out_of_range:
+	if (!errorOK)
+		fprintf(stderr,
+				"value \"%s\" is out of range for type bigint\n", str);
+	return false;
+invalid_syntax:
+	if (!errorOK)
+		fprintf(stderr,
+				"invalid input syntax for type bigint: \"%s\"\n",str);
+	return false;
+}
+/* convert string to double, detecting overflows/underflows */
+bool
+strtodouble(const char *str, bool errorOK, double *dv)
+{
+	char *end;
+	errno = 0;
+	*dv = strtod(str, &end);
+	if (unlikely(errno != 0))
+	{
+		if (!errorOK)
+			fprintf(stderr,
+					"value \"%s\" is out of range for type double\n", str);
+		return false;
+	}
+	if (unlikely(end == str || *end != '\0'))
+	{
+		if (!errorOK)
+			fprintf(stderr,
+					"invalid input syntax for type double: \"%s\"\n",str);
+		return false;
+	}
+	return true;
 }
 /* random number generator: uniform distribution from min to max inclusive */
@@ -1320,14 +1362,19 @@ makeVariableValue(Variable *var)
 	}
 	else if (is_an_int(var->svalue))
 	{
-		setIntValue(&var->value, strtoint64(var->svalue));
+		/* if it looks like an int, it must be an int without overflow */
+		int64 iv;
+		if (!strtoint64(var->svalue, false, &iv))
+			return false;
+		setIntValue(&var->value, iv);
 	}
 	else						/* type should be double */
 	{
 		double		dv;
-		char		xs;
-		if (sscanf(var->svalue, "%lf%c", &dv, &xs) != 1)
+		if (!strtodouble(var->svalue, true, &dv))
 		{
 			fprintf(stderr,
 					"malformed variable \"%s\" value: \"%s\"\n",
@@ -1943,7 +1990,8 @@ evalStandardFunc(TState *thread, CState *st,
 				else			/* we have integer operands, or % */
 				{
 					int64		li,
-								ri;
+								ri,
+								res;
 					if (!coerceToInt(lval, &li) ||
 						!coerceToInt(rval, &ri))
@@ -1952,15 +2000,30 @@ evalStandardFunc(TState *thread, CState *st,
 					switch (func)
 					{
 						case PGBENCH_ADD:
-							setIntValue(retval, li + ri);
+							if (pg_add_s64_overflow(li, ri, &res))
+							{
+								fprintf(stderr, "bigint add out of range\n");
+								return false;
+							}
+							setIntValue(retval, res);
 							return true;
 						case PGBENCH_SUB:
-							setIntValue(retval, li - ri);
+							if (pg_sub_s64_overflow(li, ri, &res))
+							{
+								fprintf(stderr, "bigint sub out of range\n");
+								return false;
+							}
+							setIntValue(retval, res);
 							return true;
 						case PGBENCH_MUL:
-							setIntValue(retval, li * ri);
+							if (pg_mul_s64_overflow(li, ri, &res))
+							{
+								fprintf(stderr, "bigint mul out of range\n");
+								return false;
+							}
+							setIntValue(retval, res);
 							return true;
 						case PGBENCH_EQ:
@@ -1994,7 +2057,7 @@ evalStandardFunc(TState *thread, CState *st,
 									/* overflow check (needed for INT64_MIN) */
 									if (li == PG_INT64_MIN)
 									{
-										fprintf(stderr, "bigint out of range\n");
+										fprintf(stderr, "bigint div out of range\n");
 										return false;
 									}
 									else

--- a/src/bin/pgbench/pgbench.h
+++ b/src/bin/pgbench/pgbench.h
@@ -160,6 +160,7 @@ extern void syntax_error(const char *source, int lineno, const char *line,
 			 const char *cmd, const char *msg,
 			 const char *more, int col) pg_attribute_noreturn();
-extern int64 strtoint64(const char *str);
+extern bool strtoint64(const char *str, bool errorOK, int64 *pi);
+extern bool strtodouble(const char *str, bool errorOK, double *pd);
 #endif							/* PGBENCH_H */
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -255,7 +255,7 @@ COMMIT;
 # test expressions
 # command 1..3 and 23 depend on random seed which is used to call srandom.
 pgbench(
-	'--random-seed=5432 -t 1 -Dfoo=-10.1 -Dbla=false -Di=+3 -Dminint=-9223372036854775808 -Dn=null -Dt=t -Df=of -Dd=1.0',
+	'--random-seed=5432 -t 1 -Dfoo=-10.1 -Dbla=false -Di=+3 -Dn=null -Dt=t -Df=of -Dd=1.0',
 	0,
 	[ qr{type: .*/001_pgbench_expressions}, qr{processed: 1/1} ],
 	[
@@ -278,7 +278,6 @@ pgbench(
 		qr{command=15.: double 15\b},
 		qr{command=16.: double 16\b},
 		qr{command=17.: double 17\b},
-		qr{command=18.: int 9223372036854775807\b},
 		qr{command=20.: int \d\b},    # zipfian random: 1 on linux
 		qr{command=21.: double -27\b},
 		qr{command=22.: double 1024\b},
@@ -322,6 +321,8 @@ pgbench(
 		qr{command=96.: int 1\b},       # :scale
 		qr{command=97.: int 0\b},       # :client_id
 		qr{command=98.: int 5432\b},    # :random_seed
+		qr{command=99.: int -9223372036854775808\b},    # min int
+		qr{command=100.: int 9223372036854775807\b},    # max int
 	],
 	'pgbench expressions',
 	{
@@ -345,10 +346,9 @@ pgbench(
 \set pi debug(pi() * 4.9)
 \set d4 debug(greatest(4, 2, -1.17) * 4.0 * Ln(Exp(1.0)))
 \set d5 debug(least(-5.18, .0E0, 1.0/0) * -3.3)
-- forced overflow
+-- reset variables
-\set maxint debug(:minint - 1)
-- reset a variable
 \set i1 0
+\set d1 false
 -- yet another integer function
 \set id debug(random_zipfian(1, 9, 1.3))
 --- pow and power
@@ -447,6 +447,9 @@ SELECT :v0, :v1, :v2, :v3;
 \set sc debug(:scale)
 \set ci debug(:client_id)
 \set rs debug(:random_seed)
+-- minint constant parsing
+\set min debug(-9223372036854775808)
+\set max debug(-(:min + 1))
 }
 	});
@@ -601,16 +604,10 @@ SELECT LEAST(:i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i);
 		[qr{invalid variable name}], q{\set . 1}
 	],
 	[
-		'set int overflow',                   0,
+		'set division by zero', 0,
-		[qr{double to int overflow for 100}], q{\set i int(1E32)}
+		[qr{division by zero}], q{\set i 1/0}
 	],
-	[ 'set division by zero', 0, [qr{division by zero}], q{\set i 1/0} ],
+	[   'set undefined variable',
-	[
-		'set bigint out of range', 0,
-		[qr{bigint out of range}], q{\set i 9223372036854775808 / -1}
-	],
-	[
-		'set undefined variable',
 		0,
 		[qr{undefined variable "nosuchvariable"}],
 		q{\set i :nosuchvariable}
@@ -630,7 +627,7 @@ SELECT LEAST(:i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i);
 		'set random range too large',
 		0,
 		[qr{random range is too large}],
-		q{\set i random(-9223372036854775808, 9223372036854775807)}
+		q{\set i random(:minint, :maxint)}
 	],
 	[
 		'set gaussian param too small',
@@ -693,6 +690,18 @@ SELECT LEAST(:i, :i, :i, :i, :i, :i, :i, :i, :i, :i, :i);
 		[qr{at least one argument expected}], q{\set i greatest())}
 	],
+	# SET: ARITHMETIC OVERFLOW DETECTION
+	[ 'set double to int overflow',                   0,
+		[ qr{double to int overflow for 100} ], q{\set i int(1E32)} ],
+	[ 'set bigint add overflow', 0,
+		[ qr{int add out} ], q{\set i (1<<62) + (1<<62)} ],
+	[ 'set bigint sub overflow', 0,
+		[ qr{int sub out} ], q{\set i 0 - (1<<62) - (1<<62) - (1<<62)} ],
+	[ 'set bigint mul overflow', 0,
+		[ qr{int mul out} ], q{\set i 2 * (1<<62)} ],
+	[ 'set bigint div out of range', 0,
+		[ qr{bigint div out of range} ], q{\set i :minint / -1} ],
 	# SETSHELL
 	[
 		'setshell not an int',                0,
@@ -740,7 +749,8 @@ for my $e (@errors)
 	my $n = '001_pgbench_error_' . $name;
 	$n =~ s/ /_/g;
 	pgbench(
-		'-n -t 1 -Dfoo=bla -Dnull=null -Dtrue=true -Done=1 -Dzero=0.0 -Dbadtrue=trueXXX -M prepared',
+		'-n -t 1 -M prepared -Dfoo=bla -Dnull=null -Dtrue=true -Done=1 -Dzero=0.0 ' .
+		'-Dbadtrue=trueXXX -Dmaxint=9223372036854775807 -Dminint=-9223372036854775808',
 		$status,
 		[ $status ? qr{^$} : qr{processed: 0/1} ],
 		$re,

--- a/src/bin/pgbench/t/002_pgbench_no_server.pl
+++ b/src/bin/pgbench/t/002_pgbench_no_server.pl
@@ -290,6 +290,22 @@ my @script_tests = (
 		'too many arguments for hash',
 		[qr{unexpected number of arguments \(hash\)}],
 		{ 'bad-hash-2.sql' => "\\set i hash(1,2,3)\n" }
+	],
+	# overflow
+	[
+		'bigint overflow 1',
+		[qr{bigint constant overflow}],
+		{ 'overflow-1.sql' => "\\set i 100000000000000000000\n" }
+	],
+	[
+		'double overflow 2',
+		[qr{double constant overflow}],
+		{ 'overflow-2.sql' => "\\set d 1.0E309\n" }
+	],
+	[
+		'double overflow 3',
+		[qr{double constant overflow}],
+		{ 'overflow-3.sql' => "\\set d .1E310\n" }
 	],);
 for my $t (@script_tests)