Commit b33265e9 authored by Tom Lane

Adjust hash table sizing algorithm to avoid integer overflow in
ExecHashJoinGetBatch().  Fixes core dump on large hash joins, as in
example from Rae Stiening.
parent c9d87120
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.71 2002/12/15 16:17:46 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.72 2002/12/29 22:28:50 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
*/ */
#include "postgres.h" #include "postgres.h"
#include <limits.h>
#include <math.h> #include <math.h>
#include "access/hash.h" #include "access/hash.h"
...@@ -344,7 +345,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, ...@@ -344,7 +345,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
{ {
int tupsize; int tupsize;
double inner_rel_bytes; double inner_rel_bytes;
double hash_table_bytes; long hash_table_bytes;
double dtmp;
int nbatch; int nbatch;
int nbuckets; int nbuckets;
int totalbuckets; int totalbuckets;
...@@ -362,20 +364,22 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, ...@@ -362,20 +364,22 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
inner_rel_bytes = ntuples * tupsize * FUDGE_FAC; inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
/* /*
* Target hashtable size is SortMem kilobytes, but not less than * Target in-memory hashtable size is SortMem kilobytes.
* sqrt(estimated inner rel size), so as to avoid horrible
* performance.
*/ */
hash_table_bytes = sqrt(inner_rel_bytes); hash_table_bytes = SortMem * 1024L;
if (hash_table_bytes < (SortMem * 1024L))
hash_table_bytes = SortMem * 1024L;
/* /*
* Count the number of hash buckets we want for the whole relation, * Count the number of hash buckets we want for the whole relation,
* for an average bucket load of NTUP_PER_BUCKET (per virtual * for an average bucket load of NTUP_PER_BUCKET (per virtual
* bucket!). * bucket!). It has to fit in an int, however.
*/ */
totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET); dtmp = ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
if (dtmp < INT_MAX)
totalbuckets = (int) dtmp;
else
totalbuckets = INT_MAX;
if (totalbuckets <= 0)
totalbuckets = 1;
/* /*
* Count the number of buckets we think will actually fit in the * Count the number of buckets we think will actually fit in the
...@@ -409,10 +413,16 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, ...@@ -409,10 +413,16 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
* that nbatch doesn't have to have anything to do with the ratio * that nbatch doesn't have to have anything to do with the ratio
* totalbuckets/nbuckets; in fact, it is the number of groups we * totalbuckets/nbuckets; in fact, it is the number of groups we
* will use for the part of the data that doesn't fall into the * will use for the part of the data that doesn't fall into the
* first nbuckets hash buckets. * first nbuckets hash buckets. We try to set it to make all the
* batches the same size. But we have to keep nbatch small
* enough to avoid integer overflow in ExecHashJoinGetBatch().
*/ */
nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) / dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
hash_table_bytes); hash_table_bytes);
if (dtmp < INT_MAX / totalbuckets)
nbatch = (int) dtmp;
else
nbatch = INT_MAX / totalbuckets;
if (nbatch <= 0) if (nbatch <= 0)
nbatch = 1; nbatch = 1;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment