Commit bd2c980b authored by Tom Lane

Buy back some of the cycles spent in more-expensive hash functions by
selecting power-of-2, rather than prime, numbers of buckets in hash joins.
If the hash functions are doing their jobs properly by making all hash bits
equally random, this is good enough, and it saves expensive integer division
and modulus operations.
parent 1f559b7d
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.111 2007/02/22 22:49:27 tgl Exp $ * $PostgreSQL: pgsql/src/backend/executor/nodeHash.c,v 1.112 2007/06/01 17:38:44 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -31,6 +31,7 @@ ...@@ -31,6 +31,7 @@
#include "executor/nodeHashjoin.h" #include "executor/nodeHashjoin.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "parser/parse_expr.h" #include "parser/parse_expr.h"
#include "utils/dynahash.h"
#include "utils/memutils.h" #include "utils/memutils.h"
#include "utils/lsyscache.h" #include "utils/lsyscache.h"
...@@ -223,6 +224,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators) ...@@ -223,6 +224,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
Plan *outerNode; Plan *outerNode;
int nbuckets; int nbuckets;
int nbatch; int nbatch;
int log2_nbuckets;
int nkeys; int nkeys;
int i; int i;
ListCell *ho; ListCell *ho;
...@@ -242,6 +244,10 @@ ExecHashTableCreate(Hash *node, List *hashOperators) ...@@ -242,6 +244,10 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
printf("nbatch = %d, nbuckets = %d\n", nbatch, nbuckets); printf("nbatch = %d, nbuckets = %d\n", nbatch, nbuckets);
#endif #endif
/* nbuckets must be a power of 2 */
log2_nbuckets = my_log2(nbuckets);
Assert(nbuckets == (1 << log2_nbuckets));
/* /*
* Initialize the hash table control block. * Initialize the hash table control block.
* *
...@@ -250,6 +256,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators) ...@@ -250,6 +256,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
*/ */
hashtable = (HashJoinTable) palloc(sizeof(HashJoinTableData)); hashtable = (HashJoinTable) palloc(sizeof(HashJoinTableData));
hashtable->nbuckets = nbuckets; hashtable->nbuckets = nbuckets;
hashtable->log2_nbuckets = log2_nbuckets;
hashtable->buckets = NULL; hashtable->buckets = NULL;
hashtable->nbatch = nbatch; hashtable->nbatch = nbatch;
hashtable->curbatch = 0; hashtable->curbatch = 0;
...@@ -345,13 +352,6 @@ ExecHashTableCreate(Hash *node, List *hashOperators) ...@@ -345,13 +352,6 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
/* Target bucket loading (tuples per bucket) */ /* Target bucket loading (tuples per bucket) */
#define NTUP_PER_BUCKET 10 #define NTUP_PER_BUCKET 10
/* Prime numbers that we like to use as nbuckets values */
static const int hprimes[] = {
1033, 2063, 4111, 8219, 16417, 32779, 65539, 131111,
262151, 524341, 1048589, 2097211, 4194329, 8388619, 16777289, 33554473,
67108913, 134217773, 268435463, 536870951, 1073741831
};
void void
ExecChooseHashTableSize(double ntuples, int tupwidth, ExecChooseHashTableSize(double ntuples, int tupwidth,
int *numbuckets, int *numbuckets,
...@@ -396,7 +396,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, ...@@ -396,7 +396,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
int minbatch; int minbatch;
lbuckets = (hash_table_bytes / tupsize) / NTUP_PER_BUCKET; lbuckets = (hash_table_bytes / tupsize) / NTUP_PER_BUCKET;
lbuckets = Min(lbuckets, INT_MAX); lbuckets = Min(lbuckets, INT_MAX / 2);
nbuckets = (int) lbuckets; nbuckets = (int) lbuckets;
dbatch = ceil(inner_rel_bytes / hash_table_bytes); dbatch = ceil(inner_rel_bytes / hash_table_bytes);
...@@ -412,27 +412,22 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, ...@@ -412,27 +412,22 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
double dbuckets; double dbuckets;
dbuckets = ceil(ntuples / NTUP_PER_BUCKET); dbuckets = ceil(ntuples / NTUP_PER_BUCKET);
dbuckets = Min(dbuckets, INT_MAX); dbuckets = Min(dbuckets, INT_MAX / 2);
nbuckets = (int) dbuckets; nbuckets = (int) dbuckets;
nbatch = 1; nbatch = 1;
} }
/* /*
* We want nbuckets to be prime so as to avoid having bucket and batch * Both nbuckets and nbatch must be powers of 2 to make
* numbers depend on only some bits of the hash code. Choose the next * ExecHashGetBucketAndBatch fast. We already fixed nbatch; now inflate
* larger prime from the list in hprimes[]. (This also enforces that * nbuckets to the next larger power of 2. We also force nbuckets to not
* nbuckets is not very small, by the simple expedient of not putting any * be real small, by starting the search at 2^10.
* very small entries in hprimes[].)
*/ */
for (i = 0; i < (int) lengthof(hprimes); i++) i = 10;
{ while ((1 << i) < nbuckets)
if (hprimes[i] >= nbuckets) i++;
{ nbuckets = (1 << i);
nbuckets = hprimes[i];
break;
}
}
*numbuckets = nbuckets; *numbuckets = nbuckets;
*numbatches = nbatch; *numbatches = nbatch;
...@@ -765,8 +760,11 @@ ExecHashGetHashValue(HashJoinTable hashtable, ...@@ -765,8 +760,11 @@ ExecHashGetHashValue(HashJoinTable hashtable,
* increase. Our algorithm is * increase. Our algorithm is
* bucketno = hashvalue MOD nbuckets * bucketno = hashvalue MOD nbuckets
* batchno = (hashvalue DIV nbuckets) MOD nbatch * batchno = (hashvalue DIV nbuckets) MOD nbatch
* where nbuckets should preferably be prime so that all bits of the * where nbuckets and nbatch are both expected to be powers of 2, so we can
* hash value can affect both bucketno and batchno. * do the computations by shifting and masking. (This assumes that all hash
* functions are good about randomizing all their output bits, else we are
* likely to have very skewed bucket or batch occupancy.)
*
* nbuckets doesn't change over the course of the join. * nbuckets doesn't change over the course of the join.
* *
* nbatch is always a power of 2; we increase it only by doubling it. This * nbatch is always a power of 2; we increase it only by doubling it. This
...@@ -783,13 +781,13 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable, ...@@ -783,13 +781,13 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable,
if (nbatch > 1) if (nbatch > 1)
{ {
*bucketno = hashvalue % nbuckets; /* we can do MOD by masking, DIV by shifting */
/* since nbatch is a power of 2, can do MOD by masking */ *bucketno = hashvalue & (nbuckets - 1);
*batchno = (hashvalue / nbuckets) & (nbatch - 1); *batchno = (hashvalue >> hashtable->log2_nbuckets) & (nbatch - 1);
} }
else else
{ {
*bucketno = hashvalue % nbuckets; *bucketno = hashvalue & (nbuckets - 1);
*batchno = 0; *batchno = 0;
} }
} }
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.44 2007/01/30 01:33:36 tgl Exp $ * $PostgreSQL: pgsql/src/include/executor/hashjoin.h,v 1.45 2007/06/01 17:38:44 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -76,6 +76,8 @@ typedef struct HashJoinTupleData ...@@ -76,6 +76,8 @@ typedef struct HashJoinTupleData
typedef struct HashJoinTableData typedef struct HashJoinTableData
{ {
int nbuckets; /* # buckets in the in-memory hash table */ int nbuckets; /* # buckets in the in-memory hash table */
int log2_nbuckets; /* its log2 (nbuckets must be a power of 2) */
/* buckets[i] is head of list of tuples in i'th in-memory bucket */ /* buckets[i] is head of list of tuples in i'th in-memory bucket */
struct HashJoinTupleData **buckets; struct HashJoinTupleData **buckets;
/* buckets array is per-batch storage, as are all the tuples */ /* buckets array is per-batch storage, as are all the tuples */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment