Abuhujair Javed / Postgres FD Implementation / Commits

Commit 01a819ab, authored Jun 11, 2001 by Tom Lane
Make planner compute the number of hash buckets the same way that
nodeHash.c will compute it (by sharing code).
parent ccda1a67

Showing 3 changed files, with 161 additions and 127 deletions:

    src/backend/executor/nodeHash.c           +119   -94
    src/backend/optimizer/path/costsize.c      +37   -29
    src/include/executor/nodeHash.h             +5    -4
src/backend/executor/nodeHash.c

@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
- * $Id: nodeHash.c,v 1.57 2001/05/27 20:42:18 tgl Exp $
+ * $Id: nodeHash.c,v 1.58 2001/06/11 00:17:07 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -16,14 +16,12 @@
  *		ExecHash		- generate an in-memory hash table of the relation
  *		ExecInitHash	- initialize node and subnodes
  *		ExecEndHash		- shutdown node and subnodes
- *
  */
+#include "postgres.h"

 #include <sys/types.h>
 #include <math.h>

-#include "postgres.h"
-
 #include "executor/execdebug.h"
 #include "executor/nodeHash.h"
 #include "executor/nodeHashjoin.h"
@@ -209,111 +207,27 @@ ExecEndHash(Hash *node)
  *		create a hashtable in shared memory for hashjoin.
  * ----------------------------------------------------------------
  */
-#define FUDGE_FAC				2.0
-
 HashJoinTable
 ExecHashTableCreate(Hash *node)
 {
-	Plan	   *outerNode;
-	double		ntuples;
-	int			tupsize;
-	double		inner_rel_bytes;
-	double		hash_table_bytes;
-	int			nbatch;
 	HashJoinTable hashtable;
-	int			nbuckets;
+	Plan	   *outerNode;
 	int			totalbuckets;
-	int			bucketsize;
+	int			nbuckets;
+	int			nbatch;
 	int			i;
 	MemoryContext oldcxt;

 	/*
 	 * Get information about the size of the relation to be hashed (it's
 	 * the "outer" subtree of this node, but the inner relation of the
-	 * hashjoin).
-	 *
-	 * Caution: this is only the planner's estimates, and so can't be trusted
-	 * too far.  Apply a healthy fudge factor.
+	 * hashjoin).  Compute the appropriate size of the hash table.
 	 */
 	outerNode = outerPlan(node);
-	ntuples = outerNode->plan_rows;
-	if (ntuples <= 0.0)			/* force a plausible size if no info */
-		ntuples = 1000.0;
-
-	/*
-	 * estimate tupsize based on footprint of tuple in hashtable... but
-	 * what about palloc overhead?
-	 */
-	tupsize = MAXALIGN(outerNode->plan_width) +
-		MAXALIGN(sizeof(HashJoinTupleData));
-	inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
-
-	/*
-	 * Target hashtable size is SortMem kilobytes, but not less than
-	 * sqrt(estimated inner rel size), so as to avoid horrible
-	 * performance.
-	 */
-	hash_table_bytes = sqrt(inner_rel_bytes);
-	if (hash_table_bytes < (SortMem * 1024L))
-		hash_table_bytes = SortMem * 1024L;
-
-	/*
-	 * Count the number of hash buckets we want for the whole relation,
-	 * for an average bucket load of NTUP_PER_BUCKET (per virtual
-	 * bucket!).
-	 */
-	totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
-
-	/*
-	 * Count the number of buckets we think will actually fit in the
-	 * target memory size, at a loading of NTUP_PER_BUCKET (physical
-	 * buckets). NOTE: FUDGE_FAC here determines the fraction of the
-	 * hashtable space reserved to allow for nonuniform distribution of
-	 * hash values. Perhaps this should be a different number from the
-	 * other uses of FUDGE_FAC, but since we have no real good way to pick
-	 * either one...
-	 */
-	bucketsize = NTUP_PER_BUCKET * tupsize;
-	nbuckets = (int) (hash_table_bytes / (bucketsize * FUDGE_FAC));
-	if (nbuckets <= 0)
-		nbuckets = 1;
-
-	if (totalbuckets <= nbuckets)
-	{
-		/*
-		 * We have enough space, so no batching. In theory we could even
-		 * reduce nbuckets, but since that could lead to poor behavior if
-		 * estimated ntuples is much less than reality, it seems better to
-		 * make more buckets instead of fewer.
-		 */
-		totalbuckets = nbuckets;
-		nbatch = 0;
-	}
-	else
-	{
-		/*
-		 * Need to batch; compute how many batches we want to use. Note
-		 * that nbatch doesn't have to have anything to do with the ratio
-		 * totalbuckets/nbuckets; in fact, it is the number of groups we
-		 * will use for the part of the data that doesn't fall into the
-		 * first nbuckets hash buckets.
-		 */
-		nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
-							hash_table_bytes);
-		if (nbatch <= 0)
-			nbatch = 1;
-	}
-
-	/*
-	 * Now, totalbuckets is the number of (virtual) hashbuckets for the
-	 * whole relation, and nbuckets is the number of physical hashbuckets
-	 * we will use in the first pass. Data falling into the first
-	 * nbuckets virtual hashbuckets gets handled in the first pass;
-	 * everything else gets divided into nbatch batches to be processed in
-	 * additional passes.
-	 */
+	ExecChooseHashTableSize(outerNode->plan_rows, outerNode->plan_width,
+							&totalbuckets, &nbuckets, &nbatch);

 #ifdef HJDEBUG
 	printf("nbatch = %d, totalbuckets = %d, nbuckets = %d\n",
 		   nbatch, totalbuckets, nbuckets);
@@ -407,6 +321,117 @@ ExecHashTableCreate(Hash *node)
 	return hashtable;
 }

+/*
+ * Compute appropriate size for hashtable given the estimated size of the
+ * relation to be hashed (number of rows and average row width).
+ *
+ * Caution: the input is only the planner's estimates, and so can't be
+ * trusted too far.  Apply a healthy fudge factor.
+ *
+ * This is exported so that the planner's costsize.c can use it.
+ */
+
+/* Target bucket loading (tuples per bucket) */
+#define NTUP_PER_BUCKET			10
+
+/* Fudge factor to allow for inaccuracy of input estimates */
+#define FUDGE_FAC				2.0
+
+void
+ExecChooseHashTableSize(double ntuples, int tupwidth,
+						int *virtualbuckets,
+						int *physicalbuckets,
+						int *numbatches)
+{
+	int			tupsize;
+	double		inner_rel_bytes;
+	double		hash_table_bytes;
+	int			nbatch;
+	int			nbuckets;
+	int			totalbuckets;
+	int			bucketsize;
+
+	/* Force a plausible relation size if no info */
+	if (ntuples <= 0.0)
+		ntuples = 1000.0;
+
+	/*
+	 * Estimate tupsize based on footprint of tuple in hashtable... but
+	 * what about palloc overhead?
+	 */
+	tupsize = MAXALIGN(tupwidth) + MAXALIGN(sizeof(HashJoinTupleData));
+	inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
+
+	/*
+	 * Target hashtable size is SortMem kilobytes, but not less than
+	 * sqrt(estimated inner rel size), so as to avoid horrible
+	 * performance.
+	 */
+	hash_table_bytes = sqrt(inner_rel_bytes);
+	if (hash_table_bytes < (SortMem * 1024L))
+		hash_table_bytes = SortMem * 1024L;
+
+	/*
+	 * Count the number of hash buckets we want for the whole relation,
+	 * for an average bucket load of NTUP_PER_BUCKET (per virtual
+	 * bucket!).
+	 */
+	totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+
+	/*
+	 * Count the number of buckets we think will actually fit in the
+	 * target memory size, at a loading of NTUP_PER_BUCKET (physical
+	 * buckets). NOTE: FUDGE_FAC here determines the fraction of the
+	 * hashtable space reserved to allow for nonuniform distribution of
+	 * hash values. Perhaps this should be a different number from the
+	 * other uses of FUDGE_FAC, but since we have no real good way to pick
+	 * either one...
+	 */
+	bucketsize = NTUP_PER_BUCKET * tupsize;
+	nbuckets = (int) (hash_table_bytes / (bucketsize * FUDGE_FAC));
+	if (nbuckets <= 0)
+		nbuckets = 1;
+
+	if (totalbuckets <= nbuckets)
+	{
+		/*
+		 * We have enough space, so no batching. In theory we could even
+		 * reduce nbuckets, but since that could lead to poor behavior if
+		 * estimated ntuples is much less than reality, it seems better to
+		 * make more buckets instead of fewer.
+		 */
+		totalbuckets = nbuckets;
+		nbatch = 0;
+	}
+	else
+	{
+		/*
+		 * Need to batch; compute how many batches we want to use. Note
+		 * that nbatch doesn't have to have anything to do with the ratio
+		 * totalbuckets/nbuckets; in fact, it is the number of groups we
+		 * will use for the part of the data that doesn't fall into the
+		 * first nbuckets hash buckets.
+		 */
+		nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
+							hash_table_bytes);
+		if (nbatch <= 0)
+			nbatch = 1;
+	}
+
+	/*
+	 * Now, totalbuckets is the number of (virtual) hashbuckets for the
+	 * whole relation, and nbuckets is the number of physical hashbuckets
+	 * we will use in the first pass. Data falling into the first
+	 * nbuckets virtual hashbuckets gets handled in the first pass;
+	 * everything else gets divided into nbatch batches to be processed in
+	 * additional passes.
+	 */
+	*virtualbuckets = totalbuckets;
+	*physicalbuckets = nbuckets;
+	*numbatches = nbatch;
+}
+
 /* ----------------------------------------------------------------
  *		ExecHashTableDestroy
  *
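Editorial aside: the sizing logic in ExecChooseHashTableSize is plain arithmetic, so it can be exercised outside the server. The sketch below is not the commit's code. It assumes SortMem is 512 (kilobytes) and substitutes a flat 24-byte per-tuple overhead for MAXALIGN(sizeof(HashJoinTupleData)); real alignment and palloc overhead differ. It only illustrates how row count and width drive the (virtual buckets, physical buckets, batches) answer.

#include <math.h>
#include <stdio.h>

#define NTUP_PER_BUCKET 10
#define FUDGE_FAC       2.0

/* Assumptions for illustration only: SortMem in kilobytes, and a flat
   24-byte overhead standing in for MAXALIGN(sizeof(HashJoinTupleData)). */
static const long sort_mem_kb = 512;
static const int  tuple_overhead = 24;

static void
choose_hash_table_size(double ntuples, int tupwidth,
                       int *virtualbuckets, int *physicalbuckets,
                       int *numbatches)
{
    int     tupsize;
    double  inner_rel_bytes;
    double  hash_table_bytes;
    int     nbuckets;
    int     totalbuckets;
    int     nbatch;
    int     bucketsize;

    if (ntuples <= 0.0)         /* force a plausible size if no info */
        ntuples = 1000.0;

    tupsize = tupwidth + tuple_overhead;
    inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;

    /* target size: SortMem kilobytes, but at least sqrt(inner rel size) */
    hash_table_bytes = sqrt(inner_rel_bytes);
    if (hash_table_bytes < sort_mem_kb * 1024L)
        hash_table_bytes = sort_mem_kb * 1024L;

    /* virtual buckets wanted for the whole relation */
    totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);

    /* physical buckets that actually fit in the target memory size */
    bucketsize = NTUP_PER_BUCKET * tupsize;
    nbuckets = (int) (hash_table_bytes / (bucketsize * FUDGE_FAC));
    if (nbuckets <= 0)
        nbuckets = 1;

    if (totalbuckets <= nbuckets)
    {
        totalbuckets = nbuckets;    /* everything fits: no batching */
        nbatch = 0;
    }
    else
    {
        nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
                            hash_table_bytes);
        if (nbatch <= 0)
            nbatch = 1;
    }

    *virtualbuckets = totalbuckets;
    *physicalbuckets = nbuckets;
    *numbatches = nbatch;
}

int
main(void)
{
    int     vb, pb, nb;

    /* a million 40-byte rows: overflows 512KB, so batching kicks in */
    choose_hash_table_size(1000000.0, 40, &vb, &pb, &nb);
    printf("virtual=%d physical=%d batches=%d\n", vb, pb, nb);

    /* two thousand 40-byte rows: fits in memory, so nbatch = 0 */
    choose_hash_table_size(2000.0, 40, &vb, &pb, &nb);
    printf("virtual=%d physical=%d batches=%d\n", vb, pb, nb);
    return 0;
}

Under these assumptions the first call reports roughly 200,000 virtual buckets, 409 physical buckets, and 244 batches, while the second fits in a single pass with nbatch = 0.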
src/backend/optimizer/path/costsize.c

@@ -42,7 +42,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.76 2001/06/10 02:59:35 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.77 2001/06/11 00:17:08 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -791,19 +791,19 @@ cost_hashjoin(Path *path, Query *root,
  * smart enough to figure out how the restrict clauses might change the
  * distribution, so this will have to do for now.
  *
- * The executor tries for average bucket loading of NTUP_PER_BUCKET by setting
- * number of buckets equal to ntuples / NTUP_PER_BUCKET, which would yield
- * a bucketsize fraction of NTUP_PER_BUCKET / ntuples.  But that goal will
- * be reached only if the data values are uniformly distributed among the
- * buckets, which requires (a) at least ntuples / NTUP_PER_BUCKET distinct
- * data values, and (b) a not-too-skewed data distribution.  Otherwise the
- * buckets will be nonuniformly occupied.  If the other relation in the join
- * has a similar distribution, the most-loaded buckets are exactly those
- * that will be probed most often.  Therefore, the "average" bucket size for
- * costing purposes should really be taken as something close to the "worst
- * case" bucket size.  We try to estimate this by first scaling up if there
- * are too few distinct data values, and then scaling up again by the
- * ratio of the most common value's frequency to the average frequency.
+ * We can get the number of buckets the executor will use for the given
+ * input relation.  If the data were perfectly distributed, with the same
+ * number of tuples going into each available bucket, then the bucketsize
+ * fraction would be 1/nbuckets.  But this happy state of affairs will occur
+ * only if (a) there are at least nbuckets distinct data values, and (b)
+ * we have a not-too-skewed data distribution.  Otherwise the buckets will
+ * be nonuniformly occupied.  If the other relation in the join has a key
+ * distribution similar to this one's, then the most-loaded buckets are
+ * exactly those that will be probed most often.  Therefore, the "average"
+ * bucket size for costing purposes should really be taken as something close
+ * to the "worst case" bucket size.  We try to estimate this by adjusting the
+ * fraction if there are too few distinct data values, and then scaling up
+ * by the ratio of the most common value's frequency to the average frequency.
  *
  * If no statistics are available, use a default estimate of 0.1.  This will
  * discourage use of a hash rather strongly if the inner relation is large,
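Editorial aside, with invented numbers to make the new comment concrete: suppose the executor reports 20,000 virtual buckets for the inner relation and the hashed column has 5,000 distinct values. Since 5,000 < 20,000, the initial bucketsize fraction is 1/5,000 = 0.0002. If the most common value's frequency is 0.01 while the average frequency is (1 - stanullfrac) / ndistinct, about 0.0002, the later skew adjustment scales the estimate up by a factor of 50 to 0.01, approximating the worst-case bucket that the join will probe most often.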
@@ -815,11 +815,13 @@ estimate_hash_bucketsize(Query *root, Var *var)
 {
 	Oid			relid;
 	RelOptInfo *rel;
+	int			virtualbuckets;
+	int			physicalbuckets;
+	int			numbatches;
 	HeapTuple	tuple;
 	Form_pg_statistic stats;
 	double		estfract,
 				ndistinct,
-				needdistinct,
 				mcvfreq,
 				avgfreq;
 	float4	   *numbers;
@@ -841,6 +843,12 @@ estimate_hash_bucketsize(Query *root, Var *var)
 	if (rel->tuples <= 0.0 || rel->rows <= 0.0)
 		return 0.1;				/* ensure we can divide below */

+	/* Get hash table size that executor would use for this relation */
+	ExecChooseHashTableSize(rel->rows, rel->width,
+							&virtualbuckets,
+							&physicalbuckets,
+							&numbatches);
+
 	tuple = SearchSysCache(STATRELATT,
 						   ObjectIdGetDatum(relid),
 						   Int16GetDatum(var->varattno),
@@ -857,7 +865,7 @@ estimate_hash_bucketsize(Query *root, Var *var)
 		case ObjectIdAttributeNumber:
 		case SelfItemPointerAttributeNumber:
 			/* these are unique, so buckets should be well-distributed */
-			return (double) NTUP_PER_BUCKET / rel->rows;
+			return 1.0 / (double) virtualbuckets;
 		case TableOidAttributeNumber:
 			/* hashing this is a terrible idea... */
 			return 1.0;
@@ -873,6 +881,12 @@ estimate_hash_bucketsize(Query *root, Var *var)
 	if (ndistinct < 0.0)
 		ndistinct = -ndistinct * rel->tuples;

+	if (ndistinct <= 0.0)		/* ensure we can divide */
+	{
+		ReleaseSysCache(tuple);
+		return 0.1;
+	}
+
 	/* Also compute avg freq of all distinct data values in raw relation */
 	avgfreq = (1.0 - stats->stanullfrac) / ndistinct;
@@ -887,20 +901,14 @@ estimate_hash_bucketsize(Query *root, Var *var)
 		ndistinct *= rel->rows / rel->tuples;

 	/*
-	 * Form initial estimate of bucketsize fraction.  Here we use rel->rows,
-	 * ie the number of rows after applying restriction clauses, because
-	 * that's what the fraction will eventually be multiplied by in
-	 * cost_heapjoin.
+	 * Initial estimate of bucketsize fraction is 1/nbuckets as long as
+	 * the number of buckets is less than the expected number of distinct
+	 * values; otherwise it is 1/ndistinct.
 	 */
-	estfract = (double) NTUP_PER_BUCKET / rel->rows;
-
-	/*
-	 * Adjust estimated bucketsize if too few distinct values (after
-	 * restriction clauses) to fill all the buckets.
-	 */
-	needdistinct = rel->rows / (double) NTUP_PER_BUCKET;
-	if (ndistinct < needdistinct)
-		estfract *= needdistinct / ndistinct;
+	if (ndistinct > (double) virtualbuckets)
+		estfract = 1.0 / (double) virtualbuckets;
+	else
+		estfract = 1.0 / ndistinct;

 	/*
 	 * Look up the frequency of the most common value, if available.
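Editorial aside: the sketch below is not part of the commit; it contrasts the old and new initial estimates for an invented relation of 100,000 rows with 50,000 distinct values, where the executor would choose 20,000 virtual buckets (all numbers are assumptions).

#include <stdio.h>

#define NTUP_PER_BUCKET 10

int
main(void)
{
    double  rows = 100000.0;
    double  ndistinct = 50000.0;
    double  virtualbuckets = 20000.0;
    double  old_est;
    double  new_est;

    /* Old rule: NTUP_PER_BUCKET / rows, scaled up when there are too
       few distinct values to fill rows / NTUP_PER_BUCKET buckets. */
    old_est = (double) NTUP_PER_BUCKET / rows;
    if (ndistinct < rows / NTUP_PER_BUCKET)
        old_est *= (rows / NTUP_PER_BUCKET) / ndistinct;

    /* New rule: 1/nbuckets, unless there are fewer distinct values
       than buckets, in which case 1/ndistinct. */
    if (ndistinct > virtualbuckets)
        new_est = 1.0 / virtualbuckets;
    else
        new_est = 1.0 / ndistinct;

    /* Prints: old = 0.0001, new = 5e-05 -- the new estimate tracks the
       bucket count the executor will actually use. */
    printf("old = %g, new = %g\n", old_est, new_est);
    return 0;
}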
src/include/executor/nodeHash.h

@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: nodeHash.h,v 1.19 2001/03/22 04:00:44 momjian Exp $
+ * $Id: nodeHash.h,v 1.20 2001/06/11 00:17:07 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -16,9 +16,6 @@
 #include "nodes/plannodes.h"

-/* NTUP_PER_BUCKET is exported because planner wants to see it */
-#define NTUP_PER_BUCKET			10
-
 extern TupleTableSlot *ExecHash(Hash *node);
 extern bool ExecInitHash(Hash *node, EState *estate, Plan *parent);
 extern int	ExecCountSlotsHash(Hash *node);
@@ -35,5 +32,9 @@ extern HeapTuple ExecScanHashBucket(HashJoinState *hjstate, List *hjclauses,
 									ExprContext *econtext);
 extern void ExecHashTableReset(HashJoinTable hashtable, long ntuples);
 extern void ExecReScanHash(Hash *node, ExprContext *exprCtxt, Plan *parent);
+extern void ExecChooseHashTableSize(double ntuples, int tupwidth,
+									int *virtualbuckets,
+									int *physicalbuckets,
+									int *numbatches);

 #endif	 /* NODEHASH_H */