Commit 5edc63bd authored by Robert Haas's avatar Robert Haas

Account for the effect of lossy pages when costing bitmap scans.

Dilip Kumar, reviewed by Alexander Kumenkov, Amul Sul, and me.
Some final adjustments by me.

Discussion: http://postgr.es/m/CAFiTN-sYtqUOXQ4SpuhTv0Z9gD0si3YxZGv_PQAAMX8qbOotcg@mail.gmail.com
parent 0c98d0dd
...@@ -265,7 +265,6 @@ TIDBitmap * ...@@ -265,7 +265,6 @@ TIDBitmap *
tbm_create(long maxbytes, dsa_area *dsa) tbm_create(long maxbytes, dsa_area *dsa)
{ {
TIDBitmap *tbm; TIDBitmap *tbm;
long nbuckets;
/* Create the TIDBitmap struct and zero all its fields */ /* Create the TIDBitmap struct and zero all its fields */
tbm = makeNode(TIDBitmap); tbm = makeNode(TIDBitmap);
...@@ -273,17 +272,7 @@ tbm_create(long maxbytes, dsa_area *dsa) ...@@ -273,17 +272,7 @@ tbm_create(long maxbytes, dsa_area *dsa)
tbm->mcxt = CurrentMemoryContext; tbm->mcxt = CurrentMemoryContext;
tbm->status = TBM_EMPTY; tbm->status = TBM_EMPTY;
/* tbm->maxentries = (int) tbm_calculate_entries(maxbytes);
* Estimate number of hashtable entries we can have within maxbytes. This
* estimates the hash cost as sizeof(PagetableEntry), which is good enough
* for our purpose. Also count an extra Pointer per entry for the arrays
* created during iteration readout.
*/
nbuckets = maxbytes /
(sizeof(PagetableEntry) + sizeof(Pointer) + sizeof(Pointer));
nbuckets = Min(nbuckets, INT_MAX - 1); /* safety limit */
nbuckets = Max(nbuckets, 16); /* sanity limit */
tbm->maxentries = (int) nbuckets;
tbm->lossify_start = 0; tbm->lossify_start = 0;
tbm->dsa = dsa; tbm->dsa = dsa;
tbm->dsapagetable = InvalidDsaPointer; tbm->dsapagetable = InvalidDsaPointer;
...@@ -1546,3 +1535,27 @@ pagetable_free(pagetable_hash *pagetable, void *pointer) ...@@ -1546,3 +1535,27 @@ pagetable_free(pagetable_hash *pagetable, void *pointer)
tbm->dsapagetableold = InvalidDsaPointer; tbm->dsapagetableold = InvalidDsaPointer;
} }
} }
/*
 * tbm_calculate_entries
 *
 * Estimate number of hashtable entries we can have within maxbytes.
 */
long
tbm_calculate_entries(double maxbytes)
{
	long		nentries;

	/*
	 * Charge each entry at sizeof(PagetableEntry) for the hash table itself,
	 * which is close enough for our costing purposes, plus two Pointers
	 * apiece for the page arrays built during iteration readout.
	 */
	nentries = maxbytes /
		(sizeof(PagetableEntry) + sizeof(Pointer) + sizeof(Pointer));

	/* Clamp to a sane range: safely below INT_MAX, and at least 16. */
	if (nentries > INT_MAX - 1)
		nentries = INT_MAX - 1;	/* safety limit */
	if (nentries < 16)
		nentries = 16;			/* sanity limit */

	return nentries;
}
...@@ -5171,6 +5171,8 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, ...@@ -5171,6 +5171,8 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
double T; double T;
double pages_fetched; double pages_fetched;
double tuples_fetched; double tuples_fetched;
double heap_pages;
long maxentries;
/* /*
* Fetch total cost of obtaining the bitmap, as well as its total * Fetch total cost of obtaining the bitmap, as well as its total
...@@ -5185,6 +5187,24 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, ...@@ -5185,6 +5187,24 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
T = (baserel->pages > 1) ? (double) baserel->pages : 1.0; T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
/*
* For a single scan, the number of heap pages that need to be fetched is
* the same as the Mackert and Lohman formula for the case T <= b (ie, no
* re-reads needed).
*/
pages_fetched = (2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
/*
* Calculate the number of pages fetched from the heap. Then based on
* current work_mem estimate get the estimated maxentries in the bitmap.
* (Note that we always do this calculation based on the number of pages
* that would be fetched in a single iteration, even if loop_count > 1.
* That's correct, because only that number of entries will be stored in
* the bitmap at one time.)
*/
heap_pages = Min(pages_fetched, baserel->pages);
maxentries = tbm_calculate_entries(work_mem * 1024L);
if (loop_count > 1) if (loop_count > 1)
{ {
/* /*
...@@ -5199,22 +5219,41 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual, ...@@ -5199,22 +5219,41 @@ compute_bitmap_pages(PlannerInfo *root, RelOptInfo *baserel, Path *bitmapqual,
root); root);
pages_fetched /= loop_count; pages_fetched /= loop_count;
} }
else
{
/*
* For a single scan, the number of heap pages that need to be fetched
* is the same as the Mackert and Lohman formula for the case T <= b
* (ie, no re-reads needed).
*/
pages_fetched =
(2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
}
if (pages_fetched >= T) if (pages_fetched >= T)
pages_fetched = T; pages_fetched = T;
else else
pages_fetched = ceil(pages_fetched); pages_fetched = ceil(pages_fetched);
if (maxentries < heap_pages)
{
double exact_pages;
double lossy_pages;
/*
* Crude approximation of the number of lossy pages. Because of the
* way tbm_lossify() is coded, the number of lossy pages increases
* very sharply as soon as we run short of memory; this formula has
* that property and seems to perform adequately in testing, but it's
* possible we could do better somehow.
*/
lossy_pages = Max(0, heap_pages - maxentries / 2);
exact_pages = heap_pages - lossy_pages;
/*
* If there are lossy pages then recompute the number of tuples
* processed by the bitmap heap node. We assume here that the chance
* of a given tuple coming from an exact page is the same as the
* chance that a given page is exact. This might not be true, but
* it's not clear how we can do any better.
*/
if (lossy_pages > 0)
tuples_fetched =
clamp_row_est(indexSelectivity *
(exact_pages / heap_pages) * baserel->tuples +
(lossy_pages / heap_pages) * baserel->tuples);
}
if (cost) if (cost)
*cost = indexTotalCost; *cost = indexTotalCost;
if (tuple) if (tuple)
......
...@@ -70,5 +70,6 @@ extern void tbm_end_iterate(TBMIterator *iterator); ...@@ -70,5 +70,6 @@ extern void tbm_end_iterate(TBMIterator *iterator);
extern void tbm_end_shared_iterate(TBMSharedIterator *iterator); extern void tbm_end_shared_iterate(TBMSharedIterator *iterator);
extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa, extern TBMSharedIterator *tbm_attach_shared_iterate(dsa_area *dsa,
dsa_pointer dp); dsa_pointer dp);
extern long tbm_calculate_entries(double maxbytes);
#endif /* TIDBITMAP_H */ #endif /* TIDBITMAP_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment