Commit 4e514c61 authored by Amit Kapila

Delete empty pages in each pass during GIST VACUUM.

Earlier, we used to postpone deleting empty pages till the second stage of
vacuum to amortize the cost of scanning internal pages.  However, that can
sometimes (say vacuum is canceled or errors out between the first and second
stage) delay the recycling of those pages.

Another thing is that, to facilitate deleting empty pages in the second
stage, we need to share the information about internal and empty pages
between different stages of vacuum.  It would be quite tricky to share this
information via DSM, which is required for the upcoming parallel vacuum
patch.

Also, it will bring the logic to reclaim deleted pages closer to that of
nbtree, where we delete empty pages in each pass.

Overall, the advantages of deleting empty pages in each pass outweigh the
advantages of postponing the same.

Author: Dilip Kumar, with changes by Amit Kapila
Reviewed-by: Sawada Masahiko and Amit Kapila
Discussion: https://postgr.es/m/CAA4eK1LGr+MN0xHZpJ2dfS8QNQ1a_aROKowZB+MPNep8FVtwAA@mail.gmail.com
parent eae056c1
...@@ -429,18 +429,17 @@ splits during searches, we don't need a "vacuum cycle ID" concept for that ...@@ -429,18 +429,17 @@ splits during searches, we don't need a "vacuum cycle ID" concept for that
like B-tree does. like B-tree does.
While we scan all the pages, we also make note of any completely empty leaf While we scan all the pages, we also make note of any completely empty leaf
pages. We will try to unlink them from the tree in the second stage. We also pages. We will try to unlink them from the tree after the scan. We also record
record the block numbers of all internal pages; they are needed in the second the block numbers of all internal pages; they are needed to locate parents of
stage, to locate parents of the empty pages. the empty pages while unlinking them.
In the second stage, we try to unlink any empty leaf pages from the tree, so We try to unlink any empty leaf pages from the tree, so that their space can
that their space can be reused. In order to delete an empty page, its be reused. In order to delete an empty page, its downlink must be removed from
downlink must be removed from the parent. We scan all the internal pages, the parent. We scan all the internal pages, whose block numbers we memorized
whose block numbers we memorized in the first stage, and look for downlinks in the first stage, and look for downlinks to pages that we have memorized as
to pages that we have memorized as being empty. Whenever we find one, we being empty. Whenever we find one, we acquire a lock on the parent and child
acquire a lock on the parent and child page, re-check that the child page is page, re-check that the child page is still empty. Then, we remove the
still empty. Then, we remove the downlink and mark the child as deleted, and downlink and mark the child as deleted, and release the locks.
release the locks.
The insertion algorithm would get confused, if an internal page was completely The insertion algorithm would get confused, if an internal page was completely
empty. So we never delete the last child of an internal page, even if it's empty. So we never delete the last child of an internal page, even if it's
......
...@@ -24,58 +24,34 @@ ...@@ -24,58 +24,34 @@
#include "storage/lmgr.h" #include "storage/lmgr.h"
#include "utils/memutils.h" #include "utils/memutils.h"
/* /* Working state needed by gistbulkdelete */
* State kept across vacuum stages.
*/
typedef struct typedef struct
{ {
IndexBulkDeleteResult stats; /* must be first */ IndexVacuumInfo *info;
IndexBulkDeleteResult *stats;
IndexBulkDeleteCallback callback;
void *callback_state;
GistNSN startNSN;
/* /*
* These are used to memorize all internal and empty leaf pages in the 1st * These are used to memorize all internal and empty leaf pages. They are
* vacuum stage. They are used in the 2nd stage, to delete all the empty * used for deleting all the empty pages.
* pages.
*/ */
IntegerSet *internal_page_set; IntegerSet *internal_page_set;
IntegerSet *empty_leaf_set; IntegerSet *empty_leaf_set;
MemoryContext page_set_context; MemoryContext page_set_context;
} GistBulkDeleteResult;
/* Working state needed by gistbulkdelete */
typedef struct
{
IndexVacuumInfo *info;
GistBulkDeleteResult *stats;
IndexBulkDeleteCallback callback;
void *callback_state;
GistNSN startNSN;
} GistVacState; } GistVacState;
static void gistvacuumscan(IndexVacuumInfo *info, GistBulkDeleteResult *stats, static void gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
IndexBulkDeleteCallback callback, void *callback_state); IndexBulkDeleteCallback callback, void *callback_state);
static void gistvacuumpage(GistVacState *vstate, BlockNumber blkno, static void gistvacuumpage(GistVacState *vstate, BlockNumber blkno,
BlockNumber orig_blkno); BlockNumber orig_blkno);
static void gistvacuum_delete_empty_pages(IndexVacuumInfo *info, static void gistvacuum_delete_empty_pages(IndexVacuumInfo *info,
GistBulkDeleteResult *stats); GistVacState *vstate);
static bool gistdeletepage(IndexVacuumInfo *info, GistBulkDeleteResult *stats, static bool gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
Buffer buffer, OffsetNumber downlink, Buffer buffer, OffsetNumber downlink,
Buffer leafBuffer); Buffer leafBuffer);
/* allocate the 'stats' struct that's kept over vacuum stages */
static GistBulkDeleteResult *
create_GistBulkDeleteResult(void)
{
GistBulkDeleteResult *gist_stats;
gist_stats = (GistBulkDeleteResult *) palloc0(sizeof(GistBulkDeleteResult));
gist_stats->page_set_context =
GenerationContextCreate(CurrentMemoryContext,
"GiST VACUUM page set context",
16 * 1024);
return gist_stats;
}
/* /*
* VACUUM bulkdelete stage: remove index entries. * VACUUM bulkdelete stage: remove index entries.
*/ */
...@@ -83,15 +59,13 @@ IndexBulkDeleteResult * ...@@ -83,15 +59,13 @@ IndexBulkDeleteResult *
gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
IndexBulkDeleteCallback callback, void *callback_state) IndexBulkDeleteCallback callback, void *callback_state)
{ {
GistBulkDeleteResult *gist_stats = (GistBulkDeleteResult *) stats;
/* allocate stats if first time through, else re-use existing struct */ /* allocate stats if first time through, else re-use existing struct */
if (gist_stats == NULL) if (stats == NULL)
gist_stats = create_GistBulkDeleteResult(); stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
gistvacuumscan(info, gist_stats, callback, callback_state); gistvacuumscan(info, stats, callback, callback_state);
return (IndexBulkDeleteResult *) gist_stats; return stats;
} }
/* /*
...@@ -100,8 +74,6 @@ gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats, ...@@ -100,8 +74,6 @@ gistbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
IndexBulkDeleteResult * IndexBulkDeleteResult *
gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
{ {
GistBulkDeleteResult *gist_stats = (GistBulkDeleteResult *) stats;
/* No-op in ANALYZE ONLY mode */ /* No-op in ANALYZE ONLY mode */
if (info->analyze_only) if (info->analyze_only)
return stats; return stats;
...@@ -111,24 +83,12 @@ gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) ...@@ -111,24 +83,12 @@ gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
* stats from the latest gistbulkdelete call. If it wasn't called, we * stats from the latest gistbulkdelete call. If it wasn't called, we
* still need to do a pass over the index, to obtain index statistics. * still need to do a pass over the index, to obtain index statistics.
*/ */
if (gist_stats == NULL) if (stats == NULL)
{ {
gist_stats = create_GistBulkDeleteResult(); stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
gistvacuumscan(info, gist_stats, NULL, NULL); gistvacuumscan(info, stats, NULL, NULL);
} }
/*
* If we saw any empty pages, try to unlink them from the tree so that
* they can be reused.
*/
gistvacuum_delete_empty_pages(info, gist_stats);
/* we don't need the internal and empty page sets anymore */
MemoryContextDelete(gist_stats->page_set_context);
gist_stats->page_set_context = NULL;
gist_stats->internal_page_set = NULL;
gist_stats->empty_leaf_set = NULL;
/* /*
* It's quite possible for us to be fooled by concurrent page splits into * It's quite possible for us to be fooled by concurrent page splits into
* double-counting some index tuples, so disbelieve any total that exceeds * double-counting some index tuples, so disbelieve any total that exceeds
...@@ -137,11 +97,11 @@ gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) ...@@ -137,11 +97,11 @@ gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
*/ */
if (!info->estimated_count) if (!info->estimated_count)
{ {
if (gist_stats->stats.num_index_tuples > info->num_heap_tuples) if (stats->num_index_tuples > info->num_heap_tuples)
gist_stats->stats.num_index_tuples = info->num_heap_tuples; stats->num_index_tuples = info->num_heap_tuples;
} }
return (IndexBulkDeleteResult *) gist_stats; return stats;
} }
/* /*
...@@ -153,15 +113,16 @@ gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats) ...@@ -153,15 +113,16 @@ gistvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
* occurred). * occurred).
* *
* This also makes note of any empty leaf pages, as well as all internal * This also makes note of any empty leaf pages, as well as all internal
* pages. The second stage, gistvacuum_delete_empty_pages(), needs that * pages while looping over all index pages. After scanning all the pages, we
* information. Any deleted pages are added directly to the free space map. * remove the empty pages so that they can be reused. Any deleted pages are
* (They should've been added there when they were originally deleted, already, * added directly to the free space map. (They should've been added there
* but it's possible that the FSM was lost at a crash, for example.) * when they were originally deleted, already, but it's possible that the FSM
* was lost at a crash, for example.)
* *
* The caller is responsible for initially allocating/zeroing a stats struct. * The caller is responsible for initially allocating/zeroing a stats struct.
*/ */
static void static void
gistvacuumscan(IndexVacuumInfo *info, GistBulkDeleteResult *stats, gistvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
IndexBulkDeleteCallback callback, void *callback_state) IndexBulkDeleteCallback callback, void *callback_state)
{ {
Relation rel = info->index; Relation rel = info->index;
...@@ -175,11 +136,10 @@ gistvacuumscan(IndexVacuumInfo *info, GistBulkDeleteResult *stats, ...@@ -175,11 +136,10 @@ gistvacuumscan(IndexVacuumInfo *info, GistBulkDeleteResult *stats,
* Reset counts that will be incremented during the scan; needed in case * Reset counts that will be incremented during the scan; needed in case
* of multiple scans during a single VACUUM command. * of multiple scans during a single VACUUM command.
*/ */
stats->stats.estimated_count = false; stats->estimated_count = false;
stats->stats.num_index_tuples = 0; stats->num_index_tuples = 0;
stats->stats.pages_deleted = 0; stats->pages_deleted = 0;
stats->stats.pages_free = 0; stats->pages_free = 0;
MemoryContextReset(stats->page_set_context);
/* /*
* Create the integer sets to remember all the internal and the empty leaf * Create the integer sets to remember all the internal and the empty leaf
...@@ -187,9 +147,12 @@ gistvacuumscan(IndexVacuumInfo *info, GistBulkDeleteResult *stats, ...@@ -187,9 +147,12 @@ gistvacuumscan(IndexVacuumInfo *info, GistBulkDeleteResult *stats,
* this context so that the subsequent allocations for these integer sets * this context so that the subsequent allocations for these integer sets
* will be done from the same context. * will be done from the same context.
*/ */
oldctx = MemoryContextSwitchTo(stats->page_set_context); vstate.page_set_context = GenerationContextCreate(CurrentMemoryContext,
stats->internal_page_set = intset_create(); "GiST VACUUM page set context",
stats->empty_leaf_set = intset_create(); 16 * 1024);
oldctx = MemoryContextSwitchTo(vstate.page_set_context);
vstate.internal_page_set = intset_create();
vstate.empty_leaf_set = intset_create();
MemoryContextSwitchTo(oldctx); MemoryContextSwitchTo(oldctx);
/* Set up info to pass down to gistvacuumpage */ /* Set up info to pass down to gistvacuumpage */
...@@ -257,11 +220,23 @@ gistvacuumscan(IndexVacuumInfo *info, GistBulkDeleteResult *stats, ...@@ -257,11 +220,23 @@ gistvacuumscan(IndexVacuumInfo *info, GistBulkDeleteResult *stats,
* Note that if no recyclable pages exist, we don't bother vacuuming the * Note that if no recyclable pages exist, we don't bother vacuuming the
* FSM at all. * FSM at all.
*/ */
if (stats->stats.pages_free > 0) if (stats->pages_free > 0)
IndexFreeSpaceMapVacuum(rel); IndexFreeSpaceMapVacuum(rel);
/* update statistics */ /* update statistics */
stats->stats.num_pages = num_pages; stats->num_pages = num_pages;
/*
* If we saw any empty pages, try to unlink them from the tree so that
* they can be reused.
*/
gistvacuum_delete_empty_pages(info, &vstate);
/* we don't need the internal and empty page sets anymore */
MemoryContextDelete(vstate.page_set_context);
vstate.page_set_context = NULL;
vstate.internal_page_set = NULL;
vstate.empty_leaf_set = NULL;
} }
/* /*
...@@ -278,7 +253,6 @@ gistvacuumscan(IndexVacuumInfo *info, GistBulkDeleteResult *stats, ...@@ -278,7 +253,6 @@ gistvacuumscan(IndexVacuumInfo *info, GistBulkDeleteResult *stats,
static void static void
gistvacuumpage(GistVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno) gistvacuumpage(GistVacState *vstate, BlockNumber blkno, BlockNumber orig_blkno)
{ {
GistBulkDeleteResult *stats = vstate->stats;
IndexVacuumInfo *info = vstate->info; IndexVacuumInfo *info = vstate->info;
IndexBulkDeleteCallback callback = vstate->callback; IndexBulkDeleteCallback callback = vstate->callback;
void *callback_state = vstate->callback_state; void *callback_state = vstate->callback_state;
...@@ -307,13 +281,13 @@ restart: ...@@ -307,13 +281,13 @@ restart:
{ {
/* Okay to recycle this page */ /* Okay to recycle this page */
RecordFreeIndexPage(rel, blkno); RecordFreeIndexPage(rel, blkno);
stats->stats.pages_free++; vstate->stats->pages_free++;
stats->stats.pages_deleted++; vstate->stats->pages_deleted++;
} }
else if (GistPageIsDeleted(page)) else if (GistPageIsDeleted(page))
{ {
/* Already deleted, but can't recycle yet */ /* Already deleted, but can't recycle yet */
stats->stats.pages_deleted++; vstate->stats->pages_deleted++;
} }
else if (GistPageIsLeaf(page)) else if (GistPageIsLeaf(page))
{ {
...@@ -388,7 +362,7 @@ restart: ...@@ -388,7 +362,7 @@ restart:
END_CRIT_SECTION(); END_CRIT_SECTION();
stats->stats.tuples_removed += ntodelete; vstate->stats->tuples_removed += ntodelete;
/* must recompute maxoff */ /* must recompute maxoff */
maxoff = PageGetMaxOffsetNumber(page); maxoff = PageGetMaxOffsetNumber(page);
} }
...@@ -405,10 +379,10 @@ restart: ...@@ -405,10 +379,10 @@ restart:
* it up. * it up.
*/ */
if (blkno == orig_blkno) if (blkno == orig_blkno)
intset_add_member(stats->empty_leaf_set, blkno); intset_add_member(vstate->empty_leaf_set, blkno);
} }
else else
stats->stats.num_index_tuples += nremain; vstate->stats->num_index_tuples += nremain;
} }
else else
{ {
...@@ -443,7 +417,7 @@ restart: ...@@ -443,7 +417,7 @@ restart:
* parents of empty leaf pages. * parents of empty leaf pages.
*/ */
if (blkno == orig_blkno) if (blkno == orig_blkno)
intset_add_member(stats->internal_page_set, blkno); intset_add_member(vstate->internal_page_set, blkno);
} }
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
...@@ -466,7 +440,7 @@ restart: ...@@ -466,7 +440,7 @@ restart:
* Scan all internal pages, and try to delete their empty child pages. * Scan all internal pages, and try to delete their empty child pages.
*/ */
static void static void
gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistBulkDeleteResult *stats) gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistVacState *vstate)
{ {
Relation rel = info->index; Relation rel = info->index;
BlockNumber empty_pages_remaining; BlockNumber empty_pages_remaining;
...@@ -475,10 +449,10 @@ gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistBulkDeleteResult *stats ...@@ -475,10 +449,10 @@ gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistBulkDeleteResult *stats
/* /*
* Rescan all inner pages to find those that have empty child pages. * Rescan all inner pages to find those that have empty child pages.
*/ */
empty_pages_remaining = intset_num_entries(stats->empty_leaf_set); empty_pages_remaining = intset_num_entries(vstate->empty_leaf_set);
intset_begin_iterate(stats->internal_page_set); intset_begin_iterate(vstate->internal_page_set);
while (empty_pages_remaining > 0 && while (empty_pages_remaining > 0 &&
intset_iterate_next(stats->internal_page_set, &blkno)) intset_iterate_next(vstate->internal_page_set, &blkno))
{ {
Buffer buffer; Buffer buffer;
Page page; Page page;
...@@ -521,7 +495,7 @@ gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistBulkDeleteResult *stats ...@@ -521,7 +495,7 @@ gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistBulkDeleteResult *stats
BlockNumber leafblk; BlockNumber leafblk;
leafblk = ItemPointerGetBlockNumber(&(idxtuple->t_tid)); leafblk = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
if (intset_is_member(stats->empty_leaf_set, leafblk)) if (intset_is_member(vstate->empty_leaf_set, leafblk))
{ {
leafs_to_delete[ntodelete] = leafblk; leafs_to_delete[ntodelete] = leafblk;
todelete[ntodelete++] = off; todelete[ntodelete++] = off;
...@@ -561,7 +535,7 @@ gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistBulkDeleteResult *stats ...@@ -561,7 +535,7 @@ gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistBulkDeleteResult *stats
gistcheckpage(rel, leafbuf); gistcheckpage(rel, leafbuf);
LockBuffer(buffer, GIST_EXCLUSIVE); LockBuffer(buffer, GIST_EXCLUSIVE);
if (gistdeletepage(info, stats, if (gistdeletepage(info, vstate->stats,
buffer, todelete[i] - deleted, buffer, todelete[i] - deleted,
leafbuf)) leafbuf))
deleted++; deleted++;
...@@ -573,7 +547,7 @@ gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistBulkDeleteResult *stats ...@@ -573,7 +547,7 @@ gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistBulkDeleteResult *stats
ReleaseBuffer(buffer); ReleaseBuffer(buffer);
/* update stats */ /* update stats */
stats->stats.pages_removed += deleted; vstate->stats->pages_removed += deleted;
/* /*
* We can stop the scan as soon as we have seen the downlinks, even if * We can stop the scan as soon as we have seen the downlinks, even if
...@@ -596,7 +570,7 @@ gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistBulkDeleteResult *stats ...@@ -596,7 +570,7 @@ gistvacuum_delete_empty_pages(IndexVacuumInfo *info, GistBulkDeleteResult *stats
* prevented it. * prevented it.
*/ */
static bool static bool
gistdeletepage(IndexVacuumInfo *info, GistBulkDeleteResult *stats, gistdeletepage(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
Buffer parentBuffer, OffsetNumber downlink, Buffer parentBuffer, OffsetNumber downlink,
Buffer leafBuffer) Buffer leafBuffer)
{ {
...@@ -665,7 +639,7 @@ gistdeletepage(IndexVacuumInfo *info, GistBulkDeleteResult *stats, ...@@ -665,7 +639,7 @@ gistdeletepage(IndexVacuumInfo *info, GistBulkDeleteResult *stats,
/* mark the page as deleted */ /* mark the page as deleted */
MarkBufferDirty(leafBuffer); MarkBufferDirty(leafBuffer);
GistPageSetDeleted(leafPage, txid); GistPageSetDeleted(leafPage, txid);
stats->stats.pages_deleted++; stats->pages_deleted++;
/* remove the downlink from the parent */ /* remove the downlink from the parent */
MarkBufferDirty(parentBuffer); MarkBufferDirty(parentBuffer);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment