Commit 70508ba7 authored by Tom Lane

Make btree index structure adjustments and WAL logging changes needed to
support btree compaction, as per proposal of a few days ago.  btree index
pages no longer store parent links, instead they have a level indicator
(counting up from zero for leaf pages).  The FixBTree recovery logic is
removed, and replaced by code that detects missing parent-level insertions
during WAL replay.  Also, generate appropriate WAL entries when updating
btree metapage and when building a btree index from scratch.  I believe
btree indexes are now completely WAL-legal for the first time.
initdb forced due to index and WAL changes.
parent 4df0f1d2
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
# Makefile for access/nbtree # Makefile for access/nbtree
# #
# IDENTIFICATION # IDENTIFICATION
# $Header: /cvsroot/pgsql/src/backend/access/nbtree/Makefile,v 1.11 2001/07/15 22:48:16 tgl Exp $ # $Header: /cvsroot/pgsql/src/backend/access/nbtree/Makefile,v 1.12 2003/02/21 00:06:21 tgl Exp $
# #
#------------------------------------------------------------------------- #-------------------------------------------------------------------------
...@@ -13,7 +13,7 @@ top_builddir = ../../../.. ...@@ -13,7 +13,7 @@ top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global include $(top_builddir)/src/Makefile.global
OBJS = nbtcompare.o nbtinsert.o nbtpage.o nbtree.o nbtsearch.o \ OBJS = nbtcompare.o nbtinsert.o nbtpage.o nbtree.o nbtsearch.o \
nbtstrat.o nbtutils.o nbtsort.o nbtstrat.o nbtutils.o nbtsort.o nbtxlog.o
all: SUBSYS.o all: SUBSYS.o
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.72 2002/06/20 20:29:25 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.73 2003/02/21 00:06:21 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -895,6 +895,89 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) ...@@ -895,6 +895,89 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
return true; return true;
} }
/*
 *	_bt_get_endpoint() -- Locate the first or last page of one tree level
 *
 *		"level" is the tree level wanted (the descent starts from the fast
 *		root when it is zero, from the true root otherwise); "rightmost"
 *		selects the last page of that level rather than the first.
 *
 *		InvalidBuffer is returned when the index is empty.  Every other
 *		failure condition is reported through elog().
 *
 *		On success the returned buffer is pinned and read-locked.
 */
Buffer
_bt_get_endpoint(Relation rel, uint32 level, bool rightmost)
{
	Buffer		curbuf;
	Page		curpage;
	BTPageOpaque curopaque;

	/*
	 * When a leaf page is wanted it is fine to start the descent at the
	 * fast root; for any other level start at the true root.  (Being
	 * cleverer about intermediate levels would not buy anything.)
	 */
	curbuf = (level == 0) ? _bt_getroot(rel, BT_READ) : _bt_gettrueroot(rel);

	/* No root at all means the index is empty */
	if (!BufferIsValid(curbuf))
		return InvalidBuffer;

	curpage = BufferGetPage(curbuf);
	curopaque = (BTPageOpaque) PageGetSpecialPointer(curpage);

	/*
	 * Each pass either finishes on the current page or picks exactly one
	 * page to move to next: the right sibling or a child.
	 */
	for (;;)
	{
		BlockNumber target;

		if (P_ISDELETED(curopaque) ||
			(rightmost && !P_RIGHTMOST(curopaque)))
		{
			/*
			 * Sitting on a deleted page: move right until a live page is
			 * found (there must be one).  Likewise keep moving right when
			 * the rightmost page is wanted and this is not it, which can
			 * happen if the page was split after we got a pointer to it.
			 */
			target = curopaque->btpo_next;
			if (target == P_NONE)
				elog(ERROR, "_bt_get_endpoint: ran off end of btree");
		}
		else if (curopaque->btpo.level == level)
		{
			/* Arrived at the requested level */
			break;
		}
		else
		{
			OffsetNumber downoff;
			BTItem		downitem;

			/* Already below the requested level: it cannot be reached */
			if (curopaque->btpo.level < level)
				elog(ERROR, "_bt_get_endpoint: btree level %u not found", level);

			/* Follow the downlink at the chosen edge of this page */
			downoff = rightmost ? PageGetMaxOffsetNumber(curpage)
				: P_FIRSTDATAKEY(curopaque);
			downitem = (BTItem) PageGetItem(curpage,
											PageGetItemId(curpage, downoff));
			target = ItemPointerGetBlockNumber(&(downitem->bti_itup.t_tid));
		}

		/* Release the current page, then read-lock the one we move to */
		_bt_relbuf(rel, curbuf);
		curbuf = _bt_getbuf(rel, target, BT_READ);
		curpage = BufferGetPage(curbuf);
		curopaque = (BTPageOpaque) PageGetSpecialPointer(curpage);
	}

	return curbuf;
}
/* /*
* _bt_endpoint() -- Find the first or last key in the index. * _bt_endpoint() -- Find the first or last key in the index.
* *
...@@ -910,8 +993,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) ...@@ -910,8 +993,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
Page page; Page page;
BTPageOpaque opaque; BTPageOpaque opaque;
ItemPointer current; ItemPointer current;
OffsetNumber offnum, OffsetNumber maxoff;
maxoff;
OffsetNumber start; OffsetNumber start;
BlockNumber blkno; BlockNumber blkno;
BTItem btitem; BTItem btitem;
...@@ -929,7 +1011,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) ...@@ -929,7 +1011,7 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
* simplified version of _bt_search(). We don't maintain a stack * simplified version of _bt_search(). We don't maintain a stack
* since we know we won't need it. * since we know we won't need it.
*/ */
buf = _bt_getroot(rel, BT_READ); buf = _bt_get_endpoint(rel, 0, ScanDirectionIsBackward(dir));
if (!BufferIsValid(buf)) if (!BufferIsValid(buf))
{ {
...@@ -942,51 +1024,14 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir) ...@@ -942,51 +1024,14 @@ _bt_endpoint(IndexScanDesc scan, ScanDirection dir)
blkno = BufferGetBlockNumber(buf); blkno = BufferGetBlockNumber(buf);
page = BufferGetPage(buf); page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page); opaque = (BTPageOpaque) PageGetSpecialPointer(page);
Assert(P_ISLEAF(opaque));
for (;;)
{
if (P_ISLEAF(opaque))
break;
if (ScanDirectionIsForward(dir))
offnum = P_FIRSTDATAKEY(opaque);
else
offnum = PageGetMaxOffsetNumber(page);
btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
itup = &(btitem->bti_itup);
blkno = ItemPointerGetBlockNumber(&(itup->t_tid));
_bt_relbuf(rel, buf);
buf = _bt_getbuf(rel, blkno, BT_READ);
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
/*
* Race condition: If the child page we just stepped onto was just
* split, we need to make sure we're all the way at the right edge
* of the tree. See the paper by Lehman and Yao.
*/
if (ScanDirectionIsBackward(dir) && !P_RIGHTMOST(opaque))
{
do
{
blkno = opaque->btpo_next;
_bt_relbuf(rel, buf);
buf = _bt_getbuf(rel, blkno, BT_READ);
page = BufferGetPage(buf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
} while (!P_RIGHTMOST(opaque));
}
}
/* okay, we've got the {left,right}-most page in the tree */
maxoff = PageGetMaxOffsetNumber(page); maxoff = PageGetMaxOffsetNumber(page);
if (ScanDirectionIsForward(dir)) if (ScanDirectionIsForward(dir))
{ {
Assert(P_LEFTMOST(opaque)); /* There could be dead pages to the left, so not this: */
/* Assert(P_LEFTMOST(opaque)); */
start = P_FIRSTDATAKEY(opaque); start = P_FIRSTDATAKEY(opaque);
} }
......
...@@ -35,7 +35,7 @@ ...@@ -35,7 +35,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsort.c,v 1.70 2002/11/15 01:26:08 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsort.c,v 1.71 2003/02/21 00:06:21 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -43,6 +43,7 @@ ...@@ -43,6 +43,7 @@
#include "postgres.h" #include "postgres.h"
#include "access/nbtree.h" #include "access/nbtree.h"
#include "miscadmin.h"
#include "utils/tuplesort.h" #include "utils/tuplesort.h"
...@@ -76,7 +77,7 @@ typedef struct BTPageState ...@@ -76,7 +77,7 @@ typedef struct BTPageState
BTItem btps_minkey; /* copy of minimum key (first item) on BTItem btps_minkey; /* copy of minimum key (first item) on
* page */ * page */
OffsetNumber btps_lastoff; /* last item offset loaded */ OffsetNumber btps_lastoff; /* last item offset loaded */
int btps_level; /* tree level (0 = leaf) */ uint32 btps_level; /* tree level (0 = leaf) */
Size btps_full; /* "full" if less than this much free Size btps_full; /* "full" if less than this much free
* space */ * space */
struct BTPageState *btps_next; /* link to parent level, if any */ struct BTPageState *btps_next; /* link to parent level, if any */
...@@ -90,8 +91,9 @@ typedef struct BTPageState ...@@ -90,8 +91,9 @@ typedef struct BTPageState
0) 0)
static void _bt_blnewpage(Relation index, Buffer *buf, Page *page, int flags); static void _bt_blnewpage(Relation index, Buffer *buf, Page *page,
static BTPageState *_bt_pagestate(Relation index, int flags, int level); uint32 level);
static BTPageState *_bt_pagestate(Relation index, uint32 level);
static void _bt_slideleft(Relation index, Buffer buf, Page page); static void _bt_slideleft(Relation index, Buffer buf, Page page);
static void _bt_sortaddtup(Page page, Size itemsize, static void _bt_sortaddtup(Page page, Size itemsize,
BTItem btitem, OffsetNumber itup_off); BTItem btitem, OffsetNumber itup_off);
...@@ -179,7 +181,7 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2) ...@@ -179,7 +181,7 @@ _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2)
* allocate a new, clean btree page, not linked to any siblings. * allocate a new, clean btree page, not linked to any siblings.
*/ */
static void static void
_bt_blnewpage(Relation index, Buffer *buf, Page *page, int flags) _bt_blnewpage(Relation index, Buffer *buf, Page *page, uint32 level)
{ {
BTPageOpaque opaque; BTPageOpaque opaque;
...@@ -192,23 +194,67 @@ _bt_blnewpage(Relation index, Buffer *buf, Page *page, int flags) ...@@ -192,23 +194,67 @@ _bt_blnewpage(Relation index, Buffer *buf, Page *page, int flags)
/* Initialize BT opaque state */ /* Initialize BT opaque state */
opaque = (BTPageOpaque) PageGetSpecialPointer(*page); opaque = (BTPageOpaque) PageGetSpecialPointer(*page);
opaque->btpo_prev = opaque->btpo_next = P_NONE; opaque->btpo_prev = opaque->btpo_next = P_NONE;
opaque->btpo_flags = flags; opaque->btpo.level = level;
opaque->btpo_flags = (level > 0) ? 0 : BTP_LEAF;
/* Make the P_HIKEY line pointer appear allocated */ /* Make the P_HIKEY line pointer appear allocated */
((PageHeader) *page)->pd_lower += sizeof(ItemIdData); ((PageHeader) *page)->pd_lower += sizeof(ItemIdData);
} }
/*
 * _bt_blwritepage() -- emit a completed btree page built by the bulk loader,
 * releasing the lock and pin held on it.
 *
 * Essentially the same as _bt_wrtbuf, except that a WAL record for the new
 * page is emitted first (skipped when index->rd_istemp is set).
 */
static void
_bt_blwritepage(Relation index, Buffer buf)
{
	Page		page = BufferGetPage(buf);

	/* No elog(ERROR) is permitted from here until the newpage op is logged */
	START_CRIT_SECTION();

	if (!index->rd_istemp)
	{
		xl_btree_newpage newpage;
		XLogRecData chain[2];
		XLogRecPtr	lsn;

		/* Fixed part of the record: the relation and block being logged */
		newpage.node = index->rd_node;
		newpage.blkno = BufferGetBlockNumber(buf);

		/* Second chain entry: the page itself, associated with its buffer */
		chain[1].buffer = buf;
		chain[1].data = (char *) page;
		chain[1].len = BLCKSZ;
		chain[1].next = NULL;

		/* First chain entry: the fixed part, associated with no buffer */
		chain[0].buffer = InvalidBuffer;
		chain[0].data = (char *) &newpage;
		chain[0].len = SizeOfBtreeNewpage;
		chain[0].next = &(chain[1]);

		lsn = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWPAGE, chain);

		/* Stamp the page with the location of the record just inserted */
		PageSetLSN(page, lsn);
		PageSetSUI(page, ThisStartUpID);
	}

	END_CRIT_SECTION();

	_bt_wrtbuf(index, buf);
}
/* /*
* allocate and initialize a new BTPageState. the returned structure * allocate and initialize a new BTPageState. the returned structure
* is suitable for immediate use by _bt_buildadd. * is suitable for immediate use by _bt_buildadd.
*/ */
static BTPageState * static BTPageState *
_bt_pagestate(Relation index, int flags, int level) _bt_pagestate(Relation index, uint32 level)
{ {
BTPageState *state = (BTPageState *) palloc0(sizeof(BTPageState)); BTPageState *state = (BTPageState *) palloc0(sizeof(BTPageState));
/* create initial page */ /* create initial page */
_bt_blnewpage(index, &(state->btps_buf), &(state->btps_page), flags); _bt_blnewpage(index, &(state->btps_buf), &(state->btps_page), level);
state->btps_minkey = (BTItem) NULL; state->btps_minkey = (BTItem) NULL;
/* initialize lastoff so first item goes into P_FIRSTKEY */ /* initialize lastoff so first item goes into P_FIRSTKEY */
...@@ -365,9 +411,8 @@ _bt_buildadd(Relation index, BTPageState *state, BTItem bti) ...@@ -365,9 +411,8 @@ _bt_buildadd(Relation index, BTPageState *state, BTItem bti)
ItemId hii; ItemId hii;
BTItem obti; BTItem obti;
/* Create new page */ /* Create new page on same level */
_bt_blnewpage(index, &nbuf, &npage, _bt_blnewpage(index, &nbuf, &npage, state->btps_level);
(state->btps_level > 0) ? 0 : BTP_LEAF);
/* /*
* We copy the last item on the page into the new page, and then * We copy the last item on the page into the new page, and then
...@@ -396,10 +441,8 @@ _bt_buildadd(Relation index, BTPageState *state, BTItem bti) ...@@ -396,10 +441,8 @@ _bt_buildadd(Relation index, BTPageState *state, BTItem bti)
* btree level. * btree level.
*/ */
if (state->btps_next == (BTPageState *) NULL) if (state->btps_next == (BTPageState *) NULL)
{ state->btps_next = _bt_pagestate(index, state->btps_level + 1);
state->btps_next =
_bt_pagestate(index, 0, state->btps_level + 1);
}
Assert(state->btps_minkey != NULL); Assert(state->btps_minkey != NULL);
ItemPointerSet(&(state->btps_minkey->bti_itup.t_tid), ItemPointerSet(&(state->btps_minkey->bti_itup.t_tid),
BufferGetBlockNumber(obuf), P_HIKEY); BufferGetBlockNumber(obuf), P_HIKEY);
...@@ -414,16 +457,7 @@ _bt_buildadd(Relation index, BTPageState *state, BTItem bti) ...@@ -414,16 +457,7 @@ _bt_buildadd(Relation index, BTPageState *state, BTItem bti)
state->btps_minkey = _bt_formitem(&(obti->bti_itup)); state->btps_minkey = _bt_formitem(&(obti->bti_itup));
/* /*
* Set the sibling links for both pages, and parent links too. * Set the sibling links for both pages.
*
* It's not necessary to set the parent link at all, because it's
* only used for handling concurrent root splits, but we may as
* well do it as a debugging aid. Note we set new page's link as
* well as old's, because if the new page turns out to be the last
* of the level, _bt_uppershutdown won't change it. The links may
* be out of date by the time the build finishes, but that's OK;
* they need only point to a left-sibling of the true parent. See
* the README file for more info.
*/ */
{ {
BTPageOpaque oopaque = (BTPageOpaque) PageGetSpecialPointer(opage); BTPageOpaque oopaque = (BTPageOpaque) PageGetSpecialPointer(opage);
...@@ -431,9 +465,7 @@ _bt_buildadd(Relation index, BTPageState *state, BTItem bti) ...@@ -431,9 +465,7 @@ _bt_buildadd(Relation index, BTPageState *state, BTItem bti)
oopaque->btpo_next = BufferGetBlockNumber(nbuf); oopaque->btpo_next = BufferGetBlockNumber(nbuf);
nopaque->btpo_prev = BufferGetBlockNumber(obuf); nopaque->btpo_prev = BufferGetBlockNumber(obuf);
nopaque->btpo_next = P_NONE; nopaque->btpo_next = P_NONE; /* redundant */
oopaque->btpo_parent = nopaque->btpo_parent =
BufferGetBlockNumber(state->btps_next->btps_buf);
} }
/* /*
...@@ -441,7 +473,7 @@ _bt_buildadd(Relation index, BTPageState *state, BTItem bti) ...@@ -441,7 +473,7 @@ _bt_buildadd(Relation index, BTPageState *state, BTItem bti)
* can give up our lock (if we had one; most likely BuildingBtree * can give up our lock (if we had one; most likely BuildingBtree
* is set, so we aren't locking). * is set, so we aren't locking).
*/ */
_bt_wrtbuf(index, obuf); _bt_blwritepage(index, obuf);
/* /*
* Reset last_off to point to new page * Reset last_off to point to new page
...@@ -519,7 +551,7 @@ _bt_uppershutdown(Relation index, BTPageState *state) ...@@ -519,7 +551,7 @@ _bt_uppershutdown(Relation index, BTPageState *state)
* slid back one slot. Then we can dump out the page. * slid back one slot. Then we can dump out the page.
*/ */
_bt_slideleft(index, s->btps_buf, s->btps_page); _bt_slideleft(index, s->btps_buf, s->btps_page);
_bt_wrtbuf(index, s->btps_buf); _bt_blwritepage(index, s->btps_buf);
} }
} }
...@@ -603,7 +635,7 @@ _bt_load(Relation index, BTSpool *btspool, BTSpool *btspool2) ...@@ -603,7 +635,7 @@ _bt_load(Relation index, BTSpool *btspool, BTSpool *btspool2)
/* When we see first tuple, create first index page */ /* When we see first tuple, create first index page */
if (state == NULL) if (state == NULL)
state = _bt_pagestate(index, BTP_LEAF, 0); state = _bt_pagestate(index, 0);
if (load1) if (load1)
{ {
...@@ -623,13 +655,13 @@ _bt_load(Relation index, BTSpool *btspool, BTSpool *btspool2) ...@@ -623,13 +655,13 @@ _bt_load(Relation index, BTSpool *btspool, BTSpool *btspool2)
_bt_freeskey(indexScanKey); _bt_freeskey(indexScanKey);
} }
else else
/* merge is unnecessary */
{ {
/* merge is unnecessary */
while (bti = (BTItem) tuplesort_getindextuple(btspool->sortstate, true, &should_free), bti != (BTItem) NULL) while (bti = (BTItem) tuplesort_getindextuple(btspool->sortstate, true, &should_free), bti != (BTItem) NULL)
{ {
/* When we see first tuple, create first index page */ /* When we see first tuple, create first index page */
if (state == NULL) if (state == NULL)
state = _bt_pagestate(index, BTP_LEAF, 0); state = _bt_pagestate(index, 0);
_bt_buildadd(index, state, bti); _bt_buildadd(index, state, bti);
if (should_free) if (should_free)
......
This diff is collapsed.
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* *
* Resource managers definition * Resource managers definition
* *
* $Header: /cvsroot/pgsql/src/backend/access/transam/rmgr.c,v 1.9 2001/08/25 18:52:41 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/access/transam/rmgr.c,v 1.10 2003/02/21 00:06:22 tgl Exp $
*/ */
#include "postgres.h" #include "postgres.h"
...@@ -19,21 +19,22 @@ ...@@ -19,21 +19,22 @@
#include "commands/sequence.h" #include "commands/sequence.h"
RmgrData RmgrTable[] = { RmgrData RmgrTable[RM_MAX_ID+1] = {
{"XLOG", xlog_redo, xlog_undo, xlog_desc}, {"XLOG", xlog_redo, xlog_undo, xlog_desc, NULL, NULL},
{"Transaction", xact_redo, xact_undo, xact_desc}, {"Transaction", xact_redo, xact_undo, xact_desc, NULL, NULL},
{"Storage", smgr_redo, smgr_undo, smgr_desc}, {"Storage", smgr_redo, smgr_undo, smgr_desc, NULL, NULL},
{"CLOG", clog_redo, clog_undo, clog_desc}, {"CLOG", clog_redo, clog_undo, clog_desc, NULL, NULL},
{"Reserved 4", NULL, NULL, NULL}, {"Reserved 4", NULL, NULL, NULL, NULL, NULL},
{"Reserved 5", NULL, NULL, NULL}, {"Reserved 5", NULL, NULL, NULL, NULL, NULL},
{"Reserved 6", NULL, NULL, NULL}, {"Reserved 6", NULL, NULL, NULL, NULL, NULL},
{"Reserved 7", NULL, NULL, NULL}, {"Reserved 7", NULL, NULL, NULL, NULL, NULL},
{"Reserved 8", NULL, NULL, NULL}, {"Reserved 8", NULL, NULL, NULL, NULL, NULL},
{"Reserved 9", NULL, NULL, NULL}, {"Reserved 9", NULL, NULL, NULL, NULL, NULL},
{"Heap", heap_redo, heap_undo, heap_desc}, {"Heap", heap_redo, heap_undo, heap_desc, NULL, NULL},
{"Btree", btree_redo, btree_undo, btree_desc}, {"Btree", btree_redo, btree_undo, btree_desc,
{"Hash", hash_redo, hash_undo, hash_desc}, btree_xlog_startup, btree_xlog_cleanup},
{"Rtree", rtree_redo, rtree_undo, rtree_desc}, {"Hash", hash_redo, hash_undo, hash_desc, NULL, NULL},
{"Gist", gist_redo, gist_undo, gist_desc}, {"Rtree", rtree_redo, rtree_undo, rtree_desc, NULL, NULL},
{"Sequence", seq_redo, seq_undo, seq_desc} {"Gist", gist_redo, gist_undo, gist_desc, NULL, NULL},
{"Sequence", seq_redo, seq_undo, seq_desc, NULL, NULL}
}; };
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.111 2003/01/25 03:06:04 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.112 2003/02/21 00:06:22 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1203,16 +1203,6 @@ XLogFlush(XLogRecPtr record) ...@@ -1203,16 +1203,6 @@ XLogFlush(XLogRecPtr record)
XLogRecPtr WriteRqstPtr; XLogRecPtr WriteRqstPtr;
XLogwrtRqst WriteRqst; XLogwrtRqst WriteRqst;
if (XLOG_DEBUG)
{
elog(LOG, "XLogFlush%s%s: request %X/%X; write %X/%X; flush %X/%X",
(IsBootstrapProcessingMode()) ? "(bootstrap)" : "",
(InRedo) ? "(redo)" : "",
record.xlogid, record.xrecoff,
LogwrtResult.Write.xlogid, LogwrtResult.Write.xrecoff,
LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);
}
/* Disabled during REDO */ /* Disabled during REDO */
if (InRedo) if (InRedo)
return; return;
...@@ -1221,6 +1211,15 @@ XLogFlush(XLogRecPtr record) ...@@ -1221,6 +1211,15 @@ XLogFlush(XLogRecPtr record)
if (XLByteLE(record, LogwrtResult.Flush)) if (XLByteLE(record, LogwrtResult.Flush))
return; return;
if (XLOG_DEBUG)
{
elog(LOG, "XLogFlush%s: request %X/%X; write %X/%X; flush %X/%X",
(IsBootstrapProcessingMode()) ? "(bootstrap)" : "",
record.xlogid, record.xrecoff,
LogwrtResult.Write.xlogid, LogwrtResult.Write.xrecoff,
LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);
}
START_CRIT_SECTION(); START_CRIT_SECTION();
/* /*
...@@ -2515,6 +2514,12 @@ StartupXLOG(void) ...@@ -2515,6 +2514,12 @@ StartupXLOG(void)
elog(LOG, "database system was interrupted at %s", elog(LOG, "database system was interrupted at %s",
str_time(ControlFile->time)); str_time(ControlFile->time));
/* This is just to allow attaching to startup process with a debugger */
#ifdef XLOG_REPLAY_DELAY
if (XLOG_DEBUG && ControlFile->state != DB_SHUTDOWNED)
sleep(60);
#endif
/* /*
* Get the last valid checkpoint record. If the latest one according * Get the last valid checkpoint record. If the latest one according
* to pg_control is broken, try the next-to-last one. * to pg_control is broken, try the next-to-last one.
...@@ -2578,14 +2583,23 @@ StartupXLOG(void) ...@@ -2578,14 +2583,23 @@ StartupXLOG(void)
/* REDO */ /* REDO */
if (InRecovery) if (InRecovery)
{ {
int rmid;
elog(LOG, "database system was not properly shut down; " elog(LOG, "database system was not properly shut down; "
"automatic recovery in progress"); "automatic recovery in progress");
ControlFile->state = DB_IN_RECOVERY; ControlFile->state = DB_IN_RECOVERY;
ControlFile->time = time(NULL); ControlFile->time = time(NULL);
UpdateControlFile(); UpdateControlFile();
/* Start up the recovery environment */
XLogInitRelationCache(); XLogInitRelationCache();
for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
{
if (RmgrTable[rmid].rm_startup != NULL)
RmgrTable[rmid].rm_startup();
}
/* Is REDO required ? */ /* Is REDO required ? */
if (XLByteLT(checkPoint.redo, RecPtr)) if (XLByteLT(checkPoint.redo, RecPtr))
record = ReadRecord(&(checkPoint.redo), PANIC, buffer); record = ReadRecord(&(checkPoint.redo), PANIC, buffer);
...@@ -2737,7 +2751,25 @@ StartupXLOG(void) ...@@ -2737,7 +2751,25 @@ StartupXLOG(void)
if (InRecovery) if (InRecovery)
{ {
int rmid;
/*
* Allow resource managers to do any required cleanup.
*/
for (rmid = 0; rmid <= RM_MAX_ID; rmid++)
{
if (RmgrTable[rmid].rm_cleanup != NULL)
RmgrTable[rmid].rm_cleanup();
}
/* suppress in-transaction check in CreateCheckPoint */
MyLastRecPtr.xrecoff = 0;
MyXactMadeXLogEntry = false;
MyXactMadeTempRelUpdate = false;
/* /*
* Perform a new checkpoint to update our recovery activity to disk.
*
* In case we had to use the secondary checkpoint, make sure that * In case we had to use the secondary checkpoint, make sure that
* it will still be shown as the secondary checkpoint after this * it will still be shown as the secondary checkpoint after this
* CreateCheckPoint operation; we don't want the broken primary * CreateCheckPoint operation; we don't want the broken primary
...@@ -2745,6 +2777,10 @@ StartupXLOG(void) ...@@ -2745,6 +2777,10 @@ StartupXLOG(void)
*/ */
ControlFile->checkPoint = checkPointLoc; ControlFile->checkPoint = checkPointLoc;
CreateCheckPoint(true, true); CreateCheckPoint(true, true);
/*
* Close down recovery environment
*/
XLogCloseRelationCache(); XLogCloseRelationCache();
} }
......
This diff is collapsed.
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: xlog.h,v 1.40 2002/11/15 02:44:57 momjian Exp $ * $Id: xlog.h,v 1.41 2003/02/21 00:06:22 tgl Exp $
*/ */
#ifndef XLOG_H #ifndef XLOG_H
#define XLOG_H #define XLOG_H
...@@ -145,10 +145,12 @@ typedef XLogPageHeaderData *XLogPageHeader; ...@@ -145,10 +145,12 @@ typedef XLogPageHeaderData *XLogPageHeader;
*/ */
typedef struct RmgrData typedef struct RmgrData
{ {
char *rm_name; const char *rm_name;
void (*rm_redo) (XLogRecPtr lsn, XLogRecord *rptr); void (*rm_redo) (XLogRecPtr lsn, XLogRecord *rptr);
void (*rm_undo) (XLogRecPtr lsn, XLogRecord *rptr); void (*rm_undo) (XLogRecPtr lsn, XLogRecord *rptr);
void (*rm_desc) (char *buf, uint8 xl_info, char *rec); void (*rm_desc) (char *buf, uint8 xl_info, char *rec);
void (*rm_startup) (void);
void (*rm_cleanup) (void);
} RmgrData; } RmgrData;
extern RmgrData RmgrTable[]; extern RmgrData RmgrTable[];
......
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
* Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $Id: catversion.h,v 1.177 2003/02/16 02:30:39 tgl Exp $ * $Id: catversion.h,v 1.178 2003/02/21 00:06:22 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -53,6 +53,6 @@ ...@@ -53,6 +53,6 @@
*/ */
/* yyyymmddN */ /* yyyymmddN */
#define CATALOG_VERSION_NO 200302151 #define CATALOG_VERSION_NO 200302171
#endif #endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment