Commit e33f205a authored by Tom Lane

Adjust btree index build procedure so that the btree metapage looks
invalid (has the wrong magic number) until the build is entirely
complete.  This turns out to cost no additional writes in the normal
case, since we were rewriting the metapage at the end of the process
anyway.  In normal scenarios there's no real gain in security, because
a failed index build would roll back the transaction leaving an unused
index file, but for rebuilding shared system indexes this seems to add
some useful protection.
parent 6b4caf53
<!--
$Header: /cvsroot/pgsql/doc/src/sgml/ref/reindex.sgml,v 1.21 2003/09/24 18:54:01 tgl Exp $
$Header: /cvsroot/pgsql/doc/src/sgml/ref/reindex.sgml,v 1.22 2003/09/29 23:40:26 tgl Exp $
PostgreSQL documentation
-->
......@@ -180,9 +180,10 @@ REINDEX { DATABASE | TABLE | INDEX } <replaceable class="PARAMETER">name</replac
is crash-safe and transaction-safe. <command>REINDEX</> is not
crash-safe for shared indexes, which is why this case is disallowed
during normal operation. If a failure occurs while reindexing one
of these catalogs in standalone mode, it is important that the failure
be rectified and the <command>REINDEX</> operation redone
before attempting to restart the regular server.
of these catalogs in standalone mode, it will not be possible to
restart the regular server until the problem is rectified. (The
typical symptom of a partially rebuilt shared index is <quote>index is not
a btree</> errors.)
</para>
<para>
......
......@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.71 2003/09/25 06:57:57 petere Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.72 2003/09/29 23:40:26 tgl Exp $
*
* NOTES
* Postgres btree pages look like ordinary relation pages. The opaque
......@@ -31,12 +31,16 @@
/*
* _bt_metapinit() -- Initialize the metadata page of a new btree.
*
* If markvalid is true, the index is immediately marked valid, else it
* will be invalid until _bt_metaproot() is called.
*
* Note: there's no real need for any locking here. Since the transaction
* creating the index hasn't committed yet, no one else can even see the index
* much less be trying to use it.
* much less be trying to use it. (In a REINDEX-in-place scenario, that's
* not true, but we assume the caller holds sufficient locks on the index.)
*/
void
_bt_metapinit(Relation rel)
_bt_metapinit(Relation rel, bool markvalid)
{
Buffer buf;
Page pg;
......@@ -57,7 +61,7 @@ _bt_metapinit(Relation rel)
_bt_pageinit(pg, BufferGetPageSize(buf));
metad = BTPageGetMeta(pg);
metad->btm_magic = BTREE_MAGIC;
metad->btm_magic = markvalid ? BTREE_MAGIC : 0;
metad->btm_version = BTREE_VERSION;
metad->btm_root = P_NONE;
metad->btm_level = 0;
......@@ -85,7 +89,9 @@ _bt_metapinit(Relation rel)
rdata[0].len = SizeOfBtreeNewmeta;
rdata[0].next = NULL;
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWMETA, rdata);
recptr = XLogInsert(RM_BTREE_ID,
markvalid ? XLOG_BTREE_NEWMETA : XLOG_BTREE_INVALIDMETA,
rdata);
PageSetLSN(pg, recptr);
PageSetSUI(pg, ThisStartUpID);
......@@ -611,6 +617,8 @@ _bt_metaproot(Relation rel, BlockNumber rootbknum, uint32 level)
START_CRIT_SECTION();
metad = BTPageGetMeta(metap);
Assert(metad->btm_magic == BTREE_MAGIC || metad->btm_magic == 0);
metad->btm_magic = BTREE_MAGIC; /* it's valid now for sure */
metad->btm_root = rootbknum;
metad->btm_level = level;
metad->btm_fastroot = rootbknum;
......
......@@ -12,7 +12,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.105 2003/08/04 02:39:57 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.106 2003/09/29 23:40:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -112,7 +112,8 @@ btbuild(PG_FUNCTION_ARGS)
RelationGetRelationName(index));
/* initialize the btree index metadata page */
_bt_metapinit(index);
/* mark it valid right away only if using slow build */
_bt_metapinit(index, !buildstate.usefast);
if (buildstate.usefast)
{
......
......@@ -36,7 +36,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsort.c,v 1.76 2003/09/25 06:57:57 petere Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsort.c,v 1.77 2003/09/29 23:40:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -514,6 +514,8 @@ static void
_bt_uppershutdown(Relation index, BTPageState *state)
{
BTPageState *s;
BlockNumber rootblkno = P_NONE;
uint32 rootlevel = 0;
/*
* Each iteration of this loop completes one more level of the tree.
......@@ -537,7 +539,8 @@ _bt_uppershutdown(Relation index, BTPageState *state)
if (s->btps_next == (BTPageState *) NULL)
{
opaque->btpo_flags |= BTP_ROOT;
_bt_metaproot(index, blkno, s->btps_level);
rootblkno = blkno;
rootlevel = s->btps_level;
}
else
{
......@@ -556,6 +559,14 @@ _bt_uppershutdown(Relation index, BTPageState *state)
_bt_slideleft(index, s->btps_buf, s->btps_page);
_bt_blwritepage(index, s->btps_buf);
}
/*
* As the last step in the process, update the metapage to point to
* the new root (unless we had no data at all, in which case it's
* left pointing to "P_NONE"). This changes the index to the "valid"
* state by updating its magic number.
*/
_bt_metaproot(index, rootblkno, rootlevel);
}
/*
......@@ -672,7 +683,6 @@ _bt_load(Relation index, BTSpool *btspool, BTSpool *btspool2)
}
}
/* Close down final pages, if we had any data at all */
if (state != NULL)
/* Close down final pages and rewrite the metapage */
_bt_uppershutdown(index, state);
}
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.6 2003/08/08 21:41:27 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.7 2003/09/29 23:40:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -109,7 +109,8 @@ _bt_restore_page(Page page, char *from, int len)
static void
_bt_restore_meta(Relation reln, XLogRecPtr lsn,
BlockNumber root, uint32 level,
BlockNumber fastroot, uint32 fastlevel)
BlockNumber fastroot, uint32 fastlevel,
bool markvalid)
{
Buffer metabuf;
Page metapg;
......@@ -124,7 +125,7 @@ _bt_restore_meta(Relation reln, XLogRecPtr lsn,
_bt_pageinit(metapg, BufferGetPageSize(metabuf));
md = BTPageGetMeta(metapg);
md->btm_magic = BTREE_MAGIC;
md->btm_magic = markvalid ? BTREE_MAGIC : 0;
md->btm_version = BTREE_VERSION;
md->btm_root = root;
md->btm_level = level;
......@@ -213,7 +214,8 @@ btree_xlog_insert(bool redo, bool isleaf, bool ismeta,
if (ismeta)
_bt_restore_meta(reln, lsn,
md.root, md.level,
md.fastroot, md.fastlevel);
md.fastroot, md.fastlevel,
true);
}
/* Forget any split this insertion completes */
......@@ -562,7 +564,8 @@ btree_xlog_delete_page(bool redo, bool ismeta,
sizeof(xl_btree_metadata));
_bt_restore_meta(reln, lsn,
md.root, md.level,
md.fastroot, md.fastlevel);
md.fastroot, md.fastlevel,
true);
}
}
}
......@@ -607,7 +610,8 @@ btree_xlog_newroot(bool redo, XLogRecPtr lsn, XLogRecord *record)
_bt_restore_meta(reln, lsn,
xlrec->rootblk, xlrec->level,
xlrec->rootblk, xlrec->level);
xlrec->rootblk, xlrec->level,
true);
/* Check to see if this satisfies any incomplete insertions */
if (record->xl_len > SizeOfBtreeNewroot &&
......@@ -621,7 +625,8 @@ btree_xlog_newroot(bool redo, XLogRecPtr lsn, XLogRecord *record)
}
static void
btree_xlog_newmeta(bool redo, XLogRecPtr lsn, XLogRecord *record)
btree_xlog_newmeta(bool redo, XLogRecPtr lsn, XLogRecord *record,
bool markvalid)
{
xl_btree_newmeta *xlrec = (xl_btree_newmeta *) XLogRecGetData(record);
Relation reln;
......@@ -635,7 +640,8 @@ btree_xlog_newmeta(bool redo, XLogRecPtr lsn, XLogRecord *record)
_bt_restore_meta(reln, lsn,
xlrec->meta.root, xlrec->meta.level,
xlrec->meta.fastroot, xlrec->meta.fastlevel);
xlrec->meta.fastroot, xlrec->meta.fastlevel,
markvalid);
}
static void
......@@ -707,11 +713,14 @@ btree_redo(XLogRecPtr lsn, XLogRecord *record)
btree_xlog_newroot(true, lsn, record);
break;
case XLOG_BTREE_NEWMETA:
btree_xlog_newmeta(true, lsn, record);
btree_xlog_newmeta(true, lsn, record, true);
break;
case XLOG_BTREE_NEWPAGE:
btree_xlog_newpage(true, lsn, record);
break;
case XLOG_BTREE_INVALIDMETA:
btree_xlog_newmeta(true, lsn, record, false);
break;
default:
elog(PANIC, "btree_redo: unknown op code %u", info);
}
......@@ -758,11 +767,14 @@ btree_undo(XLogRecPtr lsn, XLogRecord *record)
btree_xlog_newroot(false, lsn, record);
break;
case XLOG_BTREE_NEWMETA:
btree_xlog_newmeta(false, lsn, record);
btree_xlog_newmeta(false, lsn, record, true);
break;
case XLOG_BTREE_NEWPAGE:
btree_xlog_newpage(false, lsn, record);
break;
case XLOG_BTREE_INVALIDMETA:
btree_xlog_newmeta(false, lsn, record, false);
break;
default:
elog(PANIC, "btree_undo: unknown op code %u", info);
}
......@@ -895,6 +907,16 @@ btree_desc(char *buf, uint8 xl_info, char *rec)
xlrec->blkno);
break;
}
case XLOG_BTREE_INVALIDMETA:
{
xl_btree_newmeta *xlrec = (xl_btree_newmeta *) rec;
sprintf(buf + strlen(buf), "invalidmeta: node %u/%u; root %u lev %u fast %u lev %u",
xlrec->node.tblNode, xlrec->node.relNode,
xlrec->meta.root, xlrec->meta.level,
xlrec->meta.fastroot, xlrec->meta.fastlevel);
break;
}
default:
strcat(buf, "UNKNOWN");
break;
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: nbtree.h,v 1.70 2003/08/08 21:42:32 momjian Exp $
* $Id: nbtree.h,v 1.71 2003/09/29 23:40:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -198,6 +198,7 @@ typedef BTItemData *BTItem;
#define XLOG_BTREE_NEWROOT 0xA0 /* new root page */
#define XLOG_BTREE_NEWMETA 0xB0 /* update metadata page */
#define XLOG_BTREE_NEWPAGE 0xC0 /* new index page during build */
#define XLOG_BTREE_INVALIDMETA 0xD0 /* new metadata, temp. invalid */
/*
* All that we need to find changed index tuple
......@@ -448,7 +449,7 @@ extern void _bt_insert_parent(Relation rel, Buffer buf, Buffer rbuf,
/*
* prototypes for functions in nbtpage.c
*/
extern void _bt_metapinit(Relation rel);
extern void _bt_metapinit(Relation rel, bool markvalid);
extern Buffer _bt_getroot(Relation rel, int access);
extern Buffer _bt_gettrueroot(Relation rel);
extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment