Commit 3f0e808c authored by Heikki Linnakangas

Introduce the concept of relation forks. An smgr relation can now consist
of multiple forks, and each fork can be created and grown separately.

The bulk of this patch is about changing the smgr API to include an extra
ForkNumber argument in every smgr function. Also, smgrscheduleunlink and
smgrdounlink no longer implicitly call smgrclose, because other forks might
still exist after unlinking one. The callers of those functions have been
modified to call smgrclose instead.

This patch in itself doesn't have any user-visible effect, but provides the
infrastructure needed for upcoming patches. The additional forks envisioned
are a rewritten FSM implementation that doesn't rely on a fixed-size shared
memory block, and a visibility map to allow skipping portions of a table in
VACUUM that have no dead tuples.
parent eca13886
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.75 2008/05/12 00:00:44 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.76 2008/08/11 11:05:10 heikki Exp $
*
* NOTES
* Postgres hash pages look like ordinary relation pages. The opaque
......@@ -158,7 +158,7 @@ _hash_getinitbuf(Relation rel, BlockNumber blkno)
if (blkno == P_NEW)
elog(ERROR, "hash AM does not use P_NEW");
buf = ReadOrZeroBuffer(rel, blkno);
buf = ReadOrZeroBuffer(rel, MAIN_FORKNUM, blkno);
LockBuffer(buf, HASH_WRITE);
......@@ -203,7 +203,7 @@ _hash_getnewbuf(Relation rel, BlockNumber blkno)
BufferGetBlockNumber(buf), blkno);
}
else
buf = ReadOrZeroBuffer(rel, blkno);
buf = ReadOrZeroBuffer(rel, MAIN_FORKNUM, blkno);
LockBuffer(buf, HASH_WRITE);
......@@ -737,7 +737,7 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
MemSet(zerobuf, 0, sizeof(zerobuf));
RelationOpenSmgr(rel);
smgrextend(rel->rd_smgr, lastblock, zerobuf, rel->rd_istemp);
smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zerobuf, rel->rd_istemp);
return true;
}
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.261 2008/07/13 20:45:47 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.262 2008/08/11 11:05:10 heikki Exp $
*
*
* INTERFACE ROUTINES
......@@ -3906,7 +3906,8 @@ log_heap_move(Relation reln, Buffer oldbuf, ItemPointerData from,
* not do anything that assumes we are touching a heap.
*/
XLogRecPtr
log_newpage(RelFileNode *rnode, BlockNumber blkno, Page page)
log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
Page page)
{
xl_heap_newpage xlrec;
XLogRecPtr recptr;
......@@ -3916,6 +3917,7 @@ log_newpage(RelFileNode *rnode, BlockNumber blkno, Page page)
START_CRIT_SECTION();
xlrec.node = *rnode;
xlrec.forknum = forkNum;
xlrec.blkno = blkno;
rdata[0].data = (char *) &xlrec;
......@@ -4714,7 +4716,7 @@ heap_sync(Relation rel)
/* main heap */
FlushRelationBuffers(rel);
/* FlushRelationBuffers will have opened rd_smgr */
smgrimmedsync(rel->rd_smgr);
smgrimmedsync(rel->rd_smgr, MAIN_FORKNUM);
/* toast heap, if any */
if (OidIsValid(rel->rd_rel->reltoastrelid))
......@@ -4723,7 +4725,7 @@ heap_sync(Relation rel)
toastrel = heap_open(rel->rd_rel->reltoastrelid, AccessShareLock);
FlushRelationBuffers(toastrel);
smgrimmedsync(toastrel->rd_smgr);
smgrimmedsync(toastrel->rd_smgr, MAIN_FORKNUM);
heap_close(toastrel, AccessShareLock);
}
}
......@@ -96,7 +96,7 @@
* Portions Copyright (c) 1994-5, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.14 2008/06/19 00:46:03 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.15 2008/08/11 11:05:10 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -270,10 +270,11 @@ end_heap_rewrite(RewriteState state)
{
if (state->rs_use_wal)
log_newpage(&state->rs_new_rel->rd_node,
MAIN_FORKNUM,
state->rs_blockno,
state->rs_buffer);
RelationOpenSmgr(state->rs_new_rel);
smgrextend(state->rs_new_rel->rd_smgr, state->rs_blockno,
smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM, state->rs_blockno,
(char *) state->rs_buffer, true);
}
......@@ -606,6 +607,7 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
/* XLOG stuff */
if (state->rs_use_wal)
log_newpage(&state->rs_new_rel->rd_node,
MAIN_FORKNUM,
state->rs_blockno,
page);
......@@ -616,8 +618,8 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
* end_heap_rewrite.
*/
RelationOpenSmgr(state->rs_new_rel);
smgrextend(state->rs_new_rel->rd_smgr, state->rs_blockno,
(char *) page, true);
smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM,
state->rs_blockno, (char *) page, true);
state->rs_blockno++;
state->rs_buffer_valid = false;
......
......@@ -57,7 +57,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.116 2008/06/19 00:46:03 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.117 2008/08/11 11:05:10 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -267,7 +267,7 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
if (wstate->btws_use_wal)
{
/* We use the heap NEWPAGE record type for this */
log_newpage(&wstate->index->rd_node, blkno, page);
log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page);
}
else
{
......@@ -286,7 +286,8 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
{
if (!wstate->btws_zeropage)
wstate->btws_zeropage = (Page) palloc0(BLCKSZ);
smgrextend(wstate->index->rd_smgr, wstate->btws_pages_written++,
smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM,
wstate->btws_pages_written++,
(char *) wstate->btws_zeropage,
true);
}
......@@ -299,13 +300,15 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
if (blkno == wstate->btws_pages_written)
{
/* extending the file... */
smgrextend(wstate->index->rd_smgr, blkno, (char *) page, true);
smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
(char *) page, true);
wstate->btws_pages_written++;
}
else
{
/* overwriting a block we zero-filled before */
smgrwrite(wstate->index->rd_smgr, blkno, (char *) page, true);
smgrwrite(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
(char *) page, true);
}
pfree(page);
......@@ -809,6 +812,6 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
if (!wstate->index->rd_istemp)
{
RelationOpenSmgr(wstate->index);
smgrimmedsync(wstate->index->rd_smgr);
smgrimmedsync(wstate->index->rd_smgr, MAIN_FORKNUM);
}
}
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.44 2008/08/01 13:16:08 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.45 2008/08/11 11:05:10 heikki Exp $
*
* NOTES
* Each global transaction is associated with a global transaction
......@@ -141,12 +141,12 @@ static void RecordTransactionCommitPrepared(TransactionId xid,
int nchildren,
TransactionId *children,
int nrels,
RelFileNode *rels);
RelFileFork *rels);
static void RecordTransactionAbortPrepared(TransactionId xid,
int nchildren,
TransactionId *children,
int nrels,
RelFileNode *rels);
RelFileFork *rels);
static void ProcessRecords(char *bufptr, TransactionId xid,
const TwoPhaseCallback callbacks[]);
......@@ -694,8 +694,8 @@ TwoPhaseGetDummyProc(TransactionId xid)
*
* 1. TwoPhaseFileHeader
* 2. TransactionId[] (subtransactions)
* 3. RelFileNode[] (files to be deleted at commit)
* 4. RelFileNode[] (files to be deleted at abort)
* 3. RelFileFork[] (files to be deleted at commit)
* 4. RelFileFork[] (files to be deleted at abort)
* 5. TwoPhaseRecordOnDisk
* 6. ...
* 7. TwoPhaseRecordOnDisk (end sentinel, rmid == TWOPHASE_RM_END_ID)
......@@ -793,8 +793,8 @@ StartPrepare(GlobalTransaction gxact)
TransactionId xid = gxact->proc.xid;
TwoPhaseFileHeader hdr;
TransactionId *children;
RelFileNode *commitrels;
RelFileNode *abortrels;
RelFileFork *commitrels;
RelFileFork *abortrels;
/* Initialize linked list */
records.head = palloc0(sizeof(XLogRecData));
......@@ -832,12 +832,12 @@ StartPrepare(GlobalTransaction gxact)
}
if (hdr.ncommitrels > 0)
{
save_state_data(commitrels, hdr.ncommitrels * sizeof(RelFileNode));
save_state_data(commitrels, hdr.ncommitrels * sizeof(RelFileFork));
pfree(commitrels);
}
if (hdr.nabortrels > 0)
{
save_state_data(abortrels, hdr.nabortrels * sizeof(RelFileNode));
save_state_data(abortrels, hdr.nabortrels * sizeof(RelFileFork));
pfree(abortrels);
}
}
......@@ -1140,8 +1140,8 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
TwoPhaseFileHeader *hdr;
TransactionId latestXid;
TransactionId *children;
RelFileNode *commitrels;
RelFileNode *abortrels;
RelFileFork *commitrels;
RelFileFork *abortrels;
int i;
/*
......@@ -1169,10 +1169,10 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
children = (TransactionId *) bufptr;
bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
commitrels = (RelFileNode *) bufptr;
bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode));
abortrels = (RelFileNode *) bufptr;
bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode));
commitrels = (RelFileFork *) bufptr;
bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileFork));
abortrels = (RelFileFork *) bufptr;
bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileFork));
/* compute latestXid among all children */
latestXid = TransactionIdLatest(xid, hdr->nsubxacts, children);
......@@ -1215,12 +1215,20 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
if (isCommit)
{
for (i = 0; i < hdr->ncommitrels; i++)
smgrdounlink(smgropen(commitrels[i]), false, false);
{
SMgrRelation srel = smgropen(commitrels[i].rnode);
smgrdounlink(srel, commitrels[i].forknum, false, false);
smgrclose(srel);
}
}
else
{
for (i = 0; i < hdr->nabortrels; i++)
smgrdounlink(smgropen(abortrels[i]), false, false);
{
SMgrRelation srel = smgropen(abortrels[i].rnode);
smgrdounlink(srel, abortrels[i].forknum, false, false);
smgrclose(srel);
}
}
/* And now do the callbacks */
......@@ -1631,8 +1639,8 @@ RecoverPreparedTransactions(void)
bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
subxids = (TransactionId *) bufptr;
bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode));
bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode));
bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileFork));
bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileFork));
/*
* Reconstruct subtrans state for the transaction --- needed
......@@ -1685,7 +1693,7 @@ RecordTransactionCommitPrepared(TransactionId xid,
int nchildren,
TransactionId *children,
int nrels,
RelFileNode *rels)
RelFileFork *rels)
{
XLogRecData rdata[3];
int lastrdata = 0;
......@@ -1710,7 +1718,7 @@ RecordTransactionCommitPrepared(TransactionId xid,
{
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode);
rdata[1].len = nrels * sizeof(RelFileFork);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
}
......@@ -1760,7 +1768,7 @@ RecordTransactionAbortPrepared(TransactionId xid,
int nchildren,
TransactionId *children,
int nrels,
RelFileNode *rels)
RelFileFork *rels)
{
XLogRecData rdata[3];
int lastrdata = 0;
......@@ -1790,7 +1798,7 @@ RecordTransactionAbortPrepared(TransactionId xid,
{
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode);
rdata[1].len = nrels * sizeof(RelFileFork);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
}
......
......@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.264 2008/05/12 20:01:58 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.265 2008/08/11 11:05:10 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -819,7 +819,7 @@ RecordTransactionCommit(void)
bool markXidCommitted = TransactionIdIsValid(xid);
TransactionId latestXid = InvalidTransactionId;
int nrels;
RelFileNode *rels;
RelFileFork *rels;
bool haveNonTemp;
int nchildren;
TransactionId *children;
......@@ -900,7 +900,7 @@ RecordTransactionCommit(void)
{
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode);
rdata[1].len = nrels * sizeof(RelFileFork);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
}
......@@ -1203,7 +1203,7 @@ RecordTransactionAbort(bool isSubXact)
TransactionId xid = GetCurrentTransactionIdIfAny();
TransactionId latestXid;
int nrels;
RelFileNode *rels;
RelFileFork *rels;
int nchildren;
TransactionId *children;
XLogRecData rdata[3];
......@@ -1264,7 +1264,7 @@ RecordTransactionAbort(bool isSubXact)
{
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode);
rdata[1].len = nrels * sizeof(RelFileFork);
rdata[1].buffer = InvalidBuffer;
lastrdata = 1;
}
......@@ -4282,8 +4282,13 @@ xact_redo_commit(xl_xact_commit *xlrec, TransactionId xid)
/* Make sure files supposed to be dropped are dropped */
for (i = 0; i < xlrec->nrels; i++)
{
XLogDropRelation(xlrec->xnodes[i]);
smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
SMgrRelation srel;
XLogDropRelation(xlrec->xnodes[i].rnode, xlrec->xnodes[i].forknum);
srel = smgropen(xlrec->xnodes[i].rnode);
smgrdounlink(srel, xlrec->xnodes[i].forknum, false, true);
smgrclose(srel);
}
}
......@@ -4317,8 +4322,13 @@ xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid)
/* Make sure files supposed to be dropped are dropped */
for (i = 0; i < xlrec->nrels; i++)
{
XLogDropRelation(xlrec->xnodes[i]);
smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
SMgrRelation srel;
XLogDropRelation(xlrec->xnodes[i].rnode, xlrec->xnodes[i].forknum);
srel = smgropen(xlrec->xnodes[i].rnode);
smgrdounlink(srel, xlrec->xnodes[i].forknum, false, true);
smgrclose(srel);
}
}
......@@ -4374,10 +4384,12 @@ xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec)
appendStringInfo(buf, "; rels:");
for (i = 0; i < xlrec->nrels; i++)
{
RelFileNode rnode = xlrec->xnodes[i];
RelFileNode rnode = xlrec->xnodes[i].rnode;
ForkNumber forknum = xlrec->xnodes[i].forknum;
appendStringInfo(buf, " %u/%u/%u",
rnode.spcNode, rnode.dbNode, rnode.relNode);
appendStringInfo(buf, " %u/%u/%u/%u",
rnode.spcNode, rnode.dbNode, rnode.relNode,
forknum);
}
}
if (xlrec->nsubxacts > 0)
......@@ -4402,10 +4414,12 @@ xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec)
appendStringInfo(buf, "; rels:");
for (i = 0; i < xlrec->nrels; i++)
{
RelFileNode rnode = xlrec->xnodes[i];
RelFileNode rnode = xlrec->xnodes[i].rnode;
ForkNumber forknum = xlrec->xnodes[i].forknum;
appendStringInfo(buf, " %u/%u/%u",
rnode.spcNode, rnode.dbNode, rnode.relNode);
appendStringInfo(buf, " %u/%u/%u/%u",
rnode.spcNode, rnode.dbNode, rnode.relNode,
forknum);
}
}
if (xlrec->nsubxacts > 0)
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.316 2008/07/13 20:45:47 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.317 2008/08/11 11:05:10 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -1034,8 +1034,7 @@ XLogCheckBuffer(XLogRecData *rdata, bool doPageWrites,
/*
* The page needs to be backed up, so set up *bkpb
*/
bkpb->node = BufferGetFileNode(rdata->buffer);
bkpb->block = BufferGetBlockNumber(rdata->buffer);
BufferGetTag(rdata->buffer, &bkpb->node, &bkpb->fork, &bkpb->block);
if (rdata->buffer_std)
{
......@@ -2855,7 +2854,8 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn)
memcpy(&bkpb, blk, sizeof(BkpBlock));
blk += sizeof(BkpBlock);
buffer = XLogReadBuffer(bkpb.node, bkpb.block, true);
buffer = XLogReadBufferWithFork(bkpb.node, bkpb.fork, bkpb.block,
true);
Assert(BufferIsValid(buffer));
page = (Page) BufferGetPage(buffer);
......
......@@ -11,7 +11,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.57 2008/07/13 20:45:47 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.58 2008/08/11 11:05:10 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -37,6 +37,7 @@
/*
 * Hash key identifying one page that WAL replay referenced but could not
 * find valid: the relation, the fork within it, and the block number.
 * log_invalid_page() passes this struct directly to hash_search() as the
 * key and assumes it contains no padding, so take care when adding or
 * reordering fields.
 */
typedef struct xl_invalid_page_key
{
RelFileNode node; /* the relation */
ForkNumber forkno; /* the fork number */
BlockNumber blkno; /* the page */
} xl_invalid_page_key;
......@@ -51,7 +52,8 @@ static HTAB *invalid_page_tab = NULL;
/* Log a reference to an invalid page */
static void
log_invalid_page(RelFileNode node, BlockNumber blkno, bool present)
log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno,
bool present)
{
xl_invalid_page_key key;
xl_invalid_page *hentry;
......@@ -63,11 +65,11 @@ log_invalid_page(RelFileNode node, BlockNumber blkno, bool present)
* something about the XLOG record that generated the reference).
*/
if (present)
elog(DEBUG1, "page %u of relation %u/%u/%u is uninitialized",
blkno, node.spcNode, node.dbNode, node.relNode);
elog(DEBUG1, "page %u of relation %u/%u/%u/%u is uninitialized",
blkno, node.spcNode, node.dbNode, node.relNode, forkno);
else
elog(DEBUG1, "page %u of relation %u/%u/%u does not exist",
blkno, node.spcNode, node.dbNode, node.relNode);
elog(DEBUG1, "page %u of relation %u/%u/%u/%u does not exist",
blkno, node.spcNode, node.dbNode, node.relNode, forkno);
if (invalid_page_tab == NULL)
{
......@@ -87,6 +89,7 @@ log_invalid_page(RelFileNode node, BlockNumber blkno, bool present)
/* we currently assume xl_invalid_page_key contains no padding */
key.node = node;
key.forkno = forkno;
key.blkno = blkno;
hentry = (xl_invalid_page *)
hash_search(invalid_page_tab, (void *) &key, HASH_ENTER, &found);
......@@ -104,7 +107,7 @@ log_invalid_page(RelFileNode node, BlockNumber blkno, bool present)
/* Forget any invalid pages >= minblkno, because they've been dropped */
static void
forget_invalid_pages(RelFileNode node, BlockNumber minblkno)
forget_invalid_pages(RelFileNode node, ForkNumber forkno, BlockNumber minblkno)
{
HASH_SEQ_STATUS status;
xl_invalid_page *hentry;
......@@ -117,11 +120,12 @@ forget_invalid_pages(RelFileNode node, BlockNumber minblkno)
while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
{
if (RelFileNodeEquals(hentry->key.node, node) &&
hentry->key.forkno == forkno &&
hentry->key.blkno >= minblkno)
{
elog(DEBUG2, "page %u of relation %u/%u/%u has been dropped",
elog(DEBUG2, "page %u of relation %u/%u/%u/%u has been dropped",
hentry->key.blkno, hentry->key.node.spcNode,
hentry->key.node.dbNode, hentry->key.node.relNode);
hentry->key.node.dbNode, hentry->key.node.relNode, forkno);
if (hash_search(invalid_page_tab,
(void *) &hentry->key,
......@@ -223,6 +227,18 @@ XLogCheckInvalidPages(void)
*/
Buffer
XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
{
/*
 * Fork-unaware entry point: delegate to the fork-aware variant, always
 * targeting the main fork, and return its result unchanged.
 */
return XLogReadBufferWithFork(rnode, MAIN_FORKNUM, blkno, init);
}
/*
* XLogReadBufferWithFork
* Like XLogReadBuffer, but reads from the relation fork given by
* forknum instead of always reading from the main fork.
*/
Buffer
XLogReadBufferWithFork(RelFileNode rnode, ForkNumber forknum,
BlockNumber blkno, bool init)
{
BlockNumber lastblock;
Buffer buffer;
......@@ -241,21 +257,21 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
* filesystem loses an inode during a crash. Better to write the data
* until we are actually told to delete the file.)
*/
smgrcreate(smgr, false, true);
smgrcreate(smgr, forknum, false, true);
lastblock = smgrnblocks(smgr);
lastblock = smgrnblocks(smgr, forknum);
if (blkno < lastblock)
{
/* page exists in file */
buffer = ReadBufferWithoutRelcache(rnode, false, blkno, init);
buffer = ReadBufferWithoutRelcache(rnode, false, forknum, blkno, init);
}
else
{
/* hm, page doesn't exist in file */
if (!init)
{
log_invalid_page(rnode, blkno, false);
log_invalid_page(rnode, forknum, blkno, false);
return InvalidBuffer;
}
/* OK to extend the file */
......@@ -266,7 +282,8 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
{
if (buffer != InvalidBuffer)
ReleaseBuffer(buffer);
buffer = ReadBufferWithoutRelcache(rnode, false, P_NEW, false);
buffer = ReadBufferWithoutRelcache(rnode, false, forknum,
P_NEW, false);
lastblock++;
}
Assert(BufferGetBlockNumber(buffer) == blkno);
......@@ -282,7 +299,7 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
if (PageIsNew(page))
{
UnlockReleaseBuffer(buffer);
log_invalid_page(rnode, blkno, true);
log_invalid_page(rnode, forknum, blkno, true);
return InvalidBuffer;
}
}
......@@ -363,12 +380,9 @@ FreeFakeRelcacheEntry(Relation fakerel)
* any open "invalid-page" records for the relation.
*/
void
XLogDropRelation(RelFileNode rnode)
XLogDropRelation(RelFileNode rnode, ForkNumber forknum)
{
/* Tell smgr to forget about this relation as well */
smgrclosenode(rnode);
forget_invalid_pages(rnode, 0);
forget_invalid_pages(rnode, forknum, 0);
}
/*
......@@ -397,7 +411,8 @@ XLogDropDatabase(Oid dbid)
* We need to clean up any open "invalid-page" records for the dropped pages.
*/
void
XLogTruncateRelation(RelFileNode rnode, BlockNumber nblocks)
XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
BlockNumber nblocks)
{
forget_invalid_pages(rnode, nblocks);
forget_invalid_pages(rnode, forkNum, nblocks);
}
......@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/catalog/catalog.c,v 1.77 2008/06/19 00:46:04 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/catalog/catalog.c,v 1.78 2008/08/11 11:05:10 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -42,7 +42,8 @@
#include "utils/tqual.h"
#define OIDCHARS 10 /* max chars printed by %u */
#define OIDCHARS 10 /* max chars printed by %u */
#define FORKNUMCHARS 1 /* max chars for a fork number */
/*
......@@ -51,7 +52,7 @@
* Result is a palloc'd string.
*/
char *
relpath(RelFileNode rnode)
relpath(RelFileNode rnode, ForkNumber forknum)
{
int pathlen;
char *path;
......@@ -60,26 +61,38 @@ relpath(RelFileNode rnode)
{
/* Shared system relations live in {datadir}/global */
Assert(rnode.dbNode == 0);
pathlen = 7 + OIDCHARS + 1;
pathlen = 7 + OIDCHARS + 1 + FORKNUMCHARS + 1;
path = (char *) palloc(pathlen);
snprintf(path, pathlen, "global/%u",
rnode.relNode);
if (forknum != MAIN_FORKNUM)
snprintf(path, pathlen, "global/%u_%u",
rnode.relNode, forknum);
else
snprintf(path, pathlen, "global/%u", rnode.relNode);
}
else if (rnode.spcNode == DEFAULTTABLESPACE_OID)
{
/* The default tablespace is {datadir}/base */
pathlen = 5 + OIDCHARS + 1 + OIDCHARS + 1;
pathlen = 5 + OIDCHARS + 1 + OIDCHARS + 1 + FORKNUMCHARS + 1;
path = (char *) palloc(pathlen);
snprintf(path, pathlen, "base/%u/%u",
rnode.dbNode, rnode.relNode);
if (forknum != MAIN_FORKNUM)
snprintf(path, pathlen, "base/%u/%u_%u",
rnode.dbNode, rnode.relNode, forknum);
else
snprintf(path, pathlen, "base/%u/%u",
rnode.dbNode, rnode.relNode);
}
else
{
/* All other tablespaces are accessed via symlinks */
pathlen = 10 + OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1;
pathlen = 10 + OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1
+ FORKNUMCHARS + 1;
path = (char *) palloc(pathlen);
snprintf(path, pathlen, "pg_tblspc/%u/%u/%u",
rnode.spcNode, rnode.dbNode, rnode.relNode);
if (forknum != MAIN_FORKNUM)
snprintf(path, pathlen, "pg_tblspc/%u/%u/%u_%u",
rnode.spcNode, rnode.dbNode, rnode.relNode, forknum);
else
snprintf(path, pathlen, "pg_tblspc/%u/%u/%u",
rnode.spcNode, rnode.dbNode, rnode.relNode);
}
return path;
}
......@@ -431,7 +444,7 @@ GetNewRelFileNode(Oid reltablespace, bool relisshared, Relation pg_class)
rnode.relNode = GetNewObjectId();
/* Check for existing file of same name */
rpath = relpath(rnode);
rpath = relpath(rnode, MAIN_FORKNUM);
fd = BasicOpenFile(rpath, O_RDONLY | PG_BINARY, 0);
if (fd >= 0)
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.336 2008/07/30 19:35:13 tgl Exp $
* $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.337 2008/08/11 11:05:10 heikki Exp $
*
*
* INTERFACE ROUTINES
......@@ -292,13 +292,16 @@ heap_create(const char *relname,
shared_relation);
/*
* have the storage manager create the relation's disk file, if needed.
* Have the storage manager create the relation's disk file, if needed.
*
* We only create storage for the main fork here. The caller is
* responsible for creating any additional forks if needed.
*/
if (create_storage)
{
Assert(rel->rd_smgr == NULL);
RelationOpenSmgr(rel);
smgrcreate(rel->rd_smgr, rel->rd_istemp, false);
smgrcreate(rel->rd_smgr, MAIN_FORKNUM, rel->rd_istemp, false);
}
return rel;
......@@ -1385,13 +1388,18 @@ heap_drop_with_catalog(Oid relid)
rel = relation_open(relid, AccessExclusiveLock);
/*
* Schedule unlinking of the relation's physical file at commit.
* Schedule unlinking of the relation's physical files at commit.
*/
if (rel->rd_rel->relkind != RELKIND_VIEW &&
rel->rd_rel->relkind != RELKIND_COMPOSITE_TYPE)
{
ForkNumber forknum;
RelationOpenSmgr(rel);
smgrscheduleunlink(rel->rd_smgr, rel->rd_istemp);
for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
if (smgrexists(rel->rd_smgr, forknum))
smgrscheduleunlink(rel->rd_smgr, forknum, rel->rd_istemp);
RelationCloseSmgr(rel);
}
/*
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.301 2008/08/10 19:02:33 tgl Exp $
* $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.302 2008/08/11 11:05:10 heikki Exp $
*
*
* INTERFACE ROUTINES
......@@ -874,6 +874,7 @@ index_drop(Oid indexId)
Relation indexRelation;
HeapTuple tuple;
bool hasexprs;
ForkNumber forknum;
/*
* To drop an index safely, we must grab exclusive lock on its parent
......@@ -892,11 +893,14 @@ index_drop(Oid indexId)
userIndexRelation = index_open(indexId, AccessExclusiveLock);
/*
* Schedule physical removal of the file
* Schedule physical removal of the files
*/
RelationOpenSmgr(userIndexRelation);
smgrscheduleunlink(userIndexRelation->rd_smgr,
userIndexRelation->rd_istemp);
for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
if (smgrexists(userIndexRelation->rd_smgr, forknum))
smgrscheduleunlink(userIndexRelation->rd_smgr, forknum,
userIndexRelation->rd_istemp);
RelationCloseSmgr(userIndexRelation);
/*
* Close and flush the index's relcache entry, to ensure relcache doesn't
......@@ -1260,6 +1264,7 @@ setNewRelfilenode(Relation relation, TransactionId freezeXid)
Relation pg_class;
HeapTuple tuple;
Form_pg_class rd_rel;
ForkNumber i;
/* Can't change relfilenode for nailed tables (indexes ok though) */
Assert(!relation->rd_isnailed ||
......@@ -1290,18 +1295,29 @@ setNewRelfilenode(Relation relation, TransactionId freezeXid)
RelationGetRelid(relation));
rd_rel = (Form_pg_class) GETSTRUCT(tuple);
/* create another storage file. Is it a little ugly ? */
/* NOTE: any conflict in relfilenode value will be caught here */
RelationOpenSmgr(relation);
/*
* ... and create storage for corresponding forks in the new relfilenode.
*
* NOTE: any conflict in relfilenode value will be caught here
*/
newrnode = relation->rd_node;
newrnode.relNode = newrelfilenode;
srel = smgropen(newrnode);
smgrcreate(srel, relation->rd_istemp, false);
smgrclose(srel);
/* schedule unlinking old relfilenode */
RelationOpenSmgr(relation);
smgrscheduleunlink(relation->rd_smgr, relation->rd_istemp);
/* Create the main fork, like heap_create() does */
smgrcreate(srel, MAIN_FORKNUM, relation->rd_istemp, false);
/* schedule unlinking old files */
for (i = 0; i <= MAX_FORKNUM; i++)
{
if (smgrexists(relation->rd_smgr, i))
smgrscheduleunlink(relation->rd_smgr, i, relation->rd_istemp);
}
smgrclose(srel);
RelationCloseSmgr(relation);
/* update the pg_class row */
rd_rel->relfilenode = newrelfilenode;
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.261 2008/07/16 19:33:25 tgl Exp $
* $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.262 2008/08/11 11:05:11 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -318,7 +318,8 @@ static void ATExecEnableDisableRule(Relation rel, char *rulename,
char fires_when);
static void ATExecAddInherit(Relation rel, RangeVar *parent);
static void ATExecDropInherit(Relation rel, RangeVar *parent);
static void copy_relation_data(Relation rel, SMgrRelation dst);
static void copy_relation_data(SMgrRelation rel, SMgrRelation dst,
ForkNumber forkNum, bool istemp);
/* ----------------------------------------------------------------
......@@ -6483,6 +6484,7 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace)
Relation pg_class;
HeapTuple tuple;
Form_pg_class rd_rel;
ForkNumber forkNum;
/*
* Need lock here in case we are recursing to toast table or index
......@@ -6538,26 +6540,42 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace)
elog(ERROR, "cache lookup failed for relation %u", tableOid);
rd_rel = (Form_pg_class) GETSTRUCT(tuple);
/* create another storage file. Is it a little ugly ? */
/* NOTE: any conflict in relfilenode value will be caught here */
/*
* Since we copy the file directly without looking at the shared buffers,
* we'd better first flush out any pages of the source relation that are
* in shared buffers. We assume no new changes will be made while we are
* holding exclusive lock on the rel.
*/
FlushRelationBuffers(rel);
/* Open old and new relation */
newrnode = rel->rd_node;
newrnode.spcNode = newTableSpace;
dstrel = smgropen(newrnode);
smgrcreate(dstrel, rel->rd_istemp, false);
/* copy relation data to the new physical file */
copy_relation_data(rel, dstrel);
/* schedule unlinking old physical file */
RelationOpenSmgr(rel);
smgrscheduleunlink(rel->rd_smgr, rel->rd_istemp);
/*
* Now drop smgr references. The source was already dropped by
* smgrscheduleunlink.
* Create and copy all forks of the relation, and schedule unlinking
* of old physical files.
*
* NOTE: any conflict in relfilenode value will be caught in
* smgrcreate() below.
*/
for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++)
{
if (smgrexists(rel->rd_smgr, forkNum))
{
smgrcreate(dstrel, forkNum, rel->rd_istemp, false);
copy_relation_data(rel->rd_smgr, dstrel, forkNum, rel->rd_istemp);
smgrscheduleunlink(rel->rd_smgr, forkNum, rel->rd_istemp);
}
}
/* Close old and new relation */
smgrclose(dstrel);
RelationCloseSmgr(rel);
/* update the pg_class row */
rd_rel->reltablespace = (newTableSpace == MyDatabaseTableSpace) ? InvalidOid : newTableSpace;
......@@ -6584,47 +6602,37 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace)
* Copy data, block by block
*/
static void
copy_relation_data(Relation rel, SMgrRelation dst)
copy_relation_data(SMgrRelation src, SMgrRelation dst,
ForkNumber forkNum, bool istemp)
{
SMgrRelation src;
bool use_wal;
BlockNumber nblocks;
BlockNumber blkno;
char buf[BLCKSZ];
Page page = (Page) buf;
/*
* Since we copy the file directly without looking at the shared buffers,
* we'd better first flush out any pages of the source relation that are
* in shared buffers. We assume no new changes will be made while we are
* holding exclusive lock on the rel.
*/
FlushRelationBuffers(rel);
/*
* We need to log the copied data in WAL iff WAL archiving is enabled AND
* it's not a temp rel.
*/
use_wal = XLogArchivingActive() && !rel->rd_istemp;
use_wal = XLogArchivingActive() && !istemp;
nblocks = RelationGetNumberOfBlocks(rel);
/* RelationGetNumberOfBlocks will certainly have opened rd_smgr */
src = rel->rd_smgr;
nblocks = smgrnblocks(src, forkNum);
for (blkno = 0; blkno < nblocks; blkno++)
{
smgrread(src, blkno, buf);
smgrread(src, forkNum, blkno, buf);
/* XLOG stuff */
if (use_wal)
log_newpage(&dst->smgr_rnode, blkno, page);
log_newpage(&dst->smgr_rnode, forkNum, blkno, page);
/*
* Now write the page. We say isTemp = true even if it's not a temp
* rel, because there's no need for smgr to schedule an fsync for this
* write; we'll do it ourselves below.
*/
smgrextend(dst, blkno, buf, true);
smgrextend(dst, forkNum, blkno, buf, true);
}
/*
......@@ -6641,8 +6649,8 @@ copy_relation_data(Relation rel, SMgrRelation dst)
* wouldn't replay our earlier WAL entries. If we do not fsync those pages
* here, they might still not be on disk when the crash occurs.
*/
if (!rel->rd_istemp)
smgrimmedsync(dst);
if (!istemp)
smgrimmedsync(dst, forkNum);
}
/*
......
......@@ -37,7 +37,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.50 2008/05/12 00:00:50 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.51 2008/08/11 11:05:11 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -113,6 +113,7 @@
/*
 * One queued fsync request.  The (rnode, forknum, segno) triple is what
 * ForwardFsyncRequest() receives and what AbsorbFsyncRequests() hands on to
 * RememberFsyncRequest().
 */
typedef struct
{
RelFileNode rnode; /* relation the request refers to */
ForkNumber forknum; /* fork within that relation */
BlockNumber segno; /* see md.c for special values */
/* might add a real request-type field later; not needed yet */
} BgWriterRequest;
......@@ -990,7 +991,7 @@ RequestCheckpoint(int flags)
* than we have to here.
*/
bool
ForwardFsyncRequest(RelFileNode rnode, BlockNumber segno)
ForwardFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
{
BgWriterRequest *request;
......@@ -1067,7 +1068,7 @@ AbsorbFsyncRequests(void)
LWLockRelease(BgWriterCommLock);
for (request = requests; n > 0; request++, n--)
RememberFsyncRequest(request->rnode, request->segno);
RememberFsyncRequest(request->rnode, request->forknum, request->segno);
if (requests)
pfree(requests);
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/rewrite/rewriteDefine.c,v 1.127 2008/06/19 00:46:05 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/rewrite/rewriteDefine.c,v 1.128 2008/08/11 11:05:11 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -482,8 +482,14 @@ DefineQueryRewrite(char *rulename,
*/
if (RelisBecomingView)
{
ForkNumber forknum;
RelationOpenSmgr(event_relation);
smgrscheduleunlink(event_relation->rd_smgr, event_relation->rd_istemp);
for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
if (smgrexists(event_relation->rd_smgr, forknum))
smgrscheduleunlink(event_relation->rd_smgr, forknum,
event_relation->rd_istemp);
RelationCloseSmgr(event_relation);
}
/* Close rel, but keep lock till commit... */
......
This diff is collapsed.
......@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.80 2008/06/12 09:12:31 heikki Exp $
* $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.81 2008/08/11 11:05:11 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -61,7 +61,8 @@ static Block GetLocalBufferStorage(void);
* (hence, usage_count is always advanced).
*/
BufferDesc *
LocalBufferAlloc(SMgrRelation smgr, BlockNumber blockNum, bool *foundPtr)
LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
bool *foundPtr)
{
BufferTag newTag; /* identity of requested block */
LocalBufferLookupEnt *hresult;
......@@ -70,7 +71,7 @@ LocalBufferAlloc(SMgrRelation smgr, BlockNumber blockNum, bool *foundPtr)
int trycounter;
bool found;
INIT_BUFFERTAG(newTag, smgr->smgr_rnode, blockNum);
INIT_BUFFERTAG(newTag, smgr->smgr_rnode, forkNum, blockNum);
/* Initialize local buffers if first request in this session */
if (LocalBufHash == NULL)
......@@ -162,6 +163,7 @@ LocalBufferAlloc(SMgrRelation smgr, BlockNumber blockNum, bool *foundPtr)
/* And write... */
smgrwrite(oreln,
bufHdr->tag.forkNum,
bufHdr->tag.blockNum,
(char *) LocalBufHdrGetBlock(bufHdr),
true);
......@@ -250,7 +252,8 @@ MarkLocalBufferDirty(Buffer buffer)
* See DropRelFileNodeBuffers in bufmgr.c for more notes.
*/
void
DropRelFileNodeLocalBuffers(RelFileNode rnode, BlockNumber firstDelBlock)
DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
BlockNumber firstDelBlock)
{
int i;
......@@ -261,6 +264,7 @@ DropRelFileNodeLocalBuffers(RelFileNode rnode, BlockNumber firstDelBlock)
if ((bufHdr->flags & BM_TAG_VALID) &&
RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
bufHdr->tag.forkNum == forkNum &&
bufHdr->tag.blockNum >= firstDelBlock)
{
if (LocalRefCount[i] != 0)
......
$PostgreSQL: pgsql/src/backend/storage/smgr/README,v 1.5 2008/03/21 13:23:28 momjian Exp $
$PostgreSQL: pgsql/src/backend/storage/smgr/README,v 1.6 2008/08/11 11:05:11 heikki Exp $
Storage Manager
===============
......@@ -32,3 +32,20 @@ The files in this directory, and their contents, are
md.c The magnetic disk storage manager.
Note that md.c in turn relies on src/backend/storage/file/fd.c.
Relation Forks
==============
Since 8.4, a single smgr relation can consist of multiple physical
files, called relation forks. This allows storing additional metadata, such
as free space information, in additional forks, which can be grown and
truncated independently of the main data file, while still treating it all
as a single physical relation in the system catalogs.
It is assumed that the main fork, fork number 0 or MAIN_FORKNUM, always
exists. Fork numbers are assigned in src/include/storage/relfilenode.h.
Functions in smgr.c and md.c take an extra fork number argument, in addition
to relfilenode and block number, to identify which relation fork you want to
access. Since most code wants to access the main fork, a shortcut version of
ReadBuffer that accesses MAIN_FORKNUM is provided in the buffer manager for
convenience.
This diff is collapsed.
This diff is collapsed.
......@@ -5,7 +5,7 @@
* Copyright (c) 2002-2008, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/dbsize.c,v 1.19 2008/06/19 00:46:05 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/dbsize.c,v 1.20 2008/08/11 11:05:11 heikki Exp $
*
*/
......@@ -255,7 +255,8 @@ calculate_relation_size(RelFileNode *rfn)
char pathname[MAXPGPATH];
unsigned int segcount = 0;
relationpath = relpath(*rfn);
/* XXX: This ignores the other forks. */
relationpath = relpath(*rfn, MAIN_FORKNUM);
for (segcount = 0;; segcount++)
{
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.137 2008/06/19 00:46:06 alvherre Exp $
* $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.138 2008/08/11 11:05:11 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -127,7 +127,8 @@ extern XLogRecPtr log_heap_clean(Relation reln, Buffer buffer,
extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer,
TransactionId cutoff_xid,
OffsetNumber *offsets, int offcnt);
extern XLogRecPtr log_newpage(RelFileNode *rnode, BlockNumber blk, Page page);
extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
BlockNumber blk, Page page);
/* in heap/pruneheap.c */
extern void heap_page_prune_opt(Relation relation, Buffer buffer,
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/htup.h,v 1.100 2008/07/13 20:45:47 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/htup.h,v 1.101 2008/08/11 11:05:11 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -670,6 +670,7 @@ typedef struct xl_heap_clean
/*
 * Fixed-size header of a "new page" WAL record: node, forknum and blkno
 * together locate the page; the page image itself follows the header.
 * NOTE(review): presumably this is the record emitted by log_newpage() --
 * confirm against heapam.c.
 */
typedef struct xl_heap_newpage
{
RelFileNode node; /* relation containing the new page */
ForkNumber forknum; /* fork within the relation */
BlockNumber blkno; /* location of new page */
/* entire page contents follow at end of record */
} xl_heap_newpage;
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/xact.h,v 1.94 2008/03/04 19:54:06 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/xact.h,v 1.95 2008/08/11 11:05:11 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -88,10 +88,10 @@ typedef void (*SubXactCallback) (SubXactEvent event, SubTransactionId mySubid,
typedef struct xl_xact_commit
{
TimestampTz xact_time; /* time of commit */
int nrels; /* number of RelFileNodes */
int nrels; /* number of RelFileForks */
int nsubxacts; /* number of subtransaction XIDs */
/* Array of RelFileNode(s) to drop at commit */
RelFileNode xnodes[1]; /* VARIABLE LENGTH ARRAY */
/* Array of RelFileFork(s) to drop at commit */
RelFileFork xnodes[1]; /* VARIABLE LENGTH ARRAY */
/* ARRAY OF COMMITTED SUBTRANSACTION XIDs FOLLOWS */
} xl_xact_commit;
......@@ -100,10 +100,10 @@ typedef struct xl_xact_commit
typedef struct xl_xact_abort
{
TimestampTz xact_time; /* time of abort */
int nrels; /* number of RelFileNodes */
int nrels; /* number of RelFileForks */
int nsubxacts; /* number of subtransaction XIDs */
/* Array of RelFileNode(s) to drop at abort */
RelFileNode xnodes[1]; /* VARIABLE LENGTH ARRAY */
/* Array of RelFileFork(s) to drop at abort */
RelFileFork xnodes[1]; /* VARIABLE LENGTH ARRAY */
/* ARRAY OF ABORTED SUBTRANSACTION XIDs FOLLOWS */
} xl_xact_abort;
......
......@@ -11,7 +11,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.23 2008/02/17 02:09:30 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.24 2008/08/11 11:05:11 heikki Exp $
*/
#ifndef XLOG_INTERNAL_H
#define XLOG_INTERNAL_H
......@@ -40,6 +40,7 @@
typedef struct BkpBlock
{
RelFileNode node; /* relation containing block */
ForkNumber fork; /* fork within the relation */
BlockNumber block; /* block number */
uint16 hole_offset; /* number of bytes before "hole" */
uint16 hole_length; /* number of bytes in "hole" */
......
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/xlogutils.h,v 1.25 2008/06/19 00:46:06 alvherre Exp $
* $PostgreSQL: pgsql/src/include/access/xlogutils.h,v 1.26 2008/08/11 11:05:11 heikki Exp $
*/
#ifndef XLOG_UTILS_H
#define XLOG_UTILS_H
......@@ -19,11 +19,14 @@
extern void XLogCheckInvalidPages(void);
extern void XLogDropRelation(RelFileNode rnode);
extern void XLogDropRelation(RelFileNode rnode, ForkNumber forknum);
extern void XLogDropDatabase(Oid dbid);
extern void XLogTruncateRelation(RelFileNode rnode, BlockNumber nblocks);
extern void XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
BlockNumber nblocks);
extern Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init);
extern Buffer XLogReadBufferWithFork(RelFileNode rnode, ForkNumber forknum,
BlockNumber blkno, bool init);
extern Relation CreateFakeRelcacheEntry(RelFileNode rnode);
extern void FreeFakeRelcacheEntry(Relation fakerel);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/catalog.h,v 1.40 2008/06/19 00:46:06 alvherre Exp $
* $PostgreSQL: pgsql/src/include/catalog/catalog.h,v 1.41 2008/08/11 11:05:11 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -19,7 +19,7 @@
#include "utils/relcache.h"
extern char *relpath(RelFileNode rnode);
extern char *relpath(RelFileNode rnode, ForkNumber forknum);
extern char *GetDatabasePath(Oid dbNode, Oid spcNode);
extern bool IsSystemRelation(Relation relation);
......
......@@ -5,7 +5,7 @@
*
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
*
* $PostgreSQL: pgsql/src/include/postmaster/bgwriter.h,v 1.11 2008/01/01 19:45:58 momjian Exp $
* $PostgreSQL: pgsql/src/include/postmaster/bgwriter.h,v 1.12 2008/08/11 11:05:11 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -27,7 +27,8 @@ extern void BackgroundWriterMain(void);
extern void RequestCheckpoint(int flags);
extern void CheckpointWriteDelay(int flags, double progress);
extern bool ForwardFsyncRequest(RelFileNode rnode, BlockNumber segno);
extern bool ForwardFsyncRequest(RelFileNode rnode, ForkNumber forknum,
BlockNumber segno);
extern void AbsorbFsyncRequests(void);
extern Size BgWriterShmemSize(void);
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.97 2008/06/19 00:46:06 alvherre Exp $
* $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.98 2008/08/11 11:05:11 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -65,6 +65,7 @@ typedef bits16 BufFlags;
/*
 * Buffer tag: identifies which disk block a buffer contains.  Two tags are
 * equal only if relation, fork and block number all match (see
 * BUFFERTAGS_EQUAL).
 */
typedef struct buftag
{
RelFileNode rnode; /* physical relation identifier */
ForkNumber forkNum; /* fork within the relation */
BlockNumber blockNum; /* blknum relative to begin of reln */
} BufferTag;
......@@ -73,19 +74,22 @@ typedef struct buftag
(a).rnode.spcNode = InvalidOid, \
(a).rnode.dbNode = InvalidOid, \
(a).rnode.relNode = InvalidOid, \
(a).forkNum = InvalidForkNumber, \
(a).blockNum = InvalidBlockNumber \
)
#define INIT_BUFFERTAG(a,xx_rnode,xx_blockNum) \
#define INIT_BUFFERTAG(a,xx_rnode,xx_forkNum,xx_blockNum) \
( \
(a).rnode = (xx_rnode), \
(a).forkNum = (xx_forkNum), \
(a).blockNum = (xx_blockNum) \
)
#define BUFFERTAGS_EQUAL(a,b) \
( \
RelFileNodeEquals((a).rnode, (b).rnode) && \
(a).blockNum == (b).blockNum \
(a).blockNum == (b).blockNum && \
(a).forkNum == (b).forkNum \
)
/*
......@@ -202,10 +206,10 @@ extern int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode);
/* localbuf.c */
extern BufferDesc *LocalBufferAlloc(SMgrRelation reln, BlockNumber blockNum,
bool *foundPtr);
extern BufferDesc *LocalBufferAlloc(SMgrRelation reln, ForkNumber forkNum,
BlockNumber blockNum, bool *foundPtr);
extern void MarkLocalBufferDirty(Buffer buffer);
extern void DropRelFileNodeLocalBuffers(RelFileNode rnode,
extern void DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
BlockNumber firstDelBlock);
extern void AtEOXact_LocalBuffers(bool isCommit);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.114 2008/06/19 00:46:06 alvherre Exp $
* $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.115 2008/08/11 11:05:11 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -144,11 +144,13 @@ extern PGDLLIMPORT int32 *LocalRefCount;
* prototypes for functions in bufmgr.c
*/
extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
extern Buffer ReadBufferWithFork(Relation reln, ForkNumber forkNum, BlockNumber blockNum);
extern Buffer ReadBufferWithStrategy(Relation reln, BlockNumber blockNum,
BufferAccessStrategy strategy);
extern Buffer ReadOrZeroBuffer(Relation reln, BlockNumber blockNum);
extern Buffer ReadOrZeroBuffer(Relation reln, ForkNumber forkNum,
BlockNumber blockNum);
extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp,
BlockNumber blockNum, bool zeroPage);
ForkNumber forkNum, BlockNumber blockNum, bool zeroPage);
extern void ReleaseBuffer(Buffer buffer);
extern void UnlockReleaseBuffer(Buffer buffer);
extern void MarkBufferDirty(Buffer buffer);
......@@ -169,15 +171,16 @@ extern BlockNumber RelationGetNumberOfBlocks(Relation relation);
extern void RelationTruncate(Relation rel, BlockNumber nblocks);
extern void FlushRelationBuffers(Relation rel);
extern void FlushDatabaseBuffers(Oid dbid);
extern void DropRelFileNodeBuffers(RelFileNode rnode, bool istemp,
BlockNumber firstDelBlock);
extern void DropRelFileNodeBuffers(RelFileNode rnode, ForkNumber forkNum,
bool istemp, BlockNumber firstDelBlock);
extern void DropDatabaseBuffers(Oid dbid);
#ifdef NOT_USED
extern void PrintPinnedBufs(void);
#endif
extern Size BufferShmemSize(void);
extern RelFileNode BufferGetFileNode(Buffer buffer);
extern void BufferGetTag(Buffer buffer, RelFileNode *rnode,
ForkNumber *forknum, BlockNumber *blknum);
extern void SetBufferCommitInfoNeedsSave(Buffer buffer);
......
......@@ -7,16 +7,33 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/relfilenode.h,v 1.15 2008/01/01 19:45:59 momjian Exp $
* $PostgreSQL: pgsql/src/include/storage/relfilenode.h,v 1.16 2008/08/11 11:05:11 heikki Exp $
*
*-------------------------------------------------------------------------
*/
#ifndef RELFILENODE_H
#define RELFILENODE_H
/*
* The physical storage of a relation consists of one or more forks. The
* main fork is always created, but in addition to that there can be
* additional forks for storing various metadata. ForkNumber is used when
* we need to refer to a specific fork in a relation.
*/
typedef enum ForkNumber
{
InvalidForkNumber = -1,
MAIN_FORKNUM = 0
/* NOTE: change NUM_FORKS below when you add new forks */
} ForkNumber;
#define MAX_FORKNUM MAIN_FORKNUM
/*
* RelFileNode must provide all that we need to know to physically access
* a relation.
* a relation. Note, however, that a "physical" relation is comprised of
* multiple files on the filesystem, as each fork is stored as a separate
* file, and each fork can be divided into multiple segments. See md.c.
*
* spcNode identifies the tablespace of the relation. It corresponds to
* pg_tablespace.oid.
......@@ -57,4 +74,13 @@ typedef struct RelFileNode
(node1).dbNode == (node2).dbNode && \
(node1).spcNode == (node2).spcNode)
/*
* RelFileFork identifies a particular fork of a relation.
*/
typedef struct RelFileFork
{
RelFileNode rnode; /* physical relation identifier */
ForkNumber forknum; /* which fork of that relation */
} RelFileFork;
#endif /* RELFILENODE_H */
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.62 2008/01/01 19:45:59 momjian Exp $
* $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.63 2008/08/11 11:05:11 heikki Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -51,7 +51,8 @@ typedef struct SMgrRelationData
*/
int smgr_which; /* storage manager selector */
struct _MdfdVec *md_fd; /* for md.c; NULL if not open */
/* for md.c; NULL for forks that are not open */
struct _MdfdVec *md_fd[MAX_FORKNUM + 1];
} SMgrRelationData;
typedef SMgrRelationData *SMgrRelation;
......@@ -59,24 +60,29 @@ typedef SMgrRelationData *SMgrRelation;
extern void smgrinit(void);
extern SMgrRelation smgropen(RelFileNode rnode);
extern bool smgrexists(SMgrRelation reln, ForkNumber forknum);
extern void smgrsetowner(SMgrRelation *owner, SMgrRelation reln);
extern void smgrclose(SMgrRelation reln);
extern void smgrcloseall(void);
extern void smgrclosenode(RelFileNode rnode);
extern void smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo);
extern void smgrscheduleunlink(SMgrRelation reln, bool isTemp);
extern void smgrdounlink(SMgrRelation reln, bool isTemp, bool isRedo);
extern void smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer,
bool isTemp);
extern void smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer);
extern void smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer,
bool isTemp);
extern BlockNumber smgrnblocks(SMgrRelation reln);
extern void smgrtruncate(SMgrRelation reln, BlockNumber nblocks,
bool isTemp);
extern void smgrimmedsync(SMgrRelation reln);
extern void smgrcreate(SMgrRelation reln, ForkNumber forknum,
bool isTemp, bool isRedo);
extern void smgrscheduleunlink(SMgrRelation reln, ForkNumber forknum,
bool isTemp);
extern void smgrdounlink(SMgrRelation reln, ForkNumber forknum,
bool isTemp, bool isRedo);
extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool isTemp);
extern void smgrread(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer);
extern void smgrwrite(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool isTemp);
extern BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum);
extern void smgrtruncate(SMgrRelation reln, ForkNumber forknum,
BlockNumber nblocks, bool isTemp);
extern void smgrimmedsync(SMgrRelation reln, ForkNumber forknum);
extern void smgrDoPendingDeletes(bool isCommit);
extern int smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr,
extern int smgrGetPendingDeletes(bool forCommit, RelFileFork **ptr,
bool *haveNonTemp);
extern void AtSubCommit_smgr(void);
extern void AtSubAbort_smgr(void);
......@@ -95,23 +101,27 @@ extern void smgr_desc(StringInfo buf, uint8 xl_info, char *rec);
/* in md.c */
extern void mdinit(void);
extern void mdclose(SMgrRelation reln);
extern void mdcreate(SMgrRelation reln, bool isRedo);
extern void mdunlink(RelFileNode rnode, bool isRedo);
extern void mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer,
bool isTemp);
extern void mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer);
extern void mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer,
bool isTemp);
extern BlockNumber mdnblocks(SMgrRelation reln);
extern void mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp);
extern void mdimmedsync(SMgrRelation reln);
extern void mdclose(SMgrRelation reln, ForkNumber forknum);
extern void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
extern bool mdexists(SMgrRelation reln, ForkNumber forknum);
extern void mdunlink(RelFileNode rnode, ForkNumber forknum, bool isRedo);
extern void mdextend(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool isTemp);
extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer);
extern void mdwrite(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool isTemp);
extern BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum);
extern void mdtruncate(SMgrRelation reln, ForkNumber forknum,
BlockNumber nblocks, bool isTemp);
extern void mdimmedsync(SMgrRelation reln, ForkNumber forknum);
extern void mdpreckpt(void);
extern void mdsync(void);
extern void mdpostckpt(void);
extern void RememberFsyncRequest(RelFileNode rnode, BlockNumber segno);
extern void ForgetRelationFsyncRequests(RelFileNode rnode);
extern void RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum,
BlockNumber segno);
extern void ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum);
extern void ForgetDatabaseFsyncRequests(Oid dbid);
/* smgrtype.c */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment