Commit 3f0e808c authored by Heikki Linnakangas

Introduce the concept of relation forks. An smgr relation can now consist
of multiple forks, and each fork can be created and grown separately.

The bulk of this patch is about changing the smgr API to include an extra
ForkNumber argument in every smgr function. Also, smgrscheduleunlink and
smgrdounlink no longer implicitly call smgrclose, because other forks might
still exist after unlinking one. The callers of those functions have been
modified to call smgrclose explicitly themselves.

This patch in itself doesn't have any user-visible effect, but provides the
infrastructure needed for upcoming patches. The additional forks envisioned
are a rewritten FSM implementation that doesn't rely on a fixed-size shared
memory block, and a visibility map to allow skipping portions of a table in
VACUUM that have no dead tuples.
parent eca13886
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.75 2008/05/12 00:00:44 alvherre Exp $ * $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.76 2008/08/11 11:05:10 heikki Exp $
* *
* NOTES * NOTES
* Postgres hash pages look like ordinary relation pages. The opaque * Postgres hash pages look like ordinary relation pages. The opaque
...@@ -158,7 +158,7 @@ _hash_getinitbuf(Relation rel, BlockNumber blkno) ...@@ -158,7 +158,7 @@ _hash_getinitbuf(Relation rel, BlockNumber blkno)
if (blkno == P_NEW) if (blkno == P_NEW)
elog(ERROR, "hash AM does not use P_NEW"); elog(ERROR, "hash AM does not use P_NEW");
buf = ReadOrZeroBuffer(rel, blkno); buf = ReadOrZeroBuffer(rel, MAIN_FORKNUM, blkno);
LockBuffer(buf, HASH_WRITE); LockBuffer(buf, HASH_WRITE);
...@@ -203,7 +203,7 @@ _hash_getnewbuf(Relation rel, BlockNumber blkno) ...@@ -203,7 +203,7 @@ _hash_getnewbuf(Relation rel, BlockNumber blkno)
BufferGetBlockNumber(buf), blkno); BufferGetBlockNumber(buf), blkno);
} }
else else
buf = ReadOrZeroBuffer(rel, blkno); buf = ReadOrZeroBuffer(rel, MAIN_FORKNUM, blkno);
LockBuffer(buf, HASH_WRITE); LockBuffer(buf, HASH_WRITE);
...@@ -737,7 +737,7 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks) ...@@ -737,7 +737,7 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
MemSet(zerobuf, 0, sizeof(zerobuf)); MemSet(zerobuf, 0, sizeof(zerobuf));
RelationOpenSmgr(rel); RelationOpenSmgr(rel);
smgrextend(rel->rd_smgr, lastblock, zerobuf, rel->rd_istemp); smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zerobuf, rel->rd_istemp);
return true; return true;
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.261 2008/07/13 20:45:47 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.262 2008/08/11 11:05:10 heikki Exp $
* *
* *
* INTERFACE ROUTINES * INTERFACE ROUTINES
...@@ -3906,7 +3906,8 @@ log_heap_move(Relation reln, Buffer oldbuf, ItemPointerData from, ...@@ -3906,7 +3906,8 @@ log_heap_move(Relation reln, Buffer oldbuf, ItemPointerData from,
* not do anything that assumes we are touching a heap. * not do anything that assumes we are touching a heap.
*/ */
XLogRecPtr XLogRecPtr
log_newpage(RelFileNode *rnode, BlockNumber blkno, Page page) log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
Page page)
{ {
xl_heap_newpage xlrec; xl_heap_newpage xlrec;
XLogRecPtr recptr; XLogRecPtr recptr;
...@@ -3916,6 +3917,7 @@ log_newpage(RelFileNode *rnode, BlockNumber blkno, Page page) ...@@ -3916,6 +3917,7 @@ log_newpage(RelFileNode *rnode, BlockNumber blkno, Page page)
START_CRIT_SECTION(); START_CRIT_SECTION();
xlrec.node = *rnode; xlrec.node = *rnode;
xlrec.forknum = forkNum;
xlrec.blkno = blkno; xlrec.blkno = blkno;
rdata[0].data = (char *) &xlrec; rdata[0].data = (char *) &xlrec;
...@@ -4714,7 +4716,7 @@ heap_sync(Relation rel) ...@@ -4714,7 +4716,7 @@ heap_sync(Relation rel)
/* main heap */ /* main heap */
FlushRelationBuffers(rel); FlushRelationBuffers(rel);
/* FlushRelationBuffers will have opened rd_smgr */ /* FlushRelationBuffers will have opened rd_smgr */
smgrimmedsync(rel->rd_smgr); smgrimmedsync(rel->rd_smgr, MAIN_FORKNUM);
/* toast heap, if any */ /* toast heap, if any */
if (OidIsValid(rel->rd_rel->reltoastrelid)) if (OidIsValid(rel->rd_rel->reltoastrelid))
...@@ -4723,7 +4725,7 @@ heap_sync(Relation rel) ...@@ -4723,7 +4725,7 @@ heap_sync(Relation rel)
toastrel = heap_open(rel->rd_rel->reltoastrelid, AccessShareLock); toastrel = heap_open(rel->rd_rel->reltoastrelid, AccessShareLock);
FlushRelationBuffers(toastrel); FlushRelationBuffers(toastrel);
smgrimmedsync(toastrel->rd_smgr); smgrimmedsync(toastrel->rd_smgr, MAIN_FORKNUM);
heap_close(toastrel, AccessShareLock); heap_close(toastrel, AccessShareLock);
} }
} }
...@@ -96,7 +96,7 @@ ...@@ -96,7 +96,7 @@
* Portions Copyright (c) 1994-5, Regents of the University of California * Portions Copyright (c) 1994-5, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.14 2008/06/19 00:46:03 alvherre Exp $ * $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.15 2008/08/11 11:05:10 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -270,10 +270,11 @@ end_heap_rewrite(RewriteState state) ...@@ -270,10 +270,11 @@ end_heap_rewrite(RewriteState state)
{ {
if (state->rs_use_wal) if (state->rs_use_wal)
log_newpage(&state->rs_new_rel->rd_node, log_newpage(&state->rs_new_rel->rd_node,
MAIN_FORKNUM,
state->rs_blockno, state->rs_blockno,
state->rs_buffer); state->rs_buffer);
RelationOpenSmgr(state->rs_new_rel); RelationOpenSmgr(state->rs_new_rel);
smgrextend(state->rs_new_rel->rd_smgr, state->rs_blockno, smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM, state->rs_blockno,
(char *) state->rs_buffer, true); (char *) state->rs_buffer, true);
} }
...@@ -606,6 +607,7 @@ raw_heap_insert(RewriteState state, HeapTuple tup) ...@@ -606,6 +607,7 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
/* XLOG stuff */ /* XLOG stuff */
if (state->rs_use_wal) if (state->rs_use_wal)
log_newpage(&state->rs_new_rel->rd_node, log_newpage(&state->rs_new_rel->rd_node,
MAIN_FORKNUM,
state->rs_blockno, state->rs_blockno,
page); page);
...@@ -616,8 +618,8 @@ raw_heap_insert(RewriteState state, HeapTuple tup) ...@@ -616,8 +618,8 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
* end_heap_rewrite. * end_heap_rewrite.
*/ */
RelationOpenSmgr(state->rs_new_rel); RelationOpenSmgr(state->rs_new_rel);
smgrextend(state->rs_new_rel->rd_smgr, state->rs_blockno, smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM,
(char *) page, true); state->rs_blockno, (char *) page, true);
state->rs_blockno++; state->rs_blockno++;
state->rs_buffer_valid = false; state->rs_buffer_valid = false;
......
...@@ -57,7 +57,7 @@ ...@@ -57,7 +57,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.116 2008/06/19 00:46:03 alvherre Exp $ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.117 2008/08/11 11:05:10 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -267,7 +267,7 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno) ...@@ -267,7 +267,7 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
if (wstate->btws_use_wal) if (wstate->btws_use_wal)
{ {
/* We use the heap NEWPAGE record type for this */ /* We use the heap NEWPAGE record type for this */
log_newpage(&wstate->index->rd_node, blkno, page); log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page);
} }
else else
{ {
...@@ -286,7 +286,8 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno) ...@@ -286,7 +286,8 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
{ {
if (!wstate->btws_zeropage) if (!wstate->btws_zeropage)
wstate->btws_zeropage = (Page) palloc0(BLCKSZ); wstate->btws_zeropage = (Page) palloc0(BLCKSZ);
smgrextend(wstate->index->rd_smgr, wstate->btws_pages_written++, smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM,
wstate->btws_pages_written++,
(char *) wstate->btws_zeropage, (char *) wstate->btws_zeropage,
true); true);
} }
...@@ -299,13 +300,15 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno) ...@@ -299,13 +300,15 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
if (blkno == wstate->btws_pages_written) if (blkno == wstate->btws_pages_written)
{ {
/* extending the file... */ /* extending the file... */
smgrextend(wstate->index->rd_smgr, blkno, (char *) page, true); smgrextend(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
(char *) page, true);
wstate->btws_pages_written++; wstate->btws_pages_written++;
} }
else else
{ {
/* overwriting a block we zero-filled before */ /* overwriting a block we zero-filled before */
smgrwrite(wstate->index->rd_smgr, blkno, (char *) page, true); smgrwrite(wstate->index->rd_smgr, MAIN_FORKNUM, blkno,
(char *) page, true);
} }
pfree(page); pfree(page);
...@@ -809,6 +812,6 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2) ...@@ -809,6 +812,6 @@ _bt_load(BTWriteState *wstate, BTSpool *btspool, BTSpool *btspool2)
if (!wstate->index->rd_istemp) if (!wstate->index->rd_istemp)
{ {
RelationOpenSmgr(wstate->index); RelationOpenSmgr(wstate->index);
smgrimmedsync(wstate->index->rd_smgr); smgrimmedsync(wstate->index->rd_smgr, MAIN_FORKNUM);
} }
} }
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.44 2008/08/01 13:16:08 alvherre Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/twophase.c,v 1.45 2008/08/11 11:05:10 heikki Exp $
* *
* NOTES * NOTES
* Each global transaction is associated with a global transaction * Each global transaction is associated with a global transaction
...@@ -141,12 +141,12 @@ static void RecordTransactionCommitPrepared(TransactionId xid, ...@@ -141,12 +141,12 @@ static void RecordTransactionCommitPrepared(TransactionId xid,
int nchildren, int nchildren,
TransactionId *children, TransactionId *children,
int nrels, int nrels,
RelFileNode *rels); RelFileFork *rels);
static void RecordTransactionAbortPrepared(TransactionId xid, static void RecordTransactionAbortPrepared(TransactionId xid,
int nchildren, int nchildren,
TransactionId *children, TransactionId *children,
int nrels, int nrels,
RelFileNode *rels); RelFileFork *rels);
static void ProcessRecords(char *bufptr, TransactionId xid, static void ProcessRecords(char *bufptr, TransactionId xid,
const TwoPhaseCallback callbacks[]); const TwoPhaseCallback callbacks[]);
...@@ -694,8 +694,8 @@ TwoPhaseGetDummyProc(TransactionId xid) ...@@ -694,8 +694,8 @@ TwoPhaseGetDummyProc(TransactionId xid)
* *
* 1. TwoPhaseFileHeader * 1. TwoPhaseFileHeader
* 2. TransactionId[] (subtransactions) * 2. TransactionId[] (subtransactions)
* 3. RelFileNode[] (files to be deleted at commit) * 3. RelFileFork[] (files to be deleted at commit)
* 4. RelFileNode[] (files to be deleted at abort) * 4. RelFileFork[] (files to be deleted at abort)
* 5. TwoPhaseRecordOnDisk * 5. TwoPhaseRecordOnDisk
* 6. ... * 6. ...
* 7. TwoPhaseRecordOnDisk (end sentinel, rmid == TWOPHASE_RM_END_ID) * 7. TwoPhaseRecordOnDisk (end sentinel, rmid == TWOPHASE_RM_END_ID)
...@@ -793,8 +793,8 @@ StartPrepare(GlobalTransaction gxact) ...@@ -793,8 +793,8 @@ StartPrepare(GlobalTransaction gxact)
TransactionId xid = gxact->proc.xid; TransactionId xid = gxact->proc.xid;
TwoPhaseFileHeader hdr; TwoPhaseFileHeader hdr;
TransactionId *children; TransactionId *children;
RelFileNode *commitrels; RelFileFork *commitrels;
RelFileNode *abortrels; RelFileFork *abortrels;
/* Initialize linked list */ /* Initialize linked list */
records.head = palloc0(sizeof(XLogRecData)); records.head = palloc0(sizeof(XLogRecData));
...@@ -832,12 +832,12 @@ StartPrepare(GlobalTransaction gxact) ...@@ -832,12 +832,12 @@ StartPrepare(GlobalTransaction gxact)
} }
if (hdr.ncommitrels > 0) if (hdr.ncommitrels > 0)
{ {
save_state_data(commitrels, hdr.ncommitrels * sizeof(RelFileNode)); save_state_data(commitrels, hdr.ncommitrels * sizeof(RelFileFork));
pfree(commitrels); pfree(commitrels);
} }
if (hdr.nabortrels > 0) if (hdr.nabortrels > 0)
{ {
save_state_data(abortrels, hdr.nabortrels * sizeof(RelFileNode)); save_state_data(abortrels, hdr.nabortrels * sizeof(RelFileFork));
pfree(abortrels); pfree(abortrels);
} }
} }
...@@ -1140,8 +1140,8 @@ FinishPreparedTransaction(const char *gid, bool isCommit) ...@@ -1140,8 +1140,8 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
TwoPhaseFileHeader *hdr; TwoPhaseFileHeader *hdr;
TransactionId latestXid; TransactionId latestXid;
TransactionId *children; TransactionId *children;
RelFileNode *commitrels; RelFileFork *commitrels;
RelFileNode *abortrels; RelFileFork *abortrels;
int i; int i;
/* /*
...@@ -1169,10 +1169,10 @@ FinishPreparedTransaction(const char *gid, bool isCommit) ...@@ -1169,10 +1169,10 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader)); bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
children = (TransactionId *) bufptr; children = (TransactionId *) bufptr;
bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId)); bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
commitrels = (RelFileNode *) bufptr; commitrels = (RelFileFork *) bufptr;
bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode)); bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileFork));
abortrels = (RelFileNode *) bufptr; abortrels = (RelFileFork *) bufptr;
bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode)); bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileFork));
/* compute latestXid among all children */ /* compute latestXid among all children */
latestXid = TransactionIdLatest(xid, hdr->nsubxacts, children); latestXid = TransactionIdLatest(xid, hdr->nsubxacts, children);
...@@ -1215,12 +1215,20 @@ FinishPreparedTransaction(const char *gid, bool isCommit) ...@@ -1215,12 +1215,20 @@ FinishPreparedTransaction(const char *gid, bool isCommit)
if (isCommit) if (isCommit)
{ {
for (i = 0; i < hdr->ncommitrels; i++) for (i = 0; i < hdr->ncommitrels; i++)
smgrdounlink(smgropen(commitrels[i]), false, false); {
SMgrRelation srel = smgropen(commitrels[i].rnode);
smgrdounlink(srel, commitrels[i].forknum, false, false);
smgrclose(srel);
}
} }
else else
{ {
for (i = 0; i < hdr->nabortrels; i++) for (i = 0; i < hdr->nabortrels; i++)
smgrdounlink(smgropen(abortrels[i]), false, false); {
SMgrRelation srel = smgropen(abortrels[i].rnode);
smgrdounlink(srel, abortrels[i].forknum, false, false);
smgrclose(srel);
}
} }
/* And now do the callbacks */ /* And now do the callbacks */
...@@ -1631,8 +1639,8 @@ RecoverPreparedTransactions(void) ...@@ -1631,8 +1639,8 @@ RecoverPreparedTransactions(void)
bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader)); bufptr = buf + MAXALIGN(sizeof(TwoPhaseFileHeader));
subxids = (TransactionId *) bufptr; subxids = (TransactionId *) bufptr;
bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId)); bufptr += MAXALIGN(hdr->nsubxacts * sizeof(TransactionId));
bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileNode)); bufptr += MAXALIGN(hdr->ncommitrels * sizeof(RelFileFork));
bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileNode)); bufptr += MAXALIGN(hdr->nabortrels * sizeof(RelFileFork));
/* /*
* Reconstruct subtrans state for the transaction --- needed * Reconstruct subtrans state for the transaction --- needed
...@@ -1685,7 +1693,7 @@ RecordTransactionCommitPrepared(TransactionId xid, ...@@ -1685,7 +1693,7 @@ RecordTransactionCommitPrepared(TransactionId xid,
int nchildren, int nchildren,
TransactionId *children, TransactionId *children,
int nrels, int nrels,
RelFileNode *rels) RelFileFork *rels)
{ {
XLogRecData rdata[3]; XLogRecData rdata[3];
int lastrdata = 0; int lastrdata = 0;
...@@ -1710,7 +1718,7 @@ RecordTransactionCommitPrepared(TransactionId xid, ...@@ -1710,7 +1718,7 @@ RecordTransactionCommitPrepared(TransactionId xid,
{ {
rdata[0].next = &(rdata[1]); rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) rels; rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode); rdata[1].len = nrels * sizeof(RelFileFork);
rdata[1].buffer = InvalidBuffer; rdata[1].buffer = InvalidBuffer;
lastrdata = 1; lastrdata = 1;
} }
...@@ -1760,7 +1768,7 @@ RecordTransactionAbortPrepared(TransactionId xid, ...@@ -1760,7 +1768,7 @@ RecordTransactionAbortPrepared(TransactionId xid,
int nchildren, int nchildren,
TransactionId *children, TransactionId *children,
int nrels, int nrels,
RelFileNode *rels) RelFileFork *rels)
{ {
XLogRecData rdata[3]; XLogRecData rdata[3];
int lastrdata = 0; int lastrdata = 0;
...@@ -1790,7 +1798,7 @@ RecordTransactionAbortPrepared(TransactionId xid, ...@@ -1790,7 +1798,7 @@ RecordTransactionAbortPrepared(TransactionId xid,
{ {
rdata[0].next = &(rdata[1]); rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) rels; rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode); rdata[1].len = nrels * sizeof(RelFileFork);
rdata[1].buffer = InvalidBuffer; rdata[1].buffer = InvalidBuffer;
lastrdata = 1; lastrdata = 1;
} }
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.264 2008/05/12 20:01:58 alvherre Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.265 2008/08/11 11:05:10 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -819,7 +819,7 @@ RecordTransactionCommit(void) ...@@ -819,7 +819,7 @@ RecordTransactionCommit(void)
bool markXidCommitted = TransactionIdIsValid(xid); bool markXidCommitted = TransactionIdIsValid(xid);
TransactionId latestXid = InvalidTransactionId; TransactionId latestXid = InvalidTransactionId;
int nrels; int nrels;
RelFileNode *rels; RelFileFork *rels;
bool haveNonTemp; bool haveNonTemp;
int nchildren; int nchildren;
TransactionId *children; TransactionId *children;
...@@ -900,7 +900,7 @@ RecordTransactionCommit(void) ...@@ -900,7 +900,7 @@ RecordTransactionCommit(void)
{ {
rdata[0].next = &(rdata[1]); rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) rels; rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode); rdata[1].len = nrels * sizeof(RelFileFork);
rdata[1].buffer = InvalidBuffer; rdata[1].buffer = InvalidBuffer;
lastrdata = 1; lastrdata = 1;
} }
...@@ -1203,7 +1203,7 @@ RecordTransactionAbort(bool isSubXact) ...@@ -1203,7 +1203,7 @@ RecordTransactionAbort(bool isSubXact)
TransactionId xid = GetCurrentTransactionIdIfAny(); TransactionId xid = GetCurrentTransactionIdIfAny();
TransactionId latestXid; TransactionId latestXid;
int nrels; int nrels;
RelFileNode *rels; RelFileFork *rels;
int nchildren; int nchildren;
TransactionId *children; TransactionId *children;
XLogRecData rdata[3]; XLogRecData rdata[3];
...@@ -1264,7 +1264,7 @@ RecordTransactionAbort(bool isSubXact) ...@@ -1264,7 +1264,7 @@ RecordTransactionAbort(bool isSubXact)
{ {
rdata[0].next = &(rdata[1]); rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) rels; rdata[1].data = (char *) rels;
rdata[1].len = nrels * sizeof(RelFileNode); rdata[1].len = nrels * sizeof(RelFileFork);
rdata[1].buffer = InvalidBuffer; rdata[1].buffer = InvalidBuffer;
lastrdata = 1; lastrdata = 1;
} }
...@@ -4282,8 +4282,13 @@ xact_redo_commit(xl_xact_commit *xlrec, TransactionId xid) ...@@ -4282,8 +4282,13 @@ xact_redo_commit(xl_xact_commit *xlrec, TransactionId xid)
/* Make sure files supposed to be dropped are dropped */ /* Make sure files supposed to be dropped are dropped */
for (i = 0; i < xlrec->nrels; i++) for (i = 0; i < xlrec->nrels; i++)
{ {
XLogDropRelation(xlrec->xnodes[i]); SMgrRelation srel;
smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
XLogDropRelation(xlrec->xnodes[i].rnode, xlrec->xnodes[i].forknum);
srel = smgropen(xlrec->xnodes[i].rnode);
smgrdounlink(srel, xlrec->xnodes[i].forknum, false, true);
smgrclose(srel);
} }
} }
...@@ -4317,8 +4322,13 @@ xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid) ...@@ -4317,8 +4322,13 @@ xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid)
/* Make sure files supposed to be dropped are dropped */ /* Make sure files supposed to be dropped are dropped */
for (i = 0; i < xlrec->nrels; i++) for (i = 0; i < xlrec->nrels; i++)
{ {
XLogDropRelation(xlrec->xnodes[i]); SMgrRelation srel;
smgrdounlink(smgropen(xlrec->xnodes[i]), false, true);
XLogDropRelation(xlrec->xnodes[i].rnode, xlrec->xnodes[i].forknum);
srel = smgropen(xlrec->xnodes[i].rnode);
smgrdounlink(srel, xlrec->xnodes[i].forknum, false, true);
smgrclose(srel);
} }
} }
...@@ -4374,10 +4384,12 @@ xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec) ...@@ -4374,10 +4384,12 @@ xact_desc_commit(StringInfo buf, xl_xact_commit *xlrec)
appendStringInfo(buf, "; rels:"); appendStringInfo(buf, "; rels:");
for (i = 0; i < xlrec->nrels; i++) for (i = 0; i < xlrec->nrels; i++)
{ {
RelFileNode rnode = xlrec->xnodes[i]; RelFileNode rnode = xlrec->xnodes[i].rnode;
ForkNumber forknum = xlrec->xnodes[i].forknum;
appendStringInfo(buf, " %u/%u/%u", appendStringInfo(buf, " %u/%u/%u/%u",
rnode.spcNode, rnode.dbNode, rnode.relNode); rnode.spcNode, rnode.dbNode, rnode.relNode,
forknum);
} }
} }
if (xlrec->nsubxacts > 0) if (xlrec->nsubxacts > 0)
...@@ -4402,10 +4414,12 @@ xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec) ...@@ -4402,10 +4414,12 @@ xact_desc_abort(StringInfo buf, xl_xact_abort *xlrec)
appendStringInfo(buf, "; rels:"); appendStringInfo(buf, "; rels:");
for (i = 0; i < xlrec->nrels; i++) for (i = 0; i < xlrec->nrels; i++)
{ {
RelFileNode rnode = xlrec->xnodes[i]; RelFileNode rnode = xlrec->xnodes[i].rnode;
ForkNumber forknum = xlrec->xnodes[i].forknum;
appendStringInfo(buf, " %u/%u/%u", appendStringInfo(buf, " %u/%u/%u/%u",
rnode.spcNode, rnode.dbNode, rnode.relNode); rnode.spcNode, rnode.dbNode, rnode.relNode,
forknum);
} }
} }
if (xlrec->nsubxacts > 0) if (xlrec->nsubxacts > 0)
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.316 2008/07/13 20:45:47 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.317 2008/08/11 11:05:10 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1034,8 +1034,7 @@ XLogCheckBuffer(XLogRecData *rdata, bool doPageWrites, ...@@ -1034,8 +1034,7 @@ XLogCheckBuffer(XLogRecData *rdata, bool doPageWrites,
/* /*
* The page needs to be backed up, so set up *bkpb * The page needs to be backed up, so set up *bkpb
*/ */
bkpb->node = BufferGetFileNode(rdata->buffer); BufferGetTag(rdata->buffer, &bkpb->node, &bkpb->fork, &bkpb->block);
bkpb->block = BufferGetBlockNumber(rdata->buffer);
if (rdata->buffer_std) if (rdata->buffer_std)
{ {
...@@ -2855,7 +2854,8 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn) ...@@ -2855,7 +2854,8 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn)
memcpy(&bkpb, blk, sizeof(BkpBlock)); memcpy(&bkpb, blk, sizeof(BkpBlock));
blk += sizeof(BkpBlock); blk += sizeof(BkpBlock);
buffer = XLogReadBuffer(bkpb.node, bkpb.block, true); buffer = XLogReadBufferWithFork(bkpb.node, bkpb.fork, bkpb.block,
true);
Assert(BufferIsValid(buffer)); Assert(BufferIsValid(buffer));
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.57 2008/07/13 20:45:47 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.58 2008/08/11 11:05:10 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
typedef struct xl_invalid_page_key typedef struct xl_invalid_page_key
{ {
RelFileNode node; /* the relation */ RelFileNode node; /* the relation */
ForkNumber forkno; /* the fork number */
BlockNumber blkno; /* the page */ BlockNumber blkno; /* the page */
} xl_invalid_page_key; } xl_invalid_page_key;
...@@ -51,7 +52,8 @@ static HTAB *invalid_page_tab = NULL; ...@@ -51,7 +52,8 @@ static HTAB *invalid_page_tab = NULL;
/* Log a reference to an invalid page */ /* Log a reference to an invalid page */
static void static void
log_invalid_page(RelFileNode node, BlockNumber blkno, bool present) log_invalid_page(RelFileNode node, ForkNumber forkno, BlockNumber blkno,
bool present)
{ {
xl_invalid_page_key key; xl_invalid_page_key key;
xl_invalid_page *hentry; xl_invalid_page *hentry;
...@@ -63,11 +65,11 @@ log_invalid_page(RelFileNode node, BlockNumber blkno, bool present) ...@@ -63,11 +65,11 @@ log_invalid_page(RelFileNode node, BlockNumber blkno, bool present)
* something about the XLOG record that generated the reference). * something about the XLOG record that generated the reference).
*/ */
if (present) if (present)
elog(DEBUG1, "page %u of relation %u/%u/%u is uninitialized", elog(DEBUG1, "page %u of relation %u/%u/%u/%u is uninitialized",
blkno, node.spcNode, node.dbNode, node.relNode); blkno, node.spcNode, node.dbNode, node.relNode, forkno);
else else
elog(DEBUG1, "page %u of relation %u/%u/%u does not exist", elog(DEBUG1, "page %u of relation %u/%u/%u/%u does not exist",
blkno, node.spcNode, node.dbNode, node.relNode); blkno, node.spcNode, node.dbNode, node.relNode, forkno);
if (invalid_page_tab == NULL) if (invalid_page_tab == NULL)
{ {
...@@ -87,6 +89,7 @@ log_invalid_page(RelFileNode node, BlockNumber blkno, bool present) ...@@ -87,6 +89,7 @@ log_invalid_page(RelFileNode node, BlockNumber blkno, bool present)
/* we currently assume xl_invalid_page_key contains no padding */ /* we currently assume xl_invalid_page_key contains no padding */
key.node = node; key.node = node;
key.forkno = forkno;
key.blkno = blkno; key.blkno = blkno;
hentry = (xl_invalid_page *) hentry = (xl_invalid_page *)
hash_search(invalid_page_tab, (void *) &key, HASH_ENTER, &found); hash_search(invalid_page_tab, (void *) &key, HASH_ENTER, &found);
...@@ -104,7 +107,7 @@ log_invalid_page(RelFileNode node, BlockNumber blkno, bool present) ...@@ -104,7 +107,7 @@ log_invalid_page(RelFileNode node, BlockNumber blkno, bool present)
/* Forget any invalid pages >= minblkno, because they've been dropped */ /* Forget any invalid pages >= minblkno, because they've been dropped */
static void static void
forget_invalid_pages(RelFileNode node, BlockNumber minblkno) forget_invalid_pages(RelFileNode node, ForkNumber forkno, BlockNumber minblkno)
{ {
HASH_SEQ_STATUS status; HASH_SEQ_STATUS status;
xl_invalid_page *hentry; xl_invalid_page *hentry;
...@@ -117,11 +120,12 @@ forget_invalid_pages(RelFileNode node, BlockNumber minblkno) ...@@ -117,11 +120,12 @@ forget_invalid_pages(RelFileNode node, BlockNumber minblkno)
while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL) while ((hentry = (xl_invalid_page *) hash_seq_search(&status)) != NULL)
{ {
if (RelFileNodeEquals(hentry->key.node, node) && if (RelFileNodeEquals(hentry->key.node, node) &&
hentry->key.forkno == forkno &&
hentry->key.blkno >= minblkno) hentry->key.blkno >= minblkno)
{ {
elog(DEBUG2, "page %u of relation %u/%u/%u has been dropped", elog(DEBUG2, "page %u of relation %u/%u/%u/%u has been dropped",
hentry->key.blkno, hentry->key.node.spcNode, hentry->key.blkno, hentry->key.node.spcNode,
hentry->key.node.dbNode, hentry->key.node.relNode); hentry->key.node.dbNode, hentry->key.node.relNode, forkno);
if (hash_search(invalid_page_tab, if (hash_search(invalid_page_tab,
(void *) &hentry->key, (void *) &hentry->key,
...@@ -223,6 +227,18 @@ XLogCheckInvalidPages(void) ...@@ -223,6 +227,18 @@ XLogCheckInvalidPages(void)
*/ */
Buffer Buffer
XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init) XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
{
return XLogReadBufferWithFork(rnode, MAIN_FORKNUM, blkno, init);
}
/*
* XLogReadBufferWithFork
* Like XLogReadBuffer, but for reading other relation forks than
* the main one.
*/
Buffer
XLogReadBufferWithFork(RelFileNode rnode, ForkNumber forknum,
BlockNumber blkno, bool init)
{ {
BlockNumber lastblock; BlockNumber lastblock;
Buffer buffer; Buffer buffer;
...@@ -241,21 +257,21 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init) ...@@ -241,21 +257,21 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
* filesystem loses an inode during a crash. Better to write the data * filesystem loses an inode during a crash. Better to write the data
* until we are actually told to delete the file.) * until we are actually told to delete the file.)
*/ */
smgrcreate(smgr, false, true); smgrcreate(smgr, forknum, false, true);
lastblock = smgrnblocks(smgr); lastblock = smgrnblocks(smgr, forknum);
if (blkno < lastblock) if (blkno < lastblock)
{ {
/* page exists in file */ /* page exists in file */
buffer = ReadBufferWithoutRelcache(rnode, false, blkno, init); buffer = ReadBufferWithoutRelcache(rnode, false, forknum, blkno, init);
} }
else else
{ {
/* hm, page doesn't exist in file */ /* hm, page doesn't exist in file */
if (!init) if (!init)
{ {
log_invalid_page(rnode, blkno, false); log_invalid_page(rnode, forknum, blkno, false);
return InvalidBuffer; return InvalidBuffer;
} }
/* OK to extend the file */ /* OK to extend the file */
...@@ -266,7 +282,8 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init) ...@@ -266,7 +282,8 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
{ {
if (buffer != InvalidBuffer) if (buffer != InvalidBuffer)
ReleaseBuffer(buffer); ReleaseBuffer(buffer);
buffer = ReadBufferWithoutRelcache(rnode, false, P_NEW, false); buffer = ReadBufferWithoutRelcache(rnode, false, forknum,
P_NEW, false);
lastblock++; lastblock++;
} }
Assert(BufferGetBlockNumber(buffer) == blkno); Assert(BufferGetBlockNumber(buffer) == blkno);
...@@ -282,7 +299,7 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init) ...@@ -282,7 +299,7 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
if (PageIsNew(page)) if (PageIsNew(page))
{ {
UnlockReleaseBuffer(buffer); UnlockReleaseBuffer(buffer);
log_invalid_page(rnode, blkno, true); log_invalid_page(rnode, forknum, blkno, true);
return InvalidBuffer; return InvalidBuffer;
} }
} }
...@@ -363,12 +380,9 @@ FreeFakeRelcacheEntry(Relation fakerel) ...@@ -363,12 +380,9 @@ FreeFakeRelcacheEntry(Relation fakerel)
* any open "invalid-page" records for the relation. * any open "invalid-page" records for the relation.
*/ */
void void
XLogDropRelation(RelFileNode rnode) XLogDropRelation(RelFileNode rnode, ForkNumber forknum)
{ {
/* Tell smgr to forget about this relation as well */ forget_invalid_pages(rnode, forknum, 0);
smgrclosenode(rnode);
forget_invalid_pages(rnode, 0);
} }
/* /*
...@@ -397,7 +411,8 @@ XLogDropDatabase(Oid dbid) ...@@ -397,7 +411,8 @@ XLogDropDatabase(Oid dbid)
* We need to clean up any open "invalid-page" records for the dropped pages. * We need to clean up any open "invalid-page" records for the dropped pages.
*/ */
void void
XLogTruncateRelation(RelFileNode rnode, BlockNumber nblocks) XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
BlockNumber nblocks)
{ {
forget_invalid_pages(rnode, nblocks); forget_invalid_pages(rnode, forkNum, nblocks);
} }
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/catalog/catalog.c,v 1.77 2008/06/19 00:46:04 alvherre Exp $ * $PostgreSQL: pgsql/src/backend/catalog/catalog.c,v 1.78 2008/08/11 11:05:10 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -42,7 +42,8 @@ ...@@ -42,7 +42,8 @@
#include "utils/tqual.h" #include "utils/tqual.h"
#define OIDCHARS 10 /* max chars printed by %u */ #define OIDCHARS 10 /* max chars printed by %u */
#define FORKNUMCHARS 1 /* max chars for a fork number */
/* /*
...@@ -51,7 +52,7 @@ ...@@ -51,7 +52,7 @@
* Result is a palloc'd string. * Result is a palloc'd string.
*/ */
char * char *
relpath(RelFileNode rnode) relpath(RelFileNode rnode, ForkNumber forknum)
{ {
int pathlen; int pathlen;
char *path; char *path;
...@@ -60,26 +61,38 @@ relpath(RelFileNode rnode) ...@@ -60,26 +61,38 @@ relpath(RelFileNode rnode)
{ {
/* Shared system relations live in {datadir}/global */ /* Shared system relations live in {datadir}/global */
Assert(rnode.dbNode == 0); Assert(rnode.dbNode == 0);
pathlen = 7 + OIDCHARS + 1; pathlen = 7 + OIDCHARS + 1 + FORKNUMCHARS + 1;
path = (char *) palloc(pathlen); path = (char *) palloc(pathlen);
snprintf(path, pathlen, "global/%u", if (forknum != MAIN_FORKNUM)
rnode.relNode); snprintf(path, pathlen, "global/%u_%u",
rnode.relNode, forknum);
else
snprintf(path, pathlen, "global/%u", rnode.relNode);
} }
else if (rnode.spcNode == DEFAULTTABLESPACE_OID) else if (rnode.spcNode == DEFAULTTABLESPACE_OID)
{ {
/* The default tablespace is {datadir}/base */ /* The default tablespace is {datadir}/base */
pathlen = 5 + OIDCHARS + 1 + OIDCHARS + 1; pathlen = 5 + OIDCHARS + 1 + OIDCHARS + 1 + FORKNUMCHARS + 1;
path = (char *) palloc(pathlen); path = (char *) palloc(pathlen);
snprintf(path, pathlen, "base/%u/%u", if (forknum != MAIN_FORKNUM)
rnode.dbNode, rnode.relNode); snprintf(path, pathlen, "base/%u/%u_%u",
rnode.dbNode, rnode.relNode, forknum);
else
snprintf(path, pathlen, "base/%u/%u",
rnode.dbNode, rnode.relNode);
} }
else else
{ {
/* All other tablespaces are accessed via symlinks */ /* All other tablespaces are accessed via symlinks */
pathlen = 10 + OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1; pathlen = 10 + OIDCHARS + 1 + OIDCHARS + 1 + OIDCHARS + 1
+ FORKNUMCHARS + 1;
path = (char *) palloc(pathlen); path = (char *) palloc(pathlen);
snprintf(path, pathlen, "pg_tblspc/%u/%u/%u", if (forknum != MAIN_FORKNUM)
rnode.spcNode, rnode.dbNode, rnode.relNode); snprintf(path, pathlen, "pg_tblspc/%u/%u/%u_%u",
rnode.spcNode, rnode.dbNode, rnode.relNode, forknum);
else
snprintf(path, pathlen, "pg_tblspc/%u/%u/%u",
rnode.spcNode, rnode.dbNode, rnode.relNode);
} }
return path; return path;
} }
...@@ -431,7 +444,7 @@ GetNewRelFileNode(Oid reltablespace, bool relisshared, Relation pg_class) ...@@ -431,7 +444,7 @@ GetNewRelFileNode(Oid reltablespace, bool relisshared, Relation pg_class)
rnode.relNode = GetNewObjectId(); rnode.relNode = GetNewObjectId();
/* Check for existing file of same name */ /* Check for existing file of same name */
rpath = relpath(rnode); rpath = relpath(rnode, MAIN_FORKNUM);
fd = BasicOpenFile(rpath, O_RDONLY | PG_BINARY, 0); fd = BasicOpenFile(rpath, O_RDONLY | PG_BINARY, 0);
if (fd >= 0) if (fd >= 0)
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.336 2008/07/30 19:35:13 tgl Exp $ * $PostgreSQL: pgsql/src/backend/catalog/heap.c,v 1.337 2008/08/11 11:05:10 heikki Exp $
* *
* *
* INTERFACE ROUTINES * INTERFACE ROUTINES
...@@ -292,13 +292,16 @@ heap_create(const char *relname, ...@@ -292,13 +292,16 @@ heap_create(const char *relname,
shared_relation); shared_relation);
/* /*
* have the storage manager create the relation's disk file, if needed. * Have the storage manager create the relation's disk file, if needed.
*
* We only create storage for the main fork here. The caller is
* responsible for creating any additional forks if needed.
*/ */
if (create_storage) if (create_storage)
{ {
Assert(rel->rd_smgr == NULL); Assert(rel->rd_smgr == NULL);
RelationOpenSmgr(rel); RelationOpenSmgr(rel);
smgrcreate(rel->rd_smgr, rel->rd_istemp, false); smgrcreate(rel->rd_smgr, MAIN_FORKNUM, rel->rd_istemp, false);
} }
return rel; return rel;
...@@ -1385,13 +1388,18 @@ heap_drop_with_catalog(Oid relid) ...@@ -1385,13 +1388,18 @@ heap_drop_with_catalog(Oid relid)
rel = relation_open(relid, AccessExclusiveLock); rel = relation_open(relid, AccessExclusiveLock);
/* /*
* Schedule unlinking of the relation's physical file at commit. * Schedule unlinking of the relation's physical files at commit.
*/ */
if (rel->rd_rel->relkind != RELKIND_VIEW && if (rel->rd_rel->relkind != RELKIND_VIEW &&
rel->rd_rel->relkind != RELKIND_COMPOSITE_TYPE) rel->rd_rel->relkind != RELKIND_COMPOSITE_TYPE)
{ {
ForkNumber forknum;
RelationOpenSmgr(rel); RelationOpenSmgr(rel);
smgrscheduleunlink(rel->rd_smgr, rel->rd_istemp); for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
if (smgrexists(rel->rd_smgr, forknum))
smgrscheduleunlink(rel->rd_smgr, forknum, rel->rd_istemp);
RelationCloseSmgr(rel);
} }
/* /*
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.301 2008/08/10 19:02:33 tgl Exp $ * $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.302 2008/08/11 11:05:10 heikki Exp $
* *
* *
* INTERFACE ROUTINES * INTERFACE ROUTINES
...@@ -874,6 +874,7 @@ index_drop(Oid indexId) ...@@ -874,6 +874,7 @@ index_drop(Oid indexId)
Relation indexRelation; Relation indexRelation;
HeapTuple tuple; HeapTuple tuple;
bool hasexprs; bool hasexprs;
ForkNumber forknum;
/* /*
* To drop an index safely, we must grab exclusive lock on its parent * To drop an index safely, we must grab exclusive lock on its parent
...@@ -892,11 +893,14 @@ index_drop(Oid indexId) ...@@ -892,11 +893,14 @@ index_drop(Oid indexId)
userIndexRelation = index_open(indexId, AccessExclusiveLock); userIndexRelation = index_open(indexId, AccessExclusiveLock);
/* /*
* Schedule physical removal of the file * Schedule physical removal of the files
*/ */
RelationOpenSmgr(userIndexRelation); RelationOpenSmgr(userIndexRelation);
smgrscheduleunlink(userIndexRelation->rd_smgr, for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
userIndexRelation->rd_istemp); if (smgrexists(userIndexRelation->rd_smgr, forknum))
smgrscheduleunlink(userIndexRelation->rd_smgr, forknum,
userIndexRelation->rd_istemp);
RelationCloseSmgr(userIndexRelation);
/* /*
* Close and flush the index's relcache entry, to ensure relcache doesn't * Close and flush the index's relcache entry, to ensure relcache doesn't
...@@ -1260,6 +1264,7 @@ setNewRelfilenode(Relation relation, TransactionId freezeXid) ...@@ -1260,6 +1264,7 @@ setNewRelfilenode(Relation relation, TransactionId freezeXid)
Relation pg_class; Relation pg_class;
HeapTuple tuple; HeapTuple tuple;
Form_pg_class rd_rel; Form_pg_class rd_rel;
ForkNumber i;
/* Can't change relfilenode for nailed tables (indexes ok though) */ /* Can't change relfilenode for nailed tables (indexes ok though) */
Assert(!relation->rd_isnailed || Assert(!relation->rd_isnailed ||
...@@ -1290,18 +1295,29 @@ setNewRelfilenode(Relation relation, TransactionId freezeXid) ...@@ -1290,18 +1295,29 @@ setNewRelfilenode(Relation relation, TransactionId freezeXid)
RelationGetRelid(relation)); RelationGetRelid(relation));
rd_rel = (Form_pg_class) GETSTRUCT(tuple); rd_rel = (Form_pg_class) GETSTRUCT(tuple);
/* create another storage file. Is it a little ugly ? */ RelationOpenSmgr(relation);
/* NOTE: any conflict in relfilenode value will be caught here */
/*
* ... and create storage for corresponding forks in the new relfilenode.
*
* NOTE: any conflict in relfilenode value will be caught here
*/
newrnode = relation->rd_node; newrnode = relation->rd_node;
newrnode.relNode = newrelfilenode; newrnode.relNode = newrelfilenode;
srel = smgropen(newrnode); srel = smgropen(newrnode);
smgrcreate(srel, relation->rd_istemp, false);
smgrclose(srel);
/* schedule unlinking old relfilenode */ /* Create the main fork, like heap_create() does */
RelationOpenSmgr(relation); smgrcreate(srel, MAIN_FORKNUM, relation->rd_istemp, false);
smgrscheduleunlink(relation->rd_smgr, relation->rd_istemp);
/* schedule unlinking old files */
for (i = 0; i <= MAX_FORKNUM; i++)
{
if (smgrexists(relation->rd_smgr, i))
smgrscheduleunlink(relation->rd_smgr, i, relation->rd_istemp);
}
smgrclose(srel);
RelationCloseSmgr(relation);
/* update the pg_class row */ /* update the pg_class row */
rd_rel->relfilenode = newrelfilenode; rd_rel->relfilenode = newrelfilenode;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.261 2008/07/16 19:33:25 tgl Exp $ * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.262 2008/08/11 11:05:11 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -318,7 +318,8 @@ static void ATExecEnableDisableRule(Relation rel, char *rulename, ...@@ -318,7 +318,8 @@ static void ATExecEnableDisableRule(Relation rel, char *rulename,
char fires_when); char fires_when);
static void ATExecAddInherit(Relation rel, RangeVar *parent); static void ATExecAddInherit(Relation rel, RangeVar *parent);
static void ATExecDropInherit(Relation rel, RangeVar *parent); static void ATExecDropInherit(Relation rel, RangeVar *parent);
static void copy_relation_data(Relation rel, SMgrRelation dst); static void copy_relation_data(SMgrRelation rel, SMgrRelation dst,
ForkNumber forkNum, bool istemp);
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
...@@ -6483,6 +6484,7 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace) ...@@ -6483,6 +6484,7 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace)
Relation pg_class; Relation pg_class;
HeapTuple tuple; HeapTuple tuple;
Form_pg_class rd_rel; Form_pg_class rd_rel;
ForkNumber forkNum;
/* /*
* Need lock here in case we are recursing to toast table or index * Need lock here in case we are recursing to toast table or index
...@@ -6538,26 +6540,42 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace) ...@@ -6538,26 +6540,42 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace)
elog(ERROR, "cache lookup failed for relation %u", tableOid); elog(ERROR, "cache lookup failed for relation %u", tableOid);
rd_rel = (Form_pg_class) GETSTRUCT(tuple); rd_rel = (Form_pg_class) GETSTRUCT(tuple);
/* create another storage file. Is it a little ugly ? */ /*
/* NOTE: any conflict in relfilenode value will be caught here */ * Since we copy the file directly without looking at the shared buffers,
* we'd better first flush out any pages of the source relation that are
* in shared buffers. We assume no new changes will be made while we are
* holding exclusive lock on the rel.
*/
FlushRelationBuffers(rel);
/* Open old and new relation */
newrnode = rel->rd_node; newrnode = rel->rd_node;
newrnode.spcNode = newTableSpace; newrnode.spcNode = newTableSpace;
dstrel = smgropen(newrnode); dstrel = smgropen(newrnode);
smgrcreate(dstrel, rel->rd_istemp, false);
/* copy relation data to the new physical file */
copy_relation_data(rel, dstrel);
/* schedule unlinking old physical file */
RelationOpenSmgr(rel); RelationOpenSmgr(rel);
smgrscheduleunlink(rel->rd_smgr, rel->rd_istemp);
/* /*
* Now drop smgr references. The source was already dropped by * Create and copy all forks of the relation, and schedule unlinking
* smgrscheduleunlink. * of old physical files.
*
* NOTE: any conflict in relfilenode value will be caught in
* smgrcreate() below.
*/ */
for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++)
{
if (smgrexists(rel->rd_smgr, forkNum))
{
smgrcreate(dstrel, forkNum, rel->rd_istemp, false);
copy_relation_data(rel->rd_smgr, dstrel, forkNum, rel->rd_istemp);
smgrscheduleunlink(rel->rd_smgr, forkNum, rel->rd_istemp);
}
}
/* Close old and new relation */
smgrclose(dstrel); smgrclose(dstrel);
RelationCloseSmgr(rel);
/* update the pg_class row */ /* update the pg_class row */
rd_rel->reltablespace = (newTableSpace == MyDatabaseTableSpace) ? InvalidOid : newTableSpace; rd_rel->reltablespace = (newTableSpace == MyDatabaseTableSpace) ? InvalidOid : newTableSpace;
...@@ -6584,47 +6602,37 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace) ...@@ -6584,47 +6602,37 @@ ATExecSetTableSpace(Oid tableOid, Oid newTableSpace)
* Copy data, block by block * Copy data, block by block
*/ */
static void static void
copy_relation_data(Relation rel, SMgrRelation dst) copy_relation_data(SMgrRelation src, SMgrRelation dst,
ForkNumber forkNum, bool istemp)
{ {
SMgrRelation src;
bool use_wal; bool use_wal;
BlockNumber nblocks; BlockNumber nblocks;
BlockNumber blkno; BlockNumber blkno;
char buf[BLCKSZ]; char buf[BLCKSZ];
Page page = (Page) buf; Page page = (Page) buf;
/*
* Since we copy the file directly without looking at the shared buffers,
* we'd better first flush out any pages of the source relation that are
* in shared buffers. We assume no new changes will be made while we are
* holding exclusive lock on the rel.
*/
FlushRelationBuffers(rel);
/* /*
* We need to log the copied data in WAL iff WAL archiving is enabled AND * We need to log the copied data in WAL iff WAL archiving is enabled AND
* it's not a temp rel. * it's not a temp rel.
*/ */
use_wal = XLogArchivingActive() && !rel->rd_istemp; use_wal = XLogArchivingActive() && !istemp;
nblocks = RelationGetNumberOfBlocks(rel); nblocks = smgrnblocks(src, forkNum);
/* RelationGetNumberOfBlocks will certainly have opened rd_smgr */
src = rel->rd_smgr;
for (blkno = 0; blkno < nblocks; blkno++) for (blkno = 0; blkno < nblocks; blkno++)
{ {
smgrread(src, blkno, buf); smgrread(src, forkNum, blkno, buf);
/* XLOG stuff */ /* XLOG stuff */
if (use_wal) if (use_wal)
log_newpage(&dst->smgr_rnode, blkno, page); log_newpage(&dst->smgr_rnode, forkNum, blkno, page);
/* /*
* Now write the page. We say isTemp = true even if it's not a temp * Now write the page. We say isTemp = true even if it's not a temp
* rel, because there's no need for smgr to schedule an fsync for this * rel, because there's no need for smgr to schedule an fsync for this
* write; we'll do it ourselves below. * write; we'll do it ourselves below.
*/ */
smgrextend(dst, blkno, buf, true); smgrextend(dst, forkNum, blkno, buf, true);
} }
/* /*
...@@ -6641,8 +6649,8 @@ copy_relation_data(Relation rel, SMgrRelation dst) ...@@ -6641,8 +6649,8 @@ copy_relation_data(Relation rel, SMgrRelation dst)
* wouldn't replay our earlier WAL entries. If we do not fsync those pages * wouldn't replay our earlier WAL entries. If we do not fsync those pages
* here, they might still not be on disk when the crash occurs. * here, they might still not be on disk when the crash occurs.
*/ */
if (!rel->rd_istemp) if (!istemp)
smgrimmedsync(dst); smgrimmedsync(dst, forkNum);
} }
/* /*
......
...@@ -37,7 +37,7 @@ ...@@ -37,7 +37,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.50 2008/05/12 00:00:50 alvherre Exp $ * $PostgreSQL: pgsql/src/backend/postmaster/bgwriter.c,v 1.51 2008/08/11 11:05:11 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -113,6 +113,7 @@ ...@@ -113,6 +113,7 @@
typedef struct typedef struct
{ {
RelFileNode rnode; RelFileNode rnode;
ForkNumber forknum;
BlockNumber segno; /* see md.c for special values */ BlockNumber segno; /* see md.c for special values */
/* might add a real request-type field later; not needed yet */ /* might add a real request-type field later; not needed yet */
} BgWriterRequest; } BgWriterRequest;
...@@ -990,7 +991,7 @@ RequestCheckpoint(int flags) ...@@ -990,7 +991,7 @@ RequestCheckpoint(int flags)
* than we have to here. * than we have to here.
*/ */
bool bool
ForwardFsyncRequest(RelFileNode rnode, BlockNumber segno) ForwardFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
{ {
BgWriterRequest *request; BgWriterRequest *request;
...@@ -1067,7 +1068,7 @@ AbsorbFsyncRequests(void) ...@@ -1067,7 +1068,7 @@ AbsorbFsyncRequests(void)
LWLockRelease(BgWriterCommLock); LWLockRelease(BgWriterCommLock);
for (request = requests; n > 0; request++, n--) for (request = requests; n > 0; request++, n--)
RememberFsyncRequest(request->rnode, request->segno); RememberFsyncRequest(request->rnode, request->forknum, request->segno);
if (requests) if (requests)
pfree(requests); pfree(requests);
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/rewrite/rewriteDefine.c,v 1.127 2008/06/19 00:46:05 alvherre Exp $ * $PostgreSQL: pgsql/src/backend/rewrite/rewriteDefine.c,v 1.128 2008/08/11 11:05:11 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -482,8 +482,14 @@ DefineQueryRewrite(char *rulename, ...@@ -482,8 +482,14 @@ DefineQueryRewrite(char *rulename,
*/ */
if (RelisBecomingView) if (RelisBecomingView)
{ {
ForkNumber forknum;
RelationOpenSmgr(event_relation); RelationOpenSmgr(event_relation);
smgrscheduleunlink(event_relation->rd_smgr, event_relation->rd_istemp); for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
if (smgrexists(event_relation->rd_smgr, forknum))
smgrscheduleunlink(event_relation->rd_smgr, forknum,
event_relation->rd_istemp);
RelationCloseSmgr(event_relation);
} }
/* Close rel, but keep lock till commit... */ /* Close rel, but keep lock till commit... */
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.236 2008/08/05 15:09:04 tgl Exp $ * $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.237 2008/08/11 11:05:11 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -78,9 +78,10 @@ static bool IsForInput; ...@@ -78,9 +78,10 @@ static bool IsForInput;
static volatile BufferDesc *PinCountWaitBuf = NULL; static volatile BufferDesc *PinCountWaitBuf = NULL;
static Buffer ReadBuffer_relcache(Relation reln, BlockNumber blockNum, static Buffer ReadBuffer_relcache(Relation reln, ForkNumber forkNum,
bool zeroPage, BufferAccessStrategy strategy); BlockNumber blockNum, bool zeroPage, BufferAccessStrategy strategy);
static Buffer ReadBuffer_common(SMgrRelation reln, bool isLocalBuf, BlockNumber blockNum, static Buffer ReadBuffer_common(SMgrRelation reln, bool isLocalBuf,
ForkNumber forkNum, BlockNumber blockNum,
bool zeroPage, BufferAccessStrategy strategy, bool *hit); bool zeroPage, BufferAccessStrategy strategy, bool *hit);
static bool PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy); static bool PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy);
static void PinBuffer_Locked(volatile BufferDesc *buf); static void PinBuffer_Locked(volatile BufferDesc *buf);
...@@ -92,7 +93,8 @@ static bool StartBufferIO(volatile BufferDesc *buf, bool forInput); ...@@ -92,7 +93,8 @@ static bool StartBufferIO(volatile BufferDesc *buf, bool forInput);
static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty, static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
int set_flag_bits); int set_flag_bits);
static void buffer_write_error_callback(void *arg); static void buffer_write_error_callback(void *arg);
static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, BlockNumber blockNum, static volatile BufferDesc *BufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
BlockNumber blockNum,
BufferAccessStrategy strategy, BufferAccessStrategy strategy,
bool *foundPtr); bool *foundPtr);
static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln); static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
...@@ -117,7 +119,17 @@ static void AtProcExit_Buffers(int code, Datum arg); ...@@ -117,7 +119,17 @@ static void AtProcExit_Buffers(int code, Datum arg);
Buffer Buffer
ReadBuffer(Relation reln, BlockNumber blockNum) ReadBuffer(Relation reln, BlockNumber blockNum)
{ {
return ReadBuffer_relcache(reln, blockNum, false, NULL); return ReadBuffer_relcache(reln, MAIN_FORKNUM, blockNum, false, NULL);
}
/*
* ReadBufferWithFork -- same as ReadBuffer, but for accessing relation
* forks other than MAIN_FORKNUM.
*/
Buffer
ReadBufferWithFork(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
{
return ReadBuffer_relcache(reln, forkNum, blockNum, false, NULL);
} }
/* /*
...@@ -128,7 +140,7 @@ Buffer ...@@ -128,7 +140,7 @@ Buffer
ReadBufferWithStrategy(Relation reln, BlockNumber blockNum, ReadBufferWithStrategy(Relation reln, BlockNumber blockNum,
BufferAccessStrategy strategy) BufferAccessStrategy strategy)
{ {
return ReadBuffer_relcache(reln, blockNum, false, strategy); return ReadBuffer_relcache(reln, MAIN_FORKNUM, blockNum, false, strategy);
} }
/* /*
...@@ -143,32 +155,32 @@ ReadBufferWithStrategy(Relation reln, BlockNumber blockNum, ...@@ -143,32 +155,32 @@ ReadBufferWithStrategy(Relation reln, BlockNumber blockNum,
* the page is modified and written out. P_NEW is OK, though. * the page is modified and written out. P_NEW is OK, though.
*/ */
Buffer Buffer
ReadOrZeroBuffer(Relation reln, BlockNumber blockNum) ReadOrZeroBuffer(Relation reln, ForkNumber forkNum, BlockNumber blockNum)
{ {
return ReadBuffer_relcache(reln, blockNum, true, NULL); return ReadBuffer_relcache(reln, forkNum, blockNum, true, NULL);
} }
/* /*
* ReadBufferWithoutRelcache -- like ReadBuffer, but doesn't require a * ReadBufferWithoutRelcache -- like ReadBuffer, but doesn't require a
* relcache entry for the relation. If zeroPage is true, this behaves * relcache entry for the relation. If zeroPage is true, this behaves
* like ReadOrZeroBuffer rather than ReadBuffer. * like ReadOrZeroBuffer rather than ReadBuffer.
*/ */
Buffer Buffer
ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp, ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp,
BlockNumber blockNum, bool zeroPage) ForkNumber forkNum, BlockNumber blockNum, bool zeroPage)
{ {
bool hit; bool hit;
SMgrRelation smgr = smgropen(rnode); SMgrRelation smgr = smgropen(rnode);
return ReadBuffer_common(smgr, isTemp, blockNum, zeroPage, NULL, &hit); return ReadBuffer_common(smgr, isTemp, forkNum, blockNum, zeroPage, NULL, &hit);
} }
/* /*
* ReadBuffer_relcache -- common logic for ReadBuffer-variants that * ReadBuffer_relcache -- common logic for ReadBuffer-variants that
* operate on a Relation. * operate on a Relation.
*/ */
static Buffer static Buffer
ReadBuffer_relcache(Relation reln, BlockNumber blockNum, ReadBuffer_relcache(Relation reln, ForkNumber forkNum, BlockNumber blockNum,
bool zeroPage, BufferAccessStrategy strategy) bool zeroPage, BufferAccessStrategy strategy)
{ {
bool hit; bool hit;
...@@ -182,7 +194,7 @@ ReadBuffer_relcache(Relation reln, BlockNumber blockNum, ...@@ -182,7 +194,7 @@ ReadBuffer_relcache(Relation reln, BlockNumber blockNum,
* hit or miss. * hit or miss.
*/ */
pgstat_count_buffer_read(reln); pgstat_count_buffer_read(reln);
buf = ReadBuffer_common(reln->rd_smgr, reln->rd_istemp, blockNum, buf = ReadBuffer_common(reln->rd_smgr, reln->rd_istemp, forkNum, blockNum,
zeroPage, strategy, &hit); zeroPage, strategy, &hit);
if (hit) if (hit)
pgstat_count_buffer_hit(reln); pgstat_count_buffer_hit(reln);
...@@ -195,8 +207,9 @@ ReadBuffer_relcache(Relation reln, BlockNumber blockNum, ...@@ -195,8 +207,9 @@ ReadBuffer_relcache(Relation reln, BlockNumber blockNum,
* *hit is set to true if the request was satisfied from shared buffer cache. * *hit is set to true if the request was satisfied from shared buffer cache.
*/ */
static Buffer static Buffer
ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum, ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, ForkNumber forkNum,
bool zeroPage, BufferAccessStrategy strategy, bool *hit) BlockNumber blockNum, bool zeroPage,
BufferAccessStrategy strategy, bool *hit)
{ {
volatile BufferDesc *bufHdr; volatile BufferDesc *bufHdr;
Block bufBlock; Block bufBlock;
...@@ -212,7 +225,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum, ...@@ -212,7 +225,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
/* Substitute proper block number if caller asked for P_NEW */ /* Substitute proper block number if caller asked for P_NEW */
if (isExtend) if (isExtend)
blockNum = smgrnblocks(smgr); blockNum = smgrnblocks(smgr, forkNum);
TRACE_POSTGRESQL_BUFFER_READ_START(blockNum, smgr->smgr_rnode.spcNode, TRACE_POSTGRESQL_BUFFER_READ_START(blockNum, smgr->smgr_rnode.spcNode,
smgr->smgr_rnode.dbNode, smgr->smgr_rnode.relNode, isLocalBuf); smgr->smgr_rnode.dbNode, smgr->smgr_rnode.relNode, isLocalBuf);
...@@ -220,7 +233,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum, ...@@ -220,7 +233,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
if (isLocalBuf) if (isLocalBuf)
{ {
ReadLocalBufferCount++; ReadLocalBufferCount++;
bufHdr = LocalBufferAlloc(smgr, blockNum, &found); bufHdr = LocalBufferAlloc(smgr, forkNum, blockNum, &found);
if (found) if (found)
{ {
LocalBufferHitCount++; LocalBufferHitCount++;
...@@ -239,7 +252,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum, ...@@ -239,7 +252,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
* lookup the buffer. IO_IN_PROGRESS is set if the requested block is * lookup the buffer. IO_IN_PROGRESS is set if the requested block is
* not currently in memory. * not currently in memory.
*/ */
bufHdr = BufferAlloc(smgr, blockNum, strategy, &found); bufHdr = BufferAlloc(smgr, forkNum, blockNum, strategy, &found);
if (found) if (found)
{ {
BufferHitCount++; BufferHitCount++;
...@@ -341,7 +354,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum, ...@@ -341,7 +354,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
{ {
/* new buffers are zero-filled */ /* new buffers are zero-filled */
MemSet((char *) bufBlock, 0, BLCKSZ); MemSet((char *) bufBlock, 0, BLCKSZ);
smgrextend(smgr, blockNum, (char *) bufBlock, isLocalBuf); smgrextend(smgr, forkNum, blockNum, (char *) bufBlock, isLocalBuf);
} }
else else
{ {
...@@ -353,7 +366,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum, ...@@ -353,7 +366,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
MemSet((char *) bufBlock, 0, BLCKSZ); MemSet((char *) bufBlock, 0, BLCKSZ);
else else
{ {
smgrread(smgr, blockNum, (char *) bufBlock); smgrread(smgr, forkNum, blockNum, (char *) bufBlock);
/* check for garbage data */ /* check for garbage data */
if (!PageHeaderIsValid((PageHeader) bufBlock)) if (!PageHeaderIsValid((PageHeader) bufBlock))
...@@ -363,7 +376,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum, ...@@ -363,7 +376,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
ereport(WARNING, ereport(WARNING,
(errcode(ERRCODE_DATA_CORRUPTED), (errcode(ERRCODE_DATA_CORRUPTED),
errmsg("invalid page header in block %u of relation %u/%u/%u; zeroing out page", errmsg("invalid page header in block %u of relation %u/%u/%u; zeroing out page",
blockNum, blockNum,
smgr->smgr_rnode.spcNode, smgr->smgr_rnode.spcNode,
smgr->smgr_rnode.dbNode, smgr->smgr_rnode.dbNode,
smgr->smgr_rnode.relNode))); smgr->smgr_rnode.relNode)));
...@@ -421,7 +434,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum, ...@@ -421,7 +434,7 @@ ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, BlockNumber blockNum,
* No locks are held either at entry or exit. * No locks are held either at entry or exit.
*/ */
static volatile BufferDesc * static volatile BufferDesc *
BufferAlloc(SMgrRelation smgr, BufferAlloc(SMgrRelation smgr, ForkNumber forkNum,
BlockNumber blockNum, BlockNumber blockNum,
BufferAccessStrategy strategy, BufferAccessStrategy strategy,
bool *foundPtr) bool *foundPtr)
...@@ -438,7 +451,7 @@ BufferAlloc(SMgrRelation smgr, ...@@ -438,7 +451,7 @@ BufferAlloc(SMgrRelation smgr,
bool valid; bool valid;
/* create a tag so we can lookup the buffer */ /* create a tag so we can lookup the buffer */
INIT_BUFFERTAG(newTag, smgr->smgr_rnode, blockNum); INIT_BUFFERTAG(newTag, smgr->smgr_rnode, forkNum, blockNum);
/* determine its hash code and partition lock ID */ /* determine its hash code and partition lock ID */
newHash = BufTableHashCode(&newTag); newHash = BufTableHashCode(&newTag);
...@@ -903,6 +916,7 @@ ReleaseAndReadBuffer(Buffer buffer, ...@@ -903,6 +916,7 @@ ReleaseAndReadBuffer(Buffer buffer,
Relation relation, Relation relation,
BlockNumber blockNum) BlockNumber blockNum)
{ {
ForkNumber forkNum = MAIN_FORKNUM;
volatile BufferDesc *bufHdr; volatile BufferDesc *bufHdr;
if (BufferIsValid(buffer)) if (BufferIsValid(buffer))
...@@ -912,7 +926,8 @@ ReleaseAndReadBuffer(Buffer buffer, ...@@ -912,7 +926,8 @@ ReleaseAndReadBuffer(Buffer buffer,
Assert(LocalRefCount[-buffer - 1] > 0); Assert(LocalRefCount[-buffer - 1] > 0);
bufHdr = &LocalBufferDescriptors[-buffer - 1]; bufHdr = &LocalBufferDescriptors[-buffer - 1];
if (bufHdr->tag.blockNum == blockNum && if (bufHdr->tag.blockNum == blockNum &&
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node)) RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
bufHdr->tag.forkNum == forkNum)
return buffer; return buffer;
ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer); ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
LocalRefCount[-buffer - 1]--; LocalRefCount[-buffer - 1]--;
...@@ -923,7 +938,8 @@ ReleaseAndReadBuffer(Buffer buffer, ...@@ -923,7 +938,8 @@ ReleaseAndReadBuffer(Buffer buffer,
bufHdr = &BufferDescriptors[buffer - 1]; bufHdr = &BufferDescriptors[buffer - 1];
/* we have pin, so it's ok to examine tag without spinlock */ /* we have pin, so it's ok to examine tag without spinlock */
if (bufHdr->tag.blockNum == blockNum && if (bufHdr->tag.blockNum == blockNum &&
RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node)) RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node) &&
bufHdr->tag.forkNum == forkNum)
return buffer; return buffer;
UnpinBuffer(bufHdr, true); UnpinBuffer(bufHdr, true);
} }
...@@ -1734,23 +1750,28 @@ BufferGetBlockNumber(Buffer buffer) ...@@ -1734,23 +1750,28 @@ BufferGetBlockNumber(Buffer buffer)
} }
/* /*
* BufferGetFileNode * BufferGetTag
* Returns the relation ID (RelFileNode) associated with a buffer. * Returns the relfilenode, fork number and block number associated with
* * a buffer.
* This should make the same checks as BufferGetBlockNumber, but since the
* two are generally called together, we don't bother.
*/ */
RelFileNode void
BufferGetFileNode(Buffer buffer) BufferGetTag(Buffer buffer, RelFileNode *rnode, ForkNumber *forknum,
BlockNumber *blknum)
{ {
volatile BufferDesc *bufHdr; volatile BufferDesc *bufHdr;
/* Do the same checks as BufferGetBlockNumber. */
Assert(BufferIsPinned(buffer));
if (BufferIsLocal(buffer)) if (BufferIsLocal(buffer))
bufHdr = &(LocalBufferDescriptors[-buffer - 1]); bufHdr = &(LocalBufferDescriptors[-buffer - 1]);
else else
bufHdr = &BufferDescriptors[buffer - 1]; bufHdr = &BufferDescriptors[buffer - 1];
return bufHdr->tag.rnode; /* pinned, so OK to read tag without spinlock */
*rnode = bufHdr->tag.rnode;
*forknum = bufHdr->tag.forkNum;
*blknum = bufHdr->tag.blockNum;
} }
/* /*
...@@ -1820,6 +1841,7 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln) ...@@ -1820,6 +1841,7 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
UnlockBufHdr(buf); UnlockBufHdr(buf);
smgrwrite(reln, smgrwrite(reln,
buf->tag.forkNum,
buf->tag.blockNum, buf->tag.blockNum,
(char *) BufHdrGetBlock(buf), (char *) BufHdrGetBlock(buf),
false); false);
...@@ -1849,7 +1871,7 @@ RelationGetNumberOfBlocks(Relation relation) ...@@ -1849,7 +1871,7 @@ RelationGetNumberOfBlocks(Relation relation)
/* Open it at the smgr level if not already done */ /* Open it at the smgr level if not already done */
RelationOpenSmgr(relation); RelationOpenSmgr(relation);
return smgrnblocks(relation->rd_smgr); return smgrnblocks(relation->rd_smgr, MAIN_FORKNUM);
} }
/* /*
...@@ -1869,7 +1891,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks) ...@@ -1869,7 +1891,7 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
rel->rd_targblock = InvalidBlockNumber; rel->rd_targblock = InvalidBlockNumber;
/* Do the real work */ /* Do the real work */
smgrtruncate(rel->rd_smgr, nblocks, rel->rd_istemp); smgrtruncate(rel->rd_smgr, MAIN_FORKNUM, nblocks, rel->rd_istemp);
} }
/* --------------------------------------------------------------------- /* ---------------------------------------------------------------------
...@@ -1899,14 +1921,14 @@ RelationTruncate(Relation rel, BlockNumber nblocks) ...@@ -1899,14 +1921,14 @@ RelationTruncate(Relation rel, BlockNumber nblocks)
* -------------------------------------------------------------------- * --------------------------------------------------------------------
*/ */
void void
DropRelFileNodeBuffers(RelFileNode rnode, bool istemp, DropRelFileNodeBuffers(RelFileNode rnode, ForkNumber forkNum, bool istemp,
BlockNumber firstDelBlock) BlockNumber firstDelBlock)
{ {
int i; int i;
if (istemp) if (istemp)
{ {
DropRelFileNodeLocalBuffers(rnode, firstDelBlock); DropRelFileNodeLocalBuffers(rnode, forkNum, firstDelBlock);
return; return;
} }
...@@ -1916,6 +1938,7 @@ DropRelFileNodeBuffers(RelFileNode rnode, bool istemp, ...@@ -1916,6 +1938,7 @@ DropRelFileNodeBuffers(RelFileNode rnode, bool istemp,
LockBufHdr(bufHdr); LockBufHdr(bufHdr);
if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) && if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
bufHdr->tag.forkNum == forkNum &&
bufHdr->tag.blockNum >= firstDelBlock) bufHdr->tag.blockNum >= firstDelBlock)
InvalidateBuffer(bufHdr); /* releases spinlock */ InvalidateBuffer(bufHdr); /* releases spinlock */
else else
...@@ -2055,6 +2078,7 @@ FlushRelationBuffers(Relation rel) ...@@ -2055,6 +2078,7 @@ FlushRelationBuffers(Relation rel)
error_context_stack = &errcontext; error_context_stack = &errcontext;
smgrwrite(rel->rd_smgr, smgrwrite(rel->rd_smgr,
bufHdr->tag.forkNum,
bufHdr->tag.blockNum, bufHdr->tag.blockNum,
(char *) LocalBufHdrGetBlock(bufHdr), (char *) LocalBufHdrGetBlock(bufHdr),
true); true);
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.80 2008/06/12 09:12:31 heikki Exp $ * $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.81 2008/08/11 11:05:11 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -61,7 +61,8 @@ static Block GetLocalBufferStorage(void); ...@@ -61,7 +61,8 @@ static Block GetLocalBufferStorage(void);
* (hence, usage_count is always advanced). * (hence, usage_count is always advanced).
*/ */
BufferDesc * BufferDesc *
LocalBufferAlloc(SMgrRelation smgr, BlockNumber blockNum, bool *foundPtr) LocalBufferAlloc(SMgrRelation smgr, ForkNumber forkNum, BlockNumber blockNum,
bool *foundPtr)
{ {
BufferTag newTag; /* identity of requested block */ BufferTag newTag; /* identity of requested block */
LocalBufferLookupEnt *hresult; LocalBufferLookupEnt *hresult;
...@@ -70,7 +71,7 @@ LocalBufferAlloc(SMgrRelation smgr, BlockNumber blockNum, bool *foundPtr) ...@@ -70,7 +71,7 @@ LocalBufferAlloc(SMgrRelation smgr, BlockNumber blockNum, bool *foundPtr)
int trycounter; int trycounter;
bool found; bool found;
INIT_BUFFERTAG(newTag, smgr->smgr_rnode, blockNum); INIT_BUFFERTAG(newTag, smgr->smgr_rnode, forkNum, blockNum);
/* Initialize local buffers if first request in this session */ /* Initialize local buffers if first request in this session */
if (LocalBufHash == NULL) if (LocalBufHash == NULL)
...@@ -162,6 +163,7 @@ LocalBufferAlloc(SMgrRelation smgr, BlockNumber blockNum, bool *foundPtr) ...@@ -162,6 +163,7 @@ LocalBufferAlloc(SMgrRelation smgr, BlockNumber blockNum, bool *foundPtr)
/* And write... */ /* And write... */
smgrwrite(oreln, smgrwrite(oreln,
bufHdr->tag.forkNum,
bufHdr->tag.blockNum, bufHdr->tag.blockNum,
(char *) LocalBufHdrGetBlock(bufHdr), (char *) LocalBufHdrGetBlock(bufHdr),
true); true);
...@@ -250,7 +252,8 @@ MarkLocalBufferDirty(Buffer buffer) ...@@ -250,7 +252,8 @@ MarkLocalBufferDirty(Buffer buffer)
* See DropRelFileNodeBuffers in bufmgr.c for more notes. * See DropRelFileNodeBuffers in bufmgr.c for more notes.
*/ */
void void
DropRelFileNodeLocalBuffers(RelFileNode rnode, BlockNumber firstDelBlock) DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
BlockNumber firstDelBlock)
{ {
int i; int i;
...@@ -261,6 +264,7 @@ DropRelFileNodeLocalBuffers(RelFileNode rnode, BlockNumber firstDelBlock) ...@@ -261,6 +264,7 @@ DropRelFileNodeLocalBuffers(RelFileNode rnode, BlockNumber firstDelBlock)
if ((bufHdr->flags & BM_TAG_VALID) && if ((bufHdr->flags & BM_TAG_VALID) &&
RelFileNodeEquals(bufHdr->tag.rnode, rnode) && RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
bufHdr->tag.forkNum == forkNum &&
bufHdr->tag.blockNum >= firstDelBlock) bufHdr->tag.blockNum >= firstDelBlock)
{ {
if (LocalRefCount[i] != 0) if (LocalRefCount[i] != 0)
......
$PostgreSQL: pgsql/src/backend/storage/smgr/README,v 1.5 2008/03/21 13:23:28 momjian Exp $ $PostgreSQL: pgsql/src/backend/storage/smgr/README,v 1.6 2008/08/11 11:05:11 heikki Exp $
Storage Manager Storage Manager
=============== ===============
...@@ -32,3 +32,20 @@ The files in this directory, and their contents, are ...@@ -32,3 +32,20 @@ The files in this directory, and their contents, are
md.c The magnetic disk storage manager. md.c The magnetic disk storage manager.
Note that md.c in turn relies on src/backend/storage/file/fd.c. Note that md.c in turn relies on src/backend/storage/file/fd.c.
Relation Forks
==============
Since 8.4, a single smgr relation can consist of multiple physical
files, called relation forks. This allows storing additional metadata like
Free Space information in additional forks, which can be grown and truncated
independently of the main data file, while still treating it all as a single
physical relation in system catalogs.
It is assumed that the main fork, fork number 0 or MAIN_FORKNUM, always
exists. Fork numbers are assigned in src/include/storage/relfilenode.h.
Functions in smgr.c and md.c take an extra fork number argument, in addition
to relfilenode and block number, to identify which relation fork you want to
access. Since most code wants to access the main fork, a shortcut version of
ReadBuffer that accesses MAIN_FORKNUM is provided in the buffer manager for
convenience.
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.138 2008/05/02 01:08:27 tgl Exp $ * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.139 2008/08/11 11:05:11 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "postmaster/bgwriter.h" #include "postmaster/bgwriter.h"
#include "storage/fd.h" #include "storage/fd.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/relfilenode.h"
#include "storage/smgr.h" #include "storage/smgr.h"
#include "utils/hsearch.h" #include "utils/hsearch.h"
#include "utils/memutils.h" #include "utils/memutils.h"
...@@ -118,6 +119,7 @@ static MemoryContext MdCxt; /* context for all md.c allocations */ ...@@ -118,6 +119,7 @@ static MemoryContext MdCxt; /* context for all md.c allocations */
typedef struct typedef struct
{ {
RelFileNode rnode; /* the targeted relation */ RelFileNode rnode; /* the targeted relation */
ForkNumber forknum;
BlockNumber segno; /* which segment */ BlockNumber segno; /* which segment */
} PendingOperationTag; } PendingOperationTag;
...@@ -151,15 +153,18 @@ typedef enum /* behavior for mdopen & _mdfd_getseg */ ...@@ -151,15 +153,18 @@ typedef enum /* behavior for mdopen & _mdfd_getseg */
} ExtensionBehavior; } ExtensionBehavior;
/* local routines */ /* local routines */
static MdfdVec *mdopen(SMgrRelation reln, ExtensionBehavior behavior); static MdfdVec *mdopen(SMgrRelation reln, ForkNumber forknum,
static void register_dirty_segment(SMgrRelation reln, MdfdVec *seg); ExtensionBehavior behavior);
static void register_dirty_segment(SMgrRelation reln, ForkNumber forknum,
MdfdVec *seg);
static void register_unlink(RelFileNode rnode); static void register_unlink(RelFileNode rnode);
static MdfdVec *_fdvec_alloc(void); static MdfdVec *_fdvec_alloc(void);
static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno, static MdfdVec *_mdfd_openseg(SMgrRelation reln, ForkNumber forkno,
int oflags); BlockNumber segno, int oflags);
static MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno, static MdfdVec *_mdfd_getseg(SMgrRelation reln, ForkNumber forkno,
bool isTemp, ExtensionBehavior behavior); BlockNumber blkno, bool isTemp, ExtensionBehavior behavior);
static BlockNumber _mdnblocks(SMgrRelation reln, MdfdVec *seg); static BlockNumber _mdnblocks(SMgrRelation reln, ForkNumber forknum,
MdfdVec *seg);
/* /*
...@@ -197,23 +202,40 @@ mdinit(void) ...@@ -197,23 +202,40 @@ mdinit(void)
} }
} }
/*
 *	mdexists() -- Report whether the file backing the given fork can be opened.
 *
 * Returns true when mdopen() succeeds for the fork, false when it yields NULL.
 *
 * Note: a file that still lingers on disk with a deletion pending counts
 * as existing, so this returns true for it.
 */
bool
mdexists(SMgrRelation reln, ForkNumber forkNum)
{
	/*
	 * mdopen() hands back the cached entry without touching the file system
	 * when the fork is already open.  Drop any cached entry first, so that
	 * the open below really probes the file and we notice a fork that was
	 * unlinked after we opened it.
	 */
	mdclose(reln, forkNum);

	if (mdopen(reln, forkNum, EXTENSION_RETURN_NULL) == NULL)
		return false;

	return true;
}
/* /*
* mdcreate() -- Create a new relation on magnetic disk. * mdcreate() -- Create a new relation on magnetic disk.
* *
* If isRedo is true, it's okay for the relation to exist already. * If isRedo is true, it's okay for the relation to exist already.
*/ */
void void
mdcreate(SMgrRelation reln, bool isRedo) mdcreate(SMgrRelation reln, ForkNumber forkNum, bool isRedo)
{ {
char *path; char *path;
File fd; File fd;
if (isRedo && reln->md_fd != NULL) if (isRedo && reln->md_fd[forkNum] != NULL)
return; /* created and opened already... */ return; /* created and opened already... */
Assert(reln->md_fd == NULL); Assert(reln->md_fd[forkNum] == NULL);
path = relpath(reln->smgr_rnode); path = relpath(reln->smgr_rnode, forkNum);
fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600); fd = PathNameOpenFile(path, O_RDWR | O_CREAT | O_EXCL | PG_BINARY, 0600);
...@@ -236,20 +258,21 @@ mdcreate(SMgrRelation reln, bool isRedo) ...@@ -236,20 +258,21 @@ mdcreate(SMgrRelation reln, bool isRedo)
errno = save_errno; errno = save_errno;
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not create relation %u/%u/%u: %m", errmsg("could not create relation %u/%u/%u/%u: %m",
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode))); reln->smgr_rnode.relNode,
forkNum)));
} }
} }
pfree(path); pfree(path);
reln->md_fd = _fdvec_alloc(); reln->md_fd[forkNum] = _fdvec_alloc();
reln->md_fd->mdfd_vfd = fd; reln->md_fd[forkNum]->mdfd_vfd = fd;
reln->md_fd->mdfd_segno = 0; reln->md_fd[forkNum]->mdfd_segno = 0;
reln->md_fd->mdfd_chain = NULL; reln->md_fd[forkNum]->mdfd_chain = NULL;
} }
/* /*
...@@ -285,7 +308,7 @@ mdcreate(SMgrRelation reln, bool isRedo) ...@@ -285,7 +308,7 @@ mdcreate(SMgrRelation reln, bool isRedo)
* we are usually not in a transaction anymore when this is called. * we are usually not in a transaction anymore when this is called.
*/ */
void void
mdunlink(RelFileNode rnode, bool isRedo) mdunlink(RelFileNode rnode, ForkNumber forkNum, bool isRedo)
{ {
char *path; char *path;
int ret; int ret;
...@@ -294,14 +317,14 @@ mdunlink(RelFileNode rnode, bool isRedo) ...@@ -294,14 +317,14 @@ mdunlink(RelFileNode rnode, bool isRedo)
* We have to clean out any pending fsync requests for the doomed * We have to clean out any pending fsync requests for the doomed
* relation, else the next mdsync() will fail. * relation, else the next mdsync() will fail.
*/ */
ForgetRelationFsyncRequests(rnode); ForgetRelationFsyncRequests(rnode, forkNum);
path = relpath(rnode); path = relpath(rnode, forkNum);
/* /*
* Delete or truncate the first segment. * Delete or truncate the first segment.
*/ */
if (isRedo) if (isRedo || forkNum != MAIN_FORKNUM)
ret = unlink(path); ret = unlink(path);
else else
{ {
...@@ -326,10 +349,11 @@ mdunlink(RelFileNode rnode, bool isRedo) ...@@ -326,10 +349,11 @@ mdunlink(RelFileNode rnode, bool isRedo)
if (!isRedo || errno != ENOENT) if (!isRedo || errno != ENOENT)
ereport(WARNING, ereport(WARNING,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not remove relation %u/%u/%u: %m", errmsg("could not remove relation %u/%u/%u/%u: %m",
rnode.spcNode, rnode.spcNode,
rnode.dbNode, rnode.dbNode,
rnode.relNode))); rnode.relNode,
forkNum)));
} }
/* /*
...@@ -353,11 +377,12 @@ mdunlink(RelFileNode rnode, bool isRedo) ...@@ -353,11 +377,12 @@ mdunlink(RelFileNode rnode, bool isRedo)
if (errno != ENOENT) if (errno != ENOENT)
ereport(WARNING, ereport(WARNING,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not remove segment %u of relation %u/%u/%u: %m", errmsg("could not remove segment %u of relation %u/%u/%u/%u: %m",
segno, segno,
rnode.spcNode, rnode.spcNode,
rnode.dbNode, rnode.dbNode,
rnode.relNode))); rnode.relNode,
forkNum)));
break; break;
} }
} }
...@@ -367,7 +392,7 @@ mdunlink(RelFileNode rnode, bool isRedo) ...@@ -367,7 +392,7 @@ mdunlink(RelFileNode rnode, bool isRedo)
pfree(path); pfree(path);
/* Register request to unlink first segment later */ /* Register request to unlink first segment later */
if (!isRedo) if (!isRedo && forkNum == MAIN_FORKNUM)
register_unlink(rnode); register_unlink(rnode);
} }
...@@ -381,7 +406,8 @@ mdunlink(RelFileNode rnode, bool isRedo) ...@@ -381,7 +406,8 @@ mdunlink(RelFileNode rnode, bool isRedo)
* causes intervening file space to become filled with zeroes. * causes intervening file space to become filled with zeroes.
*/ */
void void
mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) mdextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer, bool isTemp)
{ {
off_t seekpos; off_t seekpos;
int nbytes; int nbytes;
...@@ -389,7 +415,7 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) ...@@ -389,7 +415,7 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
/* This assert is too expensive to have on normally ... */ /* This assert is too expensive to have on normally ... */
#ifdef CHECK_WRITE_VS_EXTEND #ifdef CHECK_WRITE_VS_EXTEND
Assert(blocknum >= mdnblocks(reln)); Assert(blocknum >= mdnblocks(reln, forknum));
#endif #endif
/* /*
...@@ -400,13 +426,14 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) ...@@ -400,13 +426,14 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
if (blocknum == InvalidBlockNumber) if (blocknum == InvalidBlockNumber)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("cannot extend relation %u/%u/%u beyond %u blocks", errmsg("cannot extend relation %u/%u/%u/%u beyond %u blocks",
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode, reln->smgr_rnode.relNode,
forknum,
InvalidBlockNumber))); InvalidBlockNumber)));
v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_CREATE); v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_CREATE);
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
...@@ -423,37 +450,40 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) ...@@ -423,37 +450,40 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not seek to block %u of relation %u/%u/%u: %m", errmsg("could not seek to block %u of relation %u/%u/%u/%u: %m",
blocknum, blocknum,
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode))); reln->smgr_rnode.relNode,
forknum)));
if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
{ {
if (nbytes < 0) if (nbytes < 0)
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not extend relation %u/%u/%u: %m", errmsg("could not extend relation %u/%u/%u/%u: %m",
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode), reln->smgr_rnode.relNode,
forknum),
errhint("Check free disk space."))); errhint("Check free disk space.")));
/* short write: complain appropriately */ /* short write: complain appropriately */
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_DISK_FULL), (errcode(ERRCODE_DISK_FULL),
errmsg("could not extend relation %u/%u/%u: wrote only %d of %d bytes at block %u", errmsg("could not extend relation %u/%u/%u/%u: wrote only %d of %d bytes at block %u",
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode, reln->smgr_rnode.relNode,
forknum,
nbytes, BLCKSZ, blocknum), nbytes, BLCKSZ, blocknum),
errhint("Check free disk space."))); errhint("Check free disk space.")));
} }
if (!isTemp) if (!isTemp)
register_dirty_segment(reln, v); register_dirty_segment(reln, forknum, v);
Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE)); Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
} }
/* /*
...@@ -467,17 +497,17 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) ...@@ -467,17 +497,17 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
* invent one out of whole cloth. * invent one out of whole cloth.
*/ */
static MdfdVec * static MdfdVec *
mdopen(SMgrRelation reln, ExtensionBehavior behavior) mdopen(SMgrRelation reln, ForkNumber forknum, ExtensionBehavior behavior)
{ {
MdfdVec *mdfd; MdfdVec *mdfd;
char *path; char *path;
File fd; File fd;
/* No work if already open */ /* No work if already open */
if (reln->md_fd) if (reln->md_fd[forknum])
return reln->md_fd; return reln->md_fd[forknum];
path = relpath(reln->smgr_rnode); path = relpath(reln->smgr_rnode, forknum);
fd = PathNameOpenFile(path, O_RDWR | PG_BINARY, 0600); fd = PathNameOpenFile(path, O_RDWR | PG_BINARY, 0600);
...@@ -499,21 +529,22 @@ mdopen(SMgrRelation reln, ExtensionBehavior behavior) ...@@ -499,21 +529,22 @@ mdopen(SMgrRelation reln, ExtensionBehavior behavior)
return NULL; return NULL;
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not open relation %u/%u/%u: %m", errmsg("could not open relation %u/%u/%u/%u: %m",
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode))); reln->smgr_rnode.relNode,
forknum)));
} }
} }
pfree(path); pfree(path);
reln->md_fd = mdfd = _fdvec_alloc(); reln->md_fd[forknum] = mdfd = _fdvec_alloc();
mdfd->mdfd_vfd = fd; mdfd->mdfd_vfd = fd;
mdfd->mdfd_segno = 0; mdfd->mdfd_segno = 0;
mdfd->mdfd_chain = NULL; mdfd->mdfd_chain = NULL;
Assert(_mdnblocks(reln, mdfd) <= ((BlockNumber) RELSEG_SIZE)); Assert(_mdnblocks(reln, forknum, mdfd) <= ((BlockNumber) RELSEG_SIZE));
return mdfd; return mdfd;
} }
...@@ -522,15 +553,15 @@ mdopen(SMgrRelation reln, ExtensionBehavior behavior) ...@@ -522,15 +553,15 @@ mdopen(SMgrRelation reln, ExtensionBehavior behavior)
* mdclose() -- Close the specified relation, if it isn't closed already. * mdclose() -- Close the specified relation, if it isn't closed already.
*/ */
void void
mdclose(SMgrRelation reln) mdclose(SMgrRelation reln, ForkNumber forknum)
{ {
MdfdVec *v = reln->md_fd; MdfdVec *v = reln->md_fd[forknum];
/* No work if already closed */ /* No work if already closed */
if (v == NULL) if (v == NULL)
return; return;
reln->md_fd = NULL; /* prevent dangling pointer after error */ reln->md_fd[forknum] = NULL; /* prevent dangling pointer after error */
while (v != NULL) while (v != NULL)
{ {
...@@ -549,13 +580,14 @@ mdclose(SMgrRelation reln) ...@@ -549,13 +580,14 @@ mdclose(SMgrRelation reln)
* mdread() -- Read the specified block from a relation. * mdread() -- Read the specified block from a relation.
*/ */
void void
mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer) mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer)
{ {
off_t seekpos; off_t seekpos;
int nbytes; int nbytes;
MdfdVec *v; MdfdVec *v;
v = _mdfd_getseg(reln, blocknum, false, EXTENSION_FAIL); v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL);
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
...@@ -563,22 +595,24 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer) ...@@ -563,22 +595,24 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not seek to block %u of relation %u/%u/%u: %m", errmsg("could not seek to block %u of relation %u/%u/%u/%u: %m",
blocknum, blocknum,
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode))); reln->smgr_rnode.relNode,
forknum)));
if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
{ {
if (nbytes < 0) if (nbytes < 0)
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not read block %u of relation %u/%u/%u: %m", errmsg("could not read block %u of relation %u/%u/%u/%u: %m",
blocknum, blocknum,
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode))); reln->smgr_rnode.relNode,
forknum)));
/* /*
* Short read: we are at or past EOF, or we read a partial block at * Short read: we are at or past EOF, or we read a partial block at
...@@ -593,11 +627,12 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer) ...@@ -593,11 +627,12 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
else else
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED), (errcode(ERRCODE_DATA_CORRUPTED),
errmsg("could not read block %u of relation %u/%u/%u: read only %d of %d bytes", errmsg("could not read block %u of relation %u/%u/%u/%u: read only %d of %d bytes",
blocknum, blocknum,
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode, reln->smgr_rnode.relNode,
forknum,
nbytes, BLCKSZ))); nbytes, BLCKSZ)));
} }
} }
...@@ -610,7 +645,8 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer) ...@@ -610,7 +645,8 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
* use mdextend(). * use mdextend().
*/ */
void void
mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer, bool isTemp)
{ {
off_t seekpos; off_t seekpos;
int nbytes; int nbytes;
...@@ -618,10 +654,10 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) ...@@ -618,10 +654,10 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
/* This assert is too expensive to have on normally ... */ /* This assert is too expensive to have on normally ... */
#ifdef CHECK_WRITE_VS_EXTEND #ifdef CHECK_WRITE_VS_EXTEND
Assert(blocknum < mdnblocks(reln)); Assert(blocknum < mdnblocks(reln, forknum));
#endif #endif
v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_FAIL); v = _mdfd_getseg(reln, forknum, blocknum, isTemp, EXTENSION_FAIL);
seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)); seekpos = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE); Assert(seekpos < (off_t) BLCKSZ * RELSEG_SIZE);
...@@ -629,36 +665,39 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) ...@@ -629,36 +665,39 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos) if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not seek to block %u of relation %u/%u/%u: %m", errmsg("could not seek to block %u of relation %u/%u/%u/%u: %m",
blocknum, blocknum,
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode))); reln->smgr_rnode.relNode,
forknum)));
if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ) if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
{ {
if (nbytes < 0) if (nbytes < 0)
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not write block %u of relation %u/%u/%u: %m", errmsg("could not write block %u of relation %u/%u/%u/%u: %m",
blocknum, blocknum,
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode))); reln->smgr_rnode.relNode,
forknum)));
/* short write: complain appropriately */ /* short write: complain appropriately */
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_DISK_FULL), (errcode(ERRCODE_DISK_FULL),
errmsg("could not write block %u of relation %u/%u/%u: wrote only %d of %d bytes", errmsg("could not write block %u of relation %u/%u/%u/%u: wrote only %d of %d bytes",
blocknum, blocknum,
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode, reln->smgr_rnode.relNode,
forknum,
nbytes, BLCKSZ), nbytes, BLCKSZ),
errhint("Check free disk space."))); errhint("Check free disk space.")));
} }
if (!isTemp) if (!isTemp)
register_dirty_segment(reln, v); register_dirty_segment(reln, forknum, v);
} }
/* /*
...@@ -670,9 +709,9 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) ...@@ -670,9 +709,9 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
* are present in the chain. * are present in the chain.
*/ */
BlockNumber BlockNumber
mdnblocks(SMgrRelation reln) mdnblocks(SMgrRelation reln, ForkNumber forknum)
{ {
MdfdVec *v = mdopen(reln, EXTENSION_FAIL); MdfdVec *v = mdopen(reln, forknum, EXTENSION_FAIL);
BlockNumber nblocks; BlockNumber nblocks;
BlockNumber segno = 0; BlockNumber segno = 0;
...@@ -696,7 +735,7 @@ mdnblocks(SMgrRelation reln) ...@@ -696,7 +735,7 @@ mdnblocks(SMgrRelation reln)
for (;;) for (;;)
{ {
nblocks = _mdnblocks(reln, v); nblocks = _mdnblocks(reln, forknum, v);
if (nblocks > ((BlockNumber) RELSEG_SIZE)) if (nblocks > ((BlockNumber) RELSEG_SIZE))
elog(FATAL, "segment too big"); elog(FATAL, "segment too big");
if (nblocks < ((BlockNumber) RELSEG_SIZE)) if (nblocks < ((BlockNumber) RELSEG_SIZE))
...@@ -715,15 +754,16 @@ mdnblocks(SMgrRelation reln) ...@@ -715,15 +754,16 @@ mdnblocks(SMgrRelation reln)
* RELSEG_SIZE. While perhaps not strictly necessary, this keeps * RELSEG_SIZE. While perhaps not strictly necessary, this keeps
* the logic simple. * the logic simple.
*/ */
v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT); v->mdfd_chain = _mdfd_openseg(reln, forknum, segno, O_CREAT);
if (v->mdfd_chain == NULL) if (v->mdfd_chain == NULL)
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not open segment %u of relation %u/%u/%u: %m", errmsg("could not open segment %u of relation %u/%u/%u/%u: %m",
segno, segno,
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode))); reln->smgr_rnode.relNode,
forknum)));
} }
v = v->mdfd_chain; v = v->mdfd_chain;
...@@ -734,7 +774,8 @@ mdnblocks(SMgrRelation reln) ...@@ -734,7 +774,8 @@ mdnblocks(SMgrRelation reln)
* mdtruncate() -- Truncate relation to specified number of blocks. * mdtruncate() -- Truncate relation to specified number of blocks.
*/ */
void void
mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) mdtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks,
bool isTemp)
{ {
MdfdVec *v; MdfdVec *v;
BlockNumber curnblk; BlockNumber curnblk;
...@@ -744,23 +785,24 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) ...@@ -744,23 +785,24 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
* NOTE: mdnblocks makes sure we have opened all active segments, so that * NOTE: mdnblocks makes sure we have opened all active segments, so that
* truncation loop will get them all! * truncation loop will get them all!
*/ */
curnblk = mdnblocks(reln); curnblk = mdnblocks(reln, forknum);
if (nblocks > curnblk) if (nblocks > curnblk)
{ {
/* Bogus request ... but no complaint if InRecovery */ /* Bogus request ... but no complaint if InRecovery */
if (InRecovery) if (InRecovery)
return; return;
ereport(ERROR, ereport(ERROR,
(errmsg("could not truncate relation %u/%u/%u to %u blocks: it's only %u blocks now", (errmsg("could not truncate relation %u/%u/%u/%u to %u blocks: it's only %u blocks now",
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode, reln->smgr_rnode.relNode,
forknum,
nblocks, curnblk))); nblocks, curnblk)));
} }
if (nblocks == curnblk) if (nblocks == curnblk)
return; /* no work */ return; /* no work */
v = mdopen(reln, EXTENSION_FAIL); v = mdopen(reln, forknum, EXTENSION_FAIL);
priorblocks = 0; priorblocks = 0;
while (v != NULL) while (v != NULL)
...@@ -777,15 +819,16 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) ...@@ -777,15 +819,16 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
if (FileTruncate(v->mdfd_vfd, 0) < 0) if (FileTruncate(v->mdfd_vfd, 0) < 0)
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not truncate relation %u/%u/%u to %u blocks: %m", errmsg("could not truncate relation %u/%u/%u/%u to %u blocks: %m",
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode, reln->smgr_rnode.relNode,
forknum,
nblocks))); nblocks)));
if (!isTemp) if (!isTemp)
register_dirty_segment(reln, v); register_dirty_segment(reln, forknum, v);
v = v->mdfd_chain; v = v->mdfd_chain;
Assert(ov != reln->md_fd); /* we never drop the 1st segment */ Assert(ov != reln->md_fd[forknum]); /* we never drop the 1st segment */
pfree(ov); pfree(ov);
} }
else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks) else if (priorblocks + ((BlockNumber) RELSEG_SIZE) > nblocks)
...@@ -803,13 +846,14 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) ...@@ -803,13 +846,14 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ) < 0) if (FileTruncate(v->mdfd_vfd, (off_t) lastsegblocks * BLCKSZ) < 0)
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not truncate relation %u/%u/%u to %u blocks: %m", errmsg("could not truncate relation %u/%u/%u/%u to %u blocks: %m",
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode, reln->smgr_rnode.relNode,
forknum,
nblocks))); nblocks)));
if (!isTemp) if (!isTemp)
register_dirty_segment(reln, v); register_dirty_segment(reln, forknum, v);
v = v->mdfd_chain; v = v->mdfd_chain;
ov->mdfd_chain = NULL; ov->mdfd_chain = NULL;
} }
...@@ -832,7 +876,7 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) ...@@ -832,7 +876,7 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
* nothing of dirty buffers that may exist inside the buffer manager. * nothing of dirty buffers that may exist inside the buffer manager.
*/ */
void void
mdimmedsync(SMgrRelation reln) mdimmedsync(SMgrRelation reln, ForkNumber forknum)
{ {
MdfdVec *v; MdfdVec *v;
BlockNumber curnblk; BlockNumber curnblk;
...@@ -841,20 +885,21 @@ mdimmedsync(SMgrRelation reln) ...@@ -841,20 +885,21 @@ mdimmedsync(SMgrRelation reln)
* NOTE: mdnblocks makes sure we have opened all active segments, so that * NOTE: mdnblocks makes sure we have opened all active segments, so that
* fsync loop will get them all! * fsync loop will get them all!
*/ */
curnblk = mdnblocks(reln); curnblk = mdnblocks(reln, forknum);
v = mdopen(reln, EXTENSION_FAIL); v = mdopen(reln, forknum, EXTENSION_FAIL);
while (v != NULL) while (v != NULL)
{ {
if (FileSync(v->mdfd_vfd) < 0) if (FileSync(v->mdfd_vfd) < 0)
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not fsync segment %u of relation %u/%u/%u: %m", errmsg("could not fsync segment %u of relation %u/%u/%u/%u: %m",
v->mdfd_segno, v->mdfd_segno,
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode))); reln->smgr_rnode.relNode,
forknum)));
v = v->mdfd_chain; v = v->mdfd_chain;
} }
} }
...@@ -1008,7 +1053,7 @@ mdsync(void) ...@@ -1008,7 +1053,7 @@ mdsync(void)
* FileSync, since fd.c might have closed the file behind our * FileSync, since fd.c might have closed the file behind our
* back. * back.
*/ */
seg = _mdfd_getseg(reln, seg = _mdfd_getseg(reln, entry->tag.forknum,
entry->tag.segno * ((BlockNumber) RELSEG_SIZE), entry->tag.segno * ((BlockNumber) RELSEG_SIZE),
false, EXTENSION_RETURN_NULL); false, EXTENSION_RETURN_NULL);
if (seg != NULL && if (seg != NULL &&
...@@ -1024,19 +1069,21 @@ mdsync(void) ...@@ -1024,19 +1069,21 @@ mdsync(void)
failures > 0) failures > 0)
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not fsync segment %u of relation %u/%u/%u: %m", errmsg("could not fsync segment %u of relation %u/%u/%u/%u: %m",
entry->tag.segno, entry->tag.segno,
entry->tag.rnode.spcNode, entry->tag.rnode.spcNode,
entry->tag.rnode.dbNode, entry->tag.rnode.dbNode,
entry->tag.rnode.relNode))); entry->tag.rnode.relNode,
entry->tag.forknum)));
else else
ereport(DEBUG1, ereport(DEBUG1,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not fsync segment %u of relation %u/%u/%u, but retrying: %m", errmsg("could not fsync segment %u of relation %u/%u/%u/%u but retrying: %m",
entry->tag.segno, entry->tag.segno,
entry->tag.rnode.spcNode, entry->tag.rnode.spcNode,
entry->tag.rnode.dbNode, entry->tag.rnode.dbNode,
entry->tag.rnode.relNode))); entry->tag.rnode.relNode,
entry->tag.forknum)));
/* /*
* Absorb incoming requests and check to see if canceled. * Absorb incoming requests and check to see if canceled.
...@@ -1126,7 +1173,7 @@ mdpostckpt(void) ...@@ -1126,7 +1173,7 @@ mdpostckpt(void)
Assert((CycleCtr) (entry->cycle_ctr + 1) == mdckpt_cycle_ctr); Assert((CycleCtr) (entry->cycle_ctr + 1) == mdckpt_cycle_ctr);
/* Unlink the file */ /* Unlink the file */
path = relpath(entry->rnode); path = relpath(entry->rnode, MAIN_FORKNUM);
if (unlink(path) < 0) if (unlink(path) < 0)
{ {
/* /*
...@@ -1139,10 +1186,11 @@ mdpostckpt(void) ...@@ -1139,10 +1186,11 @@ mdpostckpt(void)
if (errno != ENOENT) if (errno != ENOENT)
ereport(WARNING, ereport(WARNING,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not remove relation %u/%u/%u: %m", errmsg("could not remove relation %u/%u/%u/%u: %m",
entry->rnode.spcNode, entry->rnode.spcNode,
entry->rnode.dbNode, entry->rnode.dbNode,
entry->rnode.relNode))); entry->rnode.relNode,
MAIN_FORKNUM)));
} }
pfree(path); pfree(path);
...@@ -1161,26 +1209,27 @@ mdpostckpt(void) ...@@ -1161,26 +1209,27 @@ mdpostckpt(void)
* to be a performance problem). * to be a performance problem).
*/ */
static void static void
register_dirty_segment(SMgrRelation reln, MdfdVec *seg) register_dirty_segment(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
{ {
if (pendingOpsTable) if (pendingOpsTable)
{ {
/* push it into local pending-ops table */ /* push it into local pending-ops table */
RememberFsyncRequest(reln->smgr_rnode, seg->mdfd_segno); RememberFsyncRequest(reln->smgr_rnode, forknum, seg->mdfd_segno);
} }
else else
{ {
if (ForwardFsyncRequest(reln->smgr_rnode, seg->mdfd_segno)) if (ForwardFsyncRequest(reln->smgr_rnode, forknum, seg->mdfd_segno))
return; /* passed it off successfully */ return; /* passed it off successfully */
if (FileSync(seg->mdfd_vfd) < 0) if (FileSync(seg->mdfd_vfd) < 0)
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not fsync segment %u of relation %u/%u/%u: %m", errmsg("could not fsync segment %u of relation %u/%u/%u/%u: %m",
seg->mdfd_segno, seg->mdfd_segno,
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode))); reln->smgr_rnode.relNode,
forknum)));
} }
} }
...@@ -1196,7 +1245,7 @@ register_unlink(RelFileNode rnode) ...@@ -1196,7 +1245,7 @@ register_unlink(RelFileNode rnode)
if (pendingOpsTable) if (pendingOpsTable)
{ {
/* push it into local pending-ops table */ /* push it into local pending-ops table */
RememberFsyncRequest(rnode, UNLINK_RELATION_REQUEST); RememberFsyncRequest(rnode, MAIN_FORKNUM, UNLINK_RELATION_REQUEST);
} }
else else
{ {
...@@ -1208,7 +1257,8 @@ register_unlink(RelFileNode rnode) ...@@ -1208,7 +1257,8 @@ register_unlink(RelFileNode rnode)
* XXX should we just leave the file orphaned instead? * XXX should we just leave the file orphaned instead?
*/ */
Assert(IsUnderPostmaster); Assert(IsUnderPostmaster);
while (!ForwardFsyncRequest(rnode, UNLINK_RELATION_REQUEST)) while (!ForwardFsyncRequest(rnode, MAIN_FORKNUM,
UNLINK_RELATION_REQUEST))
pg_usleep(10000L); /* 10 msec seems a good number */ pg_usleep(10000L); /* 10 msec seems a good number */
} }
} }
...@@ -1233,7 +1283,7 @@ register_unlink(RelFileNode rnode) ...@@ -1233,7 +1283,7 @@ register_unlink(RelFileNode rnode)
* structure for them.) * structure for them.)
*/ */
void void
RememberFsyncRequest(RelFileNode rnode, BlockNumber segno) RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum, BlockNumber segno)
{ {
Assert(pendingOpsTable); Assert(pendingOpsTable);
...@@ -1246,7 +1296,8 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno) ...@@ -1246,7 +1296,8 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
hash_seq_init(&hstat, pendingOpsTable); hash_seq_init(&hstat, pendingOpsTable);
while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL) while ((entry = (PendingOperationEntry *) hash_seq_search(&hstat)) != NULL)
{ {
if (RelFileNodeEquals(entry->tag.rnode, rnode)) if (RelFileNodeEquals(entry->tag.rnode, rnode) &&
entry->tag.forknum == forknum)
{ {
/* Okay, cancel this entry */ /* Okay, cancel this entry */
entry->canceled = true; entry->canceled = true;
...@@ -1313,6 +1364,7 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno) ...@@ -1313,6 +1364,7 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
/* ensure any pad bytes in the hash key are zeroed */ /* ensure any pad bytes in the hash key are zeroed */
MemSet(&key, 0, sizeof(key)); MemSet(&key, 0, sizeof(key));
key.rnode = rnode; key.rnode = rnode;
key.forknum = forknum;
key.segno = segno; key.segno = segno;
entry = (PendingOperationEntry *) hash_search(pendingOpsTable, entry = (PendingOperationEntry *) hash_search(pendingOpsTable,
...@@ -1346,12 +1398,12 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno) ...@@ -1346,12 +1398,12 @@ RememberFsyncRequest(RelFileNode rnode, BlockNumber segno)
* ForgetRelationFsyncRequests -- forget any fsyncs for a rel * ForgetRelationFsyncRequests -- forget any fsyncs for a rel
*/ */
void void
ForgetRelationFsyncRequests(RelFileNode rnode) ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum)
{ {
if (pendingOpsTable) if (pendingOpsTable)
{ {
/* standalone backend or startup process: fsync state is local */ /* standalone backend or startup process: fsync state is local */
RememberFsyncRequest(rnode, FORGET_RELATION_FSYNC); RememberFsyncRequest(rnode, forknum, FORGET_RELATION_FSYNC);
} }
else if (IsUnderPostmaster) else if (IsUnderPostmaster)
{ {
...@@ -1365,7 +1417,7 @@ ForgetRelationFsyncRequests(RelFileNode rnode) ...@@ -1365,7 +1417,7 @@ ForgetRelationFsyncRequests(RelFileNode rnode)
* which would be bad, so I'm inclined to assume that the bgwriter * which would be bad, so I'm inclined to assume that the bgwriter
* will always empty the queue soon. * will always empty the queue soon.
*/ */
while (!ForwardFsyncRequest(rnode, FORGET_RELATION_FSYNC)) while (!ForwardFsyncRequest(rnode, forknum, FORGET_RELATION_FSYNC))
pg_usleep(10000L); /* 10 msec seems a good number */ pg_usleep(10000L); /* 10 msec seems a good number */
/* /*
...@@ -1390,12 +1442,13 @@ ForgetDatabaseFsyncRequests(Oid dbid) ...@@ -1390,12 +1442,13 @@ ForgetDatabaseFsyncRequests(Oid dbid)
if (pendingOpsTable) if (pendingOpsTable)
{ {
/* standalone backend or startup process: fsync state is local */ /* standalone backend or startup process: fsync state is local */
RememberFsyncRequest(rnode, FORGET_DATABASE_FSYNC); RememberFsyncRequest(rnode, InvalidForkNumber, FORGET_DATABASE_FSYNC);
} }
else if (IsUnderPostmaster) else if (IsUnderPostmaster)
{ {
/* see notes in ForgetRelationFsyncRequests */ /* see notes in ForgetRelationFsyncRequests */
while (!ForwardFsyncRequest(rnode, FORGET_DATABASE_FSYNC)) while (!ForwardFsyncRequest(rnode, InvalidForkNumber,
FORGET_DATABASE_FSYNC))
pg_usleep(10000L); /* 10 msec seems a good number */ pg_usleep(10000L); /* 10 msec seems a good number */
} }
} }
...@@ -1415,14 +1468,15 @@ _fdvec_alloc(void) ...@@ -1415,14 +1468,15 @@ _fdvec_alloc(void)
* and make a MdfdVec object for it. Returns NULL on failure. * and make a MdfdVec object for it. Returns NULL on failure.
*/ */
static MdfdVec * static MdfdVec *
_mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags) _mdfd_openseg(SMgrRelation reln, ForkNumber forknum, BlockNumber segno,
int oflags)
{ {
MdfdVec *v; MdfdVec *v;
int fd; int fd;
char *path, char *path,
*fullpath; *fullpath;
path = relpath(reln->smgr_rnode); path = relpath(reln->smgr_rnode, forknum);
if (segno > 0) if (segno > 0)
{ {
...@@ -1449,7 +1503,7 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags) ...@@ -1449,7 +1503,7 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
v->mdfd_vfd = fd; v->mdfd_vfd = fd;
v->mdfd_segno = segno; v->mdfd_segno = segno;
v->mdfd_chain = NULL; v->mdfd_chain = NULL;
Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE)); Assert(_mdnblocks(reln, forknum, v) <= ((BlockNumber) RELSEG_SIZE));
/* all done */ /* all done */
return v; return v;
...@@ -1464,10 +1518,10 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags) ...@@ -1464,10 +1518,10 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
* in the EXTENSION_CREATE case. * in the EXTENSION_CREATE case.
*/ */
static MdfdVec * static MdfdVec *
_mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp, _mdfd_getseg(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno,
ExtensionBehavior behavior) bool isTemp, ExtensionBehavior behavior)
{ {
MdfdVec *v = mdopen(reln, behavior); MdfdVec *v = mdopen(reln, forknum, behavior);
BlockNumber targetseg; BlockNumber targetseg;
BlockNumber nextsegno; BlockNumber nextsegno;
...@@ -1497,20 +1551,21 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp, ...@@ -1497,20 +1551,21 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
*/ */
if (behavior == EXTENSION_CREATE || InRecovery) if (behavior == EXTENSION_CREATE || InRecovery)
{ {
if (_mdnblocks(reln, v) < RELSEG_SIZE) if (_mdnblocks(reln, forknum, v) < RELSEG_SIZE)
{ {
char *zerobuf = palloc0(BLCKSZ); char *zerobuf = palloc0(BLCKSZ);
mdextend(reln, nextsegno * ((BlockNumber) RELSEG_SIZE) - 1, mdextend(reln, forknum,
nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
zerobuf, isTemp); zerobuf, isTemp);
pfree(zerobuf); pfree(zerobuf);
} }
v->mdfd_chain = _mdfd_openseg(reln, nextsegno, O_CREAT); v->mdfd_chain = _mdfd_openseg(reln, forknum, +nextsegno, O_CREAT);
} }
else else
{ {
/* We won't create segment if not existent */ /* We won't create segment if not existent */
v->mdfd_chain = _mdfd_openseg(reln, nextsegno, 0); v->mdfd_chain = _mdfd_openseg(reln, forknum, nextsegno, 0);
} }
if (v->mdfd_chain == NULL) if (v->mdfd_chain == NULL)
{ {
...@@ -1519,11 +1574,12 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp, ...@@ -1519,11 +1574,12 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
return NULL; return NULL;
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not open segment %u of relation %u/%u/%u (target block %u): %m", errmsg("could not open segment %u of relation %u/%u/%u/%u (target block %u): %m",
nextsegno, nextsegno,
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode, reln->smgr_rnode.relNode,
forknum,
blkno))); blkno)));
} }
} }
...@@ -1536,7 +1592,7 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp, ...@@ -1536,7 +1592,7 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
* Get number of blocks present in a single disk file * Get number of blocks present in a single disk file
*/ */
static BlockNumber static BlockNumber
_mdnblocks(SMgrRelation reln, MdfdVec *seg) _mdnblocks(SMgrRelation reln, ForkNumber forknum, MdfdVec *seg)
{ {
off_t len; off_t len;
...@@ -1544,11 +1600,12 @@ _mdnblocks(SMgrRelation reln, MdfdVec *seg) ...@@ -1544,11 +1600,12 @@ _mdnblocks(SMgrRelation reln, MdfdVec *seg)
if (len < 0) if (len < 0)
ereport(ERROR, ereport(ERROR,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not seek to end of segment %u of relation %u/%u/%u: %m", errmsg("could not seek to end of segment %u of relation %u/%u/%u/%u: %m",
seg->mdfd_segno, seg->mdfd_segno,
reln->smgr_rnode.spcNode, reln->smgr_rnode.spcNode,
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
reln->smgr_rnode.relNode))); reln->smgr_rnode.relNode,
forknum)));
/* note that this calculation will ignore any partial block at EOF */ /* note that this calculation will ignore any partial block at EOF */
return (BlockNumber) (len / BLCKSZ); return (BlockNumber) (len / BLCKSZ);
} }
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.110 2008/06/12 09:12:31 heikki Exp $ * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.111 2008/08/11 11:05:11 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -42,19 +42,22 @@ typedef struct f_smgr ...@@ -42,19 +42,22 @@ typedef struct f_smgr
{ {
void (*smgr_init) (void); /* may be NULL */ void (*smgr_init) (void); /* may be NULL */
void (*smgr_shutdown) (void); /* may be NULL */ void (*smgr_shutdown) (void); /* may be NULL */
void (*smgr_close) (SMgrRelation reln); void (*smgr_close) (SMgrRelation reln, ForkNumber forknum);
void (*smgr_create) (SMgrRelation reln, bool isRedo); void (*smgr_create) (SMgrRelation reln, ForkNumber forknum,
void (*smgr_unlink) (RelFileNode rnode, bool isRedo); bool isRedo);
void (*smgr_extend) (SMgrRelation reln, BlockNumber blocknum, bool (*smgr_exists) (SMgrRelation reln, ForkNumber forknum);
char *buffer, bool isTemp); void (*smgr_unlink) (RelFileNode rnode, ForkNumber forknum,
void (*smgr_read) (SMgrRelation reln, BlockNumber blocknum, bool isRedo);
char *buffer); void (*smgr_extend) (SMgrRelation reln, ForkNumber forknum,
void (*smgr_write) (SMgrRelation reln, BlockNumber blocknum, BlockNumber blocknum, char *buffer, bool isTemp);
char *buffer, bool isTemp); void (*smgr_read) (SMgrRelation reln, ForkNumber forknum,
BlockNumber (*smgr_nblocks) (SMgrRelation reln); BlockNumber blocknum, char *buffer);
void (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks, void (*smgr_write) (SMgrRelation reln, ForkNumber forknum,
bool isTemp); BlockNumber blocknum, char *buffer, bool isTemp);
void (*smgr_immedsync) (SMgrRelation reln); BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
BlockNumber nblocks, bool isTemp);
void (*smgr_immedsync) (SMgrRelation reln, ForkNumber forknum);
void (*smgr_commit) (void); /* may be NULL */ void (*smgr_commit) (void); /* may be NULL */
void (*smgr_abort) (void); /* may be NULL */ void (*smgr_abort) (void); /* may be NULL */
void (*smgr_pre_ckpt) (void); /* may be NULL */ void (*smgr_pre_ckpt) (void); /* may be NULL */
...@@ -65,7 +68,7 @@ typedef struct f_smgr ...@@ -65,7 +68,7 @@ typedef struct f_smgr
static const f_smgr smgrsw[] = { static const f_smgr smgrsw[] = {
/* magnetic disk */ /* magnetic disk */
{mdinit, NULL, mdclose, mdcreate, mdunlink, mdextend, {mdinit, NULL, mdclose, mdcreate, mdexists, mdunlink, mdextend,
mdread, mdwrite, mdnblocks, mdtruncate, mdimmedsync, mdread, mdwrite, mdnblocks, mdtruncate, mdimmedsync,
NULL, NULL, mdpreckpt, mdsync, mdpostckpt NULL, NULL, mdpreckpt, mdsync, mdpostckpt
} }
...@@ -102,6 +105,7 @@ static HTAB *SMgrRelationHash = NULL; ...@@ -102,6 +105,7 @@ static HTAB *SMgrRelationHash = NULL;
typedef struct PendingRelDelete typedef struct PendingRelDelete
{ {
RelFileNode relnode; /* relation that may need to be deleted */ RelFileNode relnode; /* relation that may need to be deleted */
ForkNumber forknum; /* fork number that may need to be deleted */
int which; /* which storage manager? */ int which; /* which storage manager? */
bool isTemp; /* is it a temporary relation? */ bool isTemp; /* is it a temporary relation? */
bool atCommit; /* T=delete at commit; F=delete at abort */ bool atCommit; /* T=delete at commit; F=delete at abort */
...@@ -126,19 +130,21 @@ static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */ ...@@ -126,19 +130,21 @@ static PendingRelDelete *pendingDeletes = NULL; /* head of linked list */
typedef struct xl_smgr_create typedef struct xl_smgr_create
{ {
RelFileNode rnode; RelFileNode rnode;
ForkNumber forknum;
} xl_smgr_create; } xl_smgr_create;
typedef struct xl_smgr_truncate typedef struct xl_smgr_truncate
{ {
BlockNumber blkno; BlockNumber blkno;
RelFileNode rnode; RelFileNode rnode;
ForkNumber forknum;
} xl_smgr_truncate; } xl_smgr_truncate;
/* local function prototypes */ /* local function prototypes */
static void smgrshutdown(int code, Datum arg); static void smgrshutdown(int code, Datum arg);
static void smgr_internal_unlink(RelFileNode rnode, int which, static void smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum,
bool isTemp, bool isRedo); int which, bool isTemp, bool isRedo);
/* /*
...@@ -211,10 +217,15 @@ smgropen(RelFileNode rnode) ...@@ -211,10 +217,15 @@ smgropen(RelFileNode rnode)
/* Initialize it if not present before */ /* Initialize it if not present before */
if (!found) if (!found)
{ {
int forknum;
/* hash_search already filled in the lookup key */ /* hash_search already filled in the lookup key */
reln->smgr_owner = NULL; reln->smgr_owner = NULL;
reln->smgr_which = 0; /* we only have md.c at present */ reln->smgr_which = 0; /* we only have md.c at present */
reln->md_fd = NULL; /* mark it not open */
/* mark it not open */
for(forknum = 0; forknum <= MAX_FORKNUM; forknum++)
reln->md_fd[forknum] = NULL;
} }
return reln; return reln;
...@@ -243,6 +254,15 @@ smgrsetowner(SMgrRelation *owner, SMgrRelation reln) ...@@ -243,6 +254,15 @@ smgrsetowner(SMgrRelation *owner, SMgrRelation reln)
*owner = reln; *owner = reln;
} }
/*
 *	smgrexists() -- Report whether the underlying file for a fork exists.
 *
 *		The question is passed to the storage manager selected by
 *		reln->smgr_which, and its answer is returned unchanged.
 */
bool
smgrexists(SMgrRelation reln, ForkNumber forknum)
{
	const f_smgr *mgr = &smgrsw[reln->smgr_which];

	return mgr->smgr_exists(reln, forknum);
}
/* /*
* smgrclose() -- Close and delete an SMgrRelation object. * smgrclose() -- Close and delete an SMgrRelation object.
*/ */
...@@ -250,8 +270,10 @@ void ...@@ -250,8 +270,10 @@ void
smgrclose(SMgrRelation reln) smgrclose(SMgrRelation reln)
{ {
SMgrRelation *owner; SMgrRelation *owner;
ForkNumber forknum;
(*(smgrsw[reln->smgr_which].smgr_close)) (reln); for (forknum = 0; forknum <= MAX_FORKNUM; forknum++)
(*(smgrsw[reln->smgr_which].smgr_close)) (reln, forknum);
owner = reln->smgr_owner; owner = reln->smgr_owner;
...@@ -315,7 +337,8 @@ smgrclosenode(RelFileNode rnode) ...@@ -315,7 +337,8 @@ smgrclosenode(RelFileNode rnode)
* smgrcreate() -- Create a new relation. * smgrcreate() -- Create a new relation.
* *
* Given an already-created (but presumably unused) SMgrRelation, * Given an already-created (but presumably unused) SMgrRelation,
* cause the underlying disk file or other storage to be created. * cause the underlying disk file or other storage for the fork
* to be created.
* *
* If isRedo is true, it is okay for the underlying file to exist * If isRedo is true, it is okay for the underlying file to exist
* already because we are in a WAL replay sequence. In this case * already because we are in a WAL replay sequence. In this case
...@@ -323,7 +346,7 @@ smgrclosenode(RelFileNode rnode) ...@@ -323,7 +346,7 @@ smgrclosenode(RelFileNode rnode)
* tell whether to drop the file. * tell whether to drop the file.
*/ */
void void
smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) smgrcreate(SMgrRelation reln, ForkNumber forknum, bool isTemp, bool isRedo)
{ {
XLogRecPtr lsn; XLogRecPtr lsn;
XLogRecData rdata; XLogRecData rdata;
...@@ -334,7 +357,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) ...@@ -334,7 +357,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
* Exit quickly in WAL replay mode if we've already opened the file. * Exit quickly in WAL replay mode if we've already opened the file.
* If it's open, it surely must exist. * If it's open, it surely must exist.
*/ */
if (isRedo && reln->md_fd != NULL) if (isRedo && reln->md_fd[forknum] != NULL)
return; return;
/* /*
...@@ -350,7 +373,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) ...@@ -350,7 +373,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
reln->smgr_rnode.dbNode, reln->smgr_rnode.dbNode,
isRedo); isRedo);
(*(smgrsw[reln->smgr_which].smgr_create)) (reln, isRedo); (*(smgrsw[reln->smgr_which].smgr_create)) (reln, forknum, isRedo);
if (isRedo) if (isRedo)
return; return;
...@@ -360,6 +383,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) ...@@ -360,6 +383,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
* will be dropped at abort time. * will be dropped at abort time.
*/ */
xlrec.rnode = reln->smgr_rnode; xlrec.rnode = reln->smgr_rnode;
xlrec.forknum = forknum;
rdata.data = (char *) &xlrec; rdata.data = (char *) &xlrec;
rdata.len = sizeof(xlrec); rdata.len = sizeof(xlrec);
...@@ -372,6 +396,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) ...@@ -372,6 +396,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
pending = (PendingRelDelete *) pending = (PendingRelDelete *)
MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
pending->relnode = reln->smgr_rnode; pending->relnode = reln->smgr_rnode;
pending->forknum = forknum;
pending->which = reln->smgr_which; pending->which = reln->smgr_which;
pending->isTemp = isTemp; pending->isTemp = isTemp;
pending->atCommit = false; /* delete if abort */ pending->atCommit = false; /* delete if abort */
...@@ -383,13 +408,11 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo) ...@@ -383,13 +408,11 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
/* /*
* smgrscheduleunlink() -- Schedule unlinking a relation at xact commit. * smgrscheduleunlink() -- Schedule unlinking a relation at xact commit.
* *
* The relation is marked to be removed from the store if we * The fork is marked to be removed from the store if we successfully
* successfully commit the current transaction. * commit the current transaction.
*
* This also implies smgrclose() on the SMgrRelation object.
*/ */
void void
smgrscheduleunlink(SMgrRelation reln, bool isTemp) smgrscheduleunlink(SMgrRelation reln, ForkNumber forknum, bool isTemp)
{ {
PendingRelDelete *pending; PendingRelDelete *pending;
...@@ -397,6 +420,7 @@ smgrscheduleunlink(SMgrRelation reln, bool isTemp) ...@@ -397,6 +420,7 @@ smgrscheduleunlink(SMgrRelation reln, bool isTemp)
pending = (PendingRelDelete *) pending = (PendingRelDelete *)
MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete)); MemoryContextAlloc(TopMemoryContext, sizeof(PendingRelDelete));
pending->relnode = reln->smgr_rnode; pending->relnode = reln->smgr_rnode;
pending->forknum = forknum;
pending->which = reln->smgr_which; pending->which = reln->smgr_which;
pending->isTemp = isTemp; pending->isTemp = isTemp;
pending->atCommit = true; /* delete if commit */ pending->atCommit = true; /* delete if commit */
...@@ -413,51 +437,49 @@ smgrscheduleunlink(SMgrRelation reln, bool isTemp) ...@@ -413,51 +437,49 @@ smgrscheduleunlink(SMgrRelation reln, bool isTemp)
* the existing list entry and delete the physical file immediately, but * the existing list entry and delete the physical file immediately, but
* for now I'll keep the logic simple. * for now I'll keep the logic simple.
*/ */
/* Now close the file and throw away the hashtable entry */
smgrclose(reln);
} }
/* /*
* smgrdounlink() -- Immediately unlink a relation. * smgrdounlink() -- Immediately unlink a relation.
* *
* The relation is removed from the store. This should not be used * The specified fork of the relation is removed from the store. This
* during transactional operations, since it can't be undone. * should not be used during transactional operations, since it can't be
* undone.
* *
* If isRedo is true, it is okay for the underlying file to be gone * If isRedo is true, it is okay for the underlying file to be gone
* already. * already.
*
* This also implies smgrclose() on the SMgrRelation object.
*/ */
void void
smgrdounlink(SMgrRelation reln, bool isTemp, bool isRedo) smgrdounlink(SMgrRelation reln, ForkNumber forknum, bool isTemp, bool isRedo)
{ {
RelFileNode rnode = reln->smgr_rnode; RelFileNode rnode = reln->smgr_rnode;
int which = reln->smgr_which; int which = reln->smgr_which;
/* Close the file and throw away the hashtable entry */ /* Close the fork */
smgrclose(reln); (*(smgrsw[which].smgr_close)) (reln, forknum);
smgr_internal_unlink(rnode, which, isTemp, isRedo); smgr_internal_unlink(rnode, forknum, which, isTemp, isRedo);
} }
/* /*
* Shared subroutine that actually does the unlink ... * Shared subroutine that actually does the unlink ...
*/ */
static void static void
smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo) smgr_internal_unlink(RelFileNode rnode, ForkNumber forknum,
int which, bool isTemp, bool isRedo)
{ {
/* /*
* Get rid of any remaining buffers for the relation. bufmgr will just * Get rid of any remaining buffers for the relation. bufmgr will just
* drop them without bothering to write the contents. * drop them without bothering to write the contents.
*/ */
DropRelFileNodeBuffers(rnode, isTemp, 0); DropRelFileNodeBuffers(rnode, forknum, isTemp, 0);
/* /*
* Tell the free space map to forget this relation. It won't be accessed * Tell the free space map to forget this relation. It won't be accessed
* any more anyway, but we may as well recycle the map space quickly. * any more anyway, but we may as well recycle the map space quickly.
*/ */
FreeSpaceMapForgetRel(&rnode); if (forknum == MAIN_FORKNUM)
FreeSpaceMapForgetRel(&rnode);
/* /*
* It'd be nice to tell the stats collector to forget it immediately, too. * It'd be nice to tell the stats collector to forget it immediately, too.
...@@ -473,7 +495,7 @@ smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo) ...@@ -473,7 +495,7 @@ smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo)
* ERROR, because we've already decided to commit or abort the current * ERROR, because we've already decided to commit or abort the current
* xact. * xact.
*/ */
(*(smgrsw[which].smgr_unlink)) (rnode, isRedo); (*(smgrsw[which].smgr_unlink)) (rnode, forknum, isRedo);
} }
/* /*
...@@ -486,9 +508,11 @@ smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo) ...@@ -486,9 +508,11 @@ smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo)
* causes intervening file space to become filled with zeroes. * causes intervening file space to become filled with zeroes.
*/ */
void void
smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer, bool isTemp)
{ {
(*(smgrsw[reln->smgr_which].smgr_extend)) (reln, blocknum, buffer, isTemp); (*(smgrsw[reln->smgr_which].smgr_extend)) (reln, forknum, blocknum,
buffer, isTemp);
} }
/* /*
...@@ -500,9 +524,10 @@ smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) ...@@ -500,9 +524,10 @@ smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
* return pages in the format that POSTGRES expects. * return pages in the format that POSTGRES expects.
*/ */
void void
smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer) smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer)
{ {
(*(smgrsw[reln->smgr_which].smgr_read)) (reln, blocknum, buffer); (*(smgrsw[reln->smgr_which].smgr_read)) (reln, forknum, blocknum, buffer);
} }
/* /*
...@@ -521,9 +546,11 @@ smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer) ...@@ -521,9 +546,11 @@ smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
* made to fsync the write before checkpointing. * made to fsync the write before checkpointing.
*/ */
void void
smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer, bool isTemp)
{ {
(*(smgrsw[reln->smgr_which].smgr_write)) (reln, blocknum, buffer, isTemp); (*(smgrsw[reln->smgr_which].smgr_write)) (reln, forknum, blocknum,
buffer, isTemp);
} }
/* /*
...@@ -531,9 +558,9 @@ smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp) ...@@ -531,9 +558,9 @@ smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
* supplied relation. * supplied relation.
*/ */
BlockNumber BlockNumber
smgrnblocks(SMgrRelation reln) smgrnblocks(SMgrRelation reln, ForkNumber forknum)
{ {
return (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln); return (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln, forknum);
} }
/* /*
...@@ -541,13 +568,14 @@ smgrnblocks(SMgrRelation reln) ...@@ -541,13 +568,14 @@ smgrnblocks(SMgrRelation reln)
* of blocks * of blocks
*/ */
void void
smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) smgrtruncate(SMgrRelation reln, ForkNumber forknum, BlockNumber nblocks,
bool isTemp)
{ {
/* /*
* Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
* just drop them without bothering to write the contents. * just drop them without bothering to write the contents.
*/ */
DropRelFileNodeBuffers(reln->smgr_rnode, isTemp, nblocks); DropRelFileNodeBuffers(reln->smgr_rnode, forknum, isTemp, nblocks);
/* /*
* Tell the free space map to forget anything it may have stored for the * Tell the free space map to forget anything it may have stored for the
...@@ -557,7 +585,8 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) ...@@ -557,7 +585,8 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
FreeSpaceMapTruncateRel(&reln->smgr_rnode, nblocks); FreeSpaceMapTruncateRel(&reln->smgr_rnode, nblocks);
/* Do the truncation */ /* Do the truncation */
(*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, nblocks, isTemp); (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, forknum, nblocks,
isTemp);
if (!isTemp) if (!isTemp)
{ {
...@@ -570,6 +599,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) ...@@ -570,6 +599,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
xlrec.blkno = nblocks; xlrec.blkno = nblocks;
xlrec.rnode = reln->smgr_rnode; xlrec.rnode = reln->smgr_rnode;
xlrec.forknum = forknum;
rdata.data = (char *) &xlrec; rdata.data = (char *) &xlrec;
rdata.len = sizeof(xlrec); rdata.len = sizeof(xlrec);
...@@ -604,9 +634,9 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) ...@@ -604,9 +634,9 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
* otherwise the sync is not very meaningful. * otherwise the sync is not very meaningful.
*/ */
void void
smgrimmedsync(SMgrRelation reln) smgrimmedsync(SMgrRelation reln, ForkNumber forknum)
{ {
(*(smgrsw[reln->smgr_which].smgr_immedsync)) (reln); (*(smgrsw[reln->smgr_which].smgr_immedsync)) (reln, forknum);
} }
...@@ -666,6 +696,7 @@ smgrDoPendingDeletes(bool isCommit) ...@@ -666,6 +696,7 @@ smgrDoPendingDeletes(bool isCommit)
/* do deletion if called for */ /* do deletion if called for */
if (pending->atCommit == isCommit) if (pending->atCommit == isCommit)
smgr_internal_unlink(pending->relnode, smgr_internal_unlink(pending->relnode,
pending->forknum,
pending->which, pending->which,
pending->isTemp, pending->isTemp,
false); false);
...@@ -680,7 +711,7 @@ smgrDoPendingDeletes(bool isCommit) ...@@ -680,7 +711,7 @@ smgrDoPendingDeletes(bool isCommit)
* smgrGetPendingDeletes() -- Get a list of relations to be deleted. * smgrGetPendingDeletes() -- Get a list of relations to be deleted.
* *
* The return value is the number of relations scheduled for termination. * The return value is the number of relations scheduled for termination.
* *ptr is set to point to a freshly-palloc'd array of RelFileNodes. * *ptr is set to point to a freshly-palloc'd array of RelFileForks.
* If there are no relations to be deleted, *ptr is set to NULL. * If there are no relations to be deleted, *ptr is set to NULL.
* *
* If haveNonTemp isn't NULL, the bool it points to gets set to true if * If haveNonTemp isn't NULL, the bool it points to gets set to true if
...@@ -690,11 +721,11 @@ smgrDoPendingDeletes(bool isCommit) ...@@ -690,11 +721,11 @@ smgrDoPendingDeletes(bool isCommit)
* by upper-level transactions. * by upper-level transactions.
*/ */
int int
smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, bool *haveNonTemp) smgrGetPendingDeletes(bool forCommit, RelFileFork **ptr, bool *haveNonTemp)
{ {
int nestLevel = GetCurrentTransactionNestLevel(); int nestLevel = GetCurrentTransactionNestLevel();
int nrels; int nrels;
RelFileNode *rptr; RelFileFork *rptr;
PendingRelDelete *pending; PendingRelDelete *pending;
nrels = 0; nrels = 0;
...@@ -710,12 +741,16 @@ smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, bool *haveNonTemp) ...@@ -710,12 +741,16 @@ smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, bool *haveNonTemp)
*ptr = NULL; *ptr = NULL;
return 0; return 0;
} }
rptr = (RelFileNode *) palloc(nrels * sizeof(RelFileNode)); rptr = (RelFileFork *) palloc(nrels * sizeof(RelFileFork));
*ptr = rptr; *ptr = rptr;
for (pending = pendingDeletes; pending != NULL; pending = pending->next) for (pending = pendingDeletes; pending != NULL; pending = pending->next)
{ {
if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit) if (pending->nestLevel >= nestLevel && pending->atCommit == forCommit)
*rptr++ = pending->relnode; {
rptr->rnode = pending->relnode;
rptr->forknum = pending->forknum;
rptr++;
}
if (haveNonTemp && !pending->isTemp) if (haveNonTemp && !pending->isTemp)
*haveNonTemp = true; *haveNonTemp = true;
} }
...@@ -843,7 +878,7 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record) ...@@ -843,7 +878,7 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
SMgrRelation reln; SMgrRelation reln;
reln = smgropen(xlrec->rnode); reln = smgropen(xlrec->rnode);
smgrcreate(reln, false, true); smgrcreate(reln, xlrec->forknum, false, true);
} }
else if (info == XLOG_SMGR_TRUNCATE) else if (info == XLOG_SMGR_TRUNCATE)
{ {
...@@ -858,7 +893,7 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record) ...@@ -858,7 +893,7 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
* XLogOpenRelation, we prefer to recreate the rel and replay the log * XLogOpenRelation, we prefer to recreate the rel and replay the log
* as best we can until the drop is seen. * as best we can until the drop is seen.
*/ */
smgrcreate(reln, false, true); smgrcreate(reln, xlrec->forknum, false, true);
/* Can't use smgrtruncate because it would try to xlog */ /* Can't use smgrtruncate because it would try to xlog */
...@@ -867,7 +902,8 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record) ...@@ -867,7 +902,8 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
* truncated blocks. We must do this, else subsequent XLogReadBuffer * truncated blocks. We must do this, else subsequent XLogReadBuffer
* operations will not re-extend the file properly. * operations will not re-extend the file properly.
*/ */
DropRelFileNodeBuffers(xlrec->rnode, false, xlrec->blkno); DropRelFileNodeBuffers(xlrec->rnode, xlrec->forknum, false,
xlrec->blkno);
/* /*
* Tell the free space map to forget anything it may have stored for * Tell the free space map to forget anything it may have stored for
...@@ -878,11 +914,12 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record) ...@@ -878,11 +914,12 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
/* Do the truncation */ /* Do the truncation */
(*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln,
xlrec->forknum,
xlrec->blkno, xlrec->blkno,
false); false);
/* Also tell xlogutils.c about it */ /* Also tell xlogutils.c about it */
XLogTruncateRelation(xlrec->rnode, xlrec->blkno); XLogTruncateRelation(xlrec->rnode, xlrec->forknum, xlrec->blkno);
} }
else else
elog(PANIC, "smgr_redo: unknown op code %u", info); elog(PANIC, "smgr_redo: unknown op code %u", info);
...@@ -897,17 +934,18 @@ smgr_desc(StringInfo buf, uint8 xl_info, char *rec) ...@@ -897,17 +934,18 @@ smgr_desc(StringInfo buf, uint8 xl_info, char *rec)
{ {
xl_smgr_create *xlrec = (xl_smgr_create *) rec; xl_smgr_create *xlrec = (xl_smgr_create *) rec;
appendStringInfo(buf, "file create: %u/%u/%u", appendStringInfo(buf, "file create: %u/%u/%u/%u",
xlrec->rnode.spcNode, xlrec->rnode.dbNode, xlrec->rnode.spcNode, xlrec->rnode.dbNode,
xlrec->rnode.relNode); xlrec->rnode.relNode, xlrec->forknum);
} }
else if (info == XLOG_SMGR_TRUNCATE) else if (info == XLOG_SMGR_TRUNCATE)
{ {
xl_smgr_truncate *xlrec = (xl_smgr_truncate *) rec; xl_smgr_truncate *xlrec = (xl_smgr_truncate *) rec;
appendStringInfo(buf, "file truncate: %u/%u/%u to %u blocks", appendStringInfo(buf, "file truncate: %u/%u/%u/%u to %u blocks",
xlrec->rnode.spcNode, xlrec->rnode.dbNode, xlrec->rnode.spcNode, xlrec->rnode.dbNode,
xlrec->rnode.relNode, xlrec->blkno); xlrec->rnode.relNode, xlrec->forknum,
xlrec->blkno);
} }
else else
appendStringInfo(buf, "UNKNOWN"); appendStringInfo(buf, "UNKNOWN");
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* Copyright (c) 2002-2008, PostgreSQL Global Development Group * Copyright (c) 2002-2008, PostgreSQL Global Development Group
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/dbsize.c,v 1.19 2008/06/19 00:46:05 alvherre Exp $ * $PostgreSQL: pgsql/src/backend/utils/adt/dbsize.c,v 1.20 2008/08/11 11:05:11 heikki Exp $
* *
*/ */
...@@ -255,7 +255,8 @@ calculate_relation_size(RelFileNode *rfn) ...@@ -255,7 +255,8 @@ calculate_relation_size(RelFileNode *rfn)
char pathname[MAXPGPATH]; char pathname[MAXPGPATH];
unsigned int segcount = 0; unsigned int segcount = 0;
relationpath = relpath(*rfn); /* XXX: This ignores the other forks. */
relationpath = relpath(*rfn, MAIN_FORKNUM);
for (segcount = 0;; segcount++) for (segcount = 0;; segcount++)
{ {
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.137 2008/06/19 00:46:06 alvherre Exp $ * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.138 2008/08/11 11:05:11 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -127,7 +127,8 @@ extern XLogRecPtr log_heap_clean(Relation reln, Buffer buffer, ...@@ -127,7 +127,8 @@ extern XLogRecPtr log_heap_clean(Relation reln, Buffer buffer,
extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer, extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer,
TransactionId cutoff_xid, TransactionId cutoff_xid,
OffsetNumber *offsets, int offcnt); OffsetNumber *offsets, int offcnt);
extern XLogRecPtr log_newpage(RelFileNode *rnode, BlockNumber blk, Page page); extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
BlockNumber blk, Page page);
/* in heap/pruneheap.c */ /* in heap/pruneheap.c */
extern void heap_page_prune_opt(Relation relation, Buffer buffer, extern void heap_page_prune_opt(Relation relation, Buffer buffer,
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/htup.h,v 1.100 2008/07/13 20:45:47 tgl Exp $ * $PostgreSQL: pgsql/src/include/access/htup.h,v 1.101 2008/08/11 11:05:11 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -670,6 +670,7 @@ typedef struct xl_heap_clean ...@@ -670,6 +670,7 @@ typedef struct xl_heap_clean
typedef struct xl_heap_newpage typedef struct xl_heap_newpage
{ {
RelFileNode node; RelFileNode node;
ForkNumber forknum;
BlockNumber blkno; /* location of new page */ BlockNumber blkno; /* location of new page */
/* entire page contents follow at end of record */ /* entire page contents follow at end of record */
} xl_heap_newpage; } xl_heap_newpage;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/xact.h,v 1.94 2008/03/04 19:54:06 tgl Exp $ * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.95 2008/08/11 11:05:11 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -88,10 +88,10 @@ typedef void (*SubXactCallback) (SubXactEvent event, SubTransactionId mySubid, ...@@ -88,10 +88,10 @@ typedef void (*SubXactCallback) (SubXactEvent event, SubTransactionId mySubid,
typedef struct xl_xact_commit typedef struct xl_xact_commit
{ {
TimestampTz xact_time; /* time of commit */ TimestampTz xact_time; /* time of commit */
int nrels; /* number of RelFileNodes */ int nrels; /* number of RelFileForks */
int nsubxacts; /* number of subtransaction XIDs */ int nsubxacts; /* number of subtransaction XIDs */
/* Array of RelFileNode(s) to drop at commit */ /* Array of RelFileFork(s) to drop at commit */
RelFileNode xnodes[1]; /* VARIABLE LENGTH ARRAY */ RelFileFork xnodes[1]; /* VARIABLE LENGTH ARRAY */
/* ARRAY OF COMMITTED SUBTRANSACTION XIDs FOLLOWS */ /* ARRAY OF COMMITTED SUBTRANSACTION XIDs FOLLOWS */
} xl_xact_commit; } xl_xact_commit;
...@@ -100,10 +100,10 @@ typedef struct xl_xact_commit ...@@ -100,10 +100,10 @@ typedef struct xl_xact_commit
typedef struct xl_xact_abort typedef struct xl_xact_abort
{ {
TimestampTz xact_time; /* time of abort */ TimestampTz xact_time; /* time of abort */
int nrels; /* number of RelFileNodes */ int nrels; /* number of RelFileForks */
int nsubxacts; /* number of subtransaction XIDs */ int nsubxacts; /* number of subtransaction XIDs */
/* Array of RelFileNode(s) to drop at abort */ /* Array of RelFileFork(s) to drop at abort */
RelFileNode xnodes[1]; /* VARIABLE LENGTH ARRAY */ RelFileFork xnodes[1]; /* VARIABLE LENGTH ARRAY */
/* ARRAY OF ABORTED SUBTRANSACTION XIDs FOLLOWS */ /* ARRAY OF ABORTED SUBTRANSACTION XIDs FOLLOWS */
} xl_xact_abort; } xl_xact_abort;
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.23 2008/02/17 02:09:30 tgl Exp $ * $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.24 2008/08/11 11:05:11 heikki Exp $
*/ */
#ifndef XLOG_INTERNAL_H #ifndef XLOG_INTERNAL_H
#define XLOG_INTERNAL_H #define XLOG_INTERNAL_H
...@@ -40,6 +40,7 @@ ...@@ -40,6 +40,7 @@
typedef struct BkpBlock typedef struct BkpBlock
{ {
RelFileNode node; /* relation containing block */ RelFileNode node; /* relation containing block */
ForkNumber fork; /* fork within the relation */
BlockNumber block; /* block number */ BlockNumber block; /* block number */
uint16 hole_offset; /* number of bytes before "hole" */ uint16 hole_offset; /* number of bytes before "hole" */
uint16 hole_length; /* number of bytes in "hole" */ uint16 hole_length; /* number of bytes in "hole" */
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/xlogutils.h,v 1.25 2008/06/19 00:46:06 alvherre Exp $ * $PostgreSQL: pgsql/src/include/access/xlogutils.h,v 1.26 2008/08/11 11:05:11 heikki Exp $
*/ */
#ifndef XLOG_UTILS_H #ifndef XLOG_UTILS_H
#define XLOG_UTILS_H #define XLOG_UTILS_H
...@@ -19,11 +19,14 @@ ...@@ -19,11 +19,14 @@
extern void XLogCheckInvalidPages(void); extern void XLogCheckInvalidPages(void);
extern void XLogDropRelation(RelFileNode rnode); extern void XLogDropRelation(RelFileNode rnode, ForkNumber forknum);
extern void XLogDropDatabase(Oid dbid); extern void XLogDropDatabase(Oid dbid);
extern void XLogTruncateRelation(RelFileNode rnode, BlockNumber nblocks); extern void XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
BlockNumber nblocks);
extern Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init); extern Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init);
extern Buffer XLogReadBufferWithFork(RelFileNode rnode, ForkNumber forknum,
BlockNumber blkno, bool init);
extern Relation CreateFakeRelcacheEntry(RelFileNode rnode); extern Relation CreateFakeRelcacheEntry(RelFileNode rnode);
extern void FreeFakeRelcacheEntry(Relation fakerel); extern void FreeFakeRelcacheEntry(Relation fakerel);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/catalog/catalog.h,v 1.40 2008/06/19 00:46:06 alvherre Exp $ * $PostgreSQL: pgsql/src/include/catalog/catalog.h,v 1.41 2008/08/11 11:05:11 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
#include "utils/relcache.h" #include "utils/relcache.h"
extern char *relpath(RelFileNode rnode); extern char *relpath(RelFileNode rnode, ForkNumber forknum);
extern char *GetDatabasePath(Oid dbNode, Oid spcNode); extern char *GetDatabasePath(Oid dbNode, Oid spcNode);
extern bool IsSystemRelation(Relation relation); extern bool IsSystemRelation(Relation relation);
......
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
* *
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/include/postmaster/bgwriter.h,v 1.11 2008/01/01 19:45:58 momjian Exp $ * $PostgreSQL: pgsql/src/include/postmaster/bgwriter.h,v 1.12 2008/08/11 11:05:11 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -27,7 +27,8 @@ extern void BackgroundWriterMain(void); ...@@ -27,7 +27,8 @@ extern void BackgroundWriterMain(void);
extern void RequestCheckpoint(int flags); extern void RequestCheckpoint(int flags);
extern void CheckpointWriteDelay(int flags, double progress); extern void CheckpointWriteDelay(int flags, double progress);
extern bool ForwardFsyncRequest(RelFileNode rnode, BlockNumber segno); extern bool ForwardFsyncRequest(RelFileNode rnode, ForkNumber forknum,
BlockNumber segno);
extern void AbsorbFsyncRequests(void); extern void AbsorbFsyncRequests(void);
extern Size BgWriterShmemSize(void); extern Size BgWriterShmemSize(void);
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.97 2008/06/19 00:46:06 alvherre Exp $ * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.98 2008/08/11 11:05:11 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -65,6 +65,7 @@ typedef bits16 BufFlags; ...@@ -65,6 +65,7 @@ typedef bits16 BufFlags;
typedef struct buftag typedef struct buftag
{ {
RelFileNode rnode; /* physical relation identifier */ RelFileNode rnode; /* physical relation identifier */
ForkNumber forkNum;
BlockNumber blockNum; /* blknum relative to begin of reln */ BlockNumber blockNum; /* blknum relative to begin of reln */
} BufferTag; } BufferTag;
...@@ -73,19 +74,22 @@ typedef struct buftag ...@@ -73,19 +74,22 @@ typedef struct buftag
(a).rnode.spcNode = InvalidOid, \ (a).rnode.spcNode = InvalidOid, \
(a).rnode.dbNode = InvalidOid, \ (a).rnode.dbNode = InvalidOid, \
(a).rnode.relNode = InvalidOid, \ (a).rnode.relNode = InvalidOid, \
(a).forkNum = InvalidForkNumber, \
(a).blockNum = InvalidBlockNumber \ (a).blockNum = InvalidBlockNumber \
) )
#define INIT_BUFFERTAG(a,xx_rnode,xx_blockNum) \ #define INIT_BUFFERTAG(a,xx_rnode,xx_forkNum,xx_blockNum) \
( \ ( \
(a).rnode = (xx_rnode), \ (a).rnode = (xx_rnode), \
(a).forkNum = (xx_forkNum), \
(a).blockNum = (xx_blockNum) \ (a).blockNum = (xx_blockNum) \
) )
#define BUFFERTAGS_EQUAL(a,b) \ #define BUFFERTAGS_EQUAL(a,b) \
( \ ( \
RelFileNodeEquals((a).rnode, (b).rnode) && \ RelFileNodeEquals((a).rnode, (b).rnode) && \
(a).blockNum == (b).blockNum \ (a).blockNum == (b).blockNum && \
(a).forkNum == (b).forkNum \
) )
/* /*
...@@ -202,10 +206,10 @@ extern int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id); ...@@ -202,10 +206,10 @@ extern int BufTableInsert(BufferTag *tagPtr, uint32 hashcode, int buf_id);
extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode); extern void BufTableDelete(BufferTag *tagPtr, uint32 hashcode);
/* localbuf.c */ /* localbuf.c */
extern BufferDesc *LocalBufferAlloc(SMgrRelation reln, BlockNumber blockNum, extern BufferDesc *LocalBufferAlloc(SMgrRelation reln, ForkNumber forkNum,
bool *foundPtr); BlockNumber blockNum, bool *foundPtr);
extern void MarkLocalBufferDirty(Buffer buffer); extern void MarkLocalBufferDirty(Buffer buffer);
extern void DropRelFileNodeLocalBuffers(RelFileNode rnode, extern void DropRelFileNodeLocalBuffers(RelFileNode rnode, ForkNumber forkNum,
BlockNumber firstDelBlock); BlockNumber firstDelBlock);
extern void AtEOXact_LocalBuffers(bool isCommit); extern void AtEOXact_LocalBuffers(bool isCommit);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.114 2008/06/19 00:46:06 alvherre Exp $ * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.115 2008/08/11 11:05:11 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -144,11 +144,13 @@ extern PGDLLIMPORT int32 *LocalRefCount; ...@@ -144,11 +144,13 @@ extern PGDLLIMPORT int32 *LocalRefCount;
* prototypes for functions in bufmgr.c * prototypes for functions in bufmgr.c
*/ */
extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum); extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
extern Buffer ReadBufferWithFork(Relation reln, ForkNumber forkNum, BlockNumber blockNum);
extern Buffer ReadBufferWithStrategy(Relation reln, BlockNumber blockNum, extern Buffer ReadBufferWithStrategy(Relation reln, BlockNumber blockNum,
BufferAccessStrategy strategy); BufferAccessStrategy strategy);
extern Buffer ReadOrZeroBuffer(Relation reln, BlockNumber blockNum); extern Buffer ReadOrZeroBuffer(Relation reln, ForkNumber forkNum,
BlockNumber blockNum);
extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp, extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp,
BlockNumber blockNum, bool zeroPage); ForkNumber forkNum, BlockNumber blockNum, bool zeroPage);
extern void ReleaseBuffer(Buffer buffer); extern void ReleaseBuffer(Buffer buffer);
extern void UnlockReleaseBuffer(Buffer buffer); extern void UnlockReleaseBuffer(Buffer buffer);
extern void MarkBufferDirty(Buffer buffer); extern void MarkBufferDirty(Buffer buffer);
...@@ -169,15 +171,16 @@ extern BlockNumber RelationGetNumberOfBlocks(Relation relation); ...@@ -169,15 +171,16 @@ extern BlockNumber RelationGetNumberOfBlocks(Relation relation);
extern void RelationTruncate(Relation rel, BlockNumber nblocks); extern void RelationTruncate(Relation rel, BlockNumber nblocks);
extern void FlushRelationBuffers(Relation rel); extern void FlushRelationBuffers(Relation rel);
extern void FlushDatabaseBuffers(Oid dbid); extern void FlushDatabaseBuffers(Oid dbid);
extern void DropRelFileNodeBuffers(RelFileNode rnode, bool istemp, extern void DropRelFileNodeBuffers(RelFileNode rnode, ForkNumber forkNum,
BlockNumber firstDelBlock); bool istemp, BlockNumber firstDelBlock);
extern void DropDatabaseBuffers(Oid dbid); extern void DropDatabaseBuffers(Oid dbid);
#ifdef NOT_USED #ifdef NOT_USED
extern void PrintPinnedBufs(void); extern void PrintPinnedBufs(void);
#endif #endif
extern Size BufferShmemSize(void); extern Size BufferShmemSize(void);
extern RelFileNode BufferGetFileNode(Buffer buffer); extern void BufferGetTag(Buffer buffer, RelFileNode *rnode,
ForkNumber *forknum, BlockNumber *blknum);
extern void SetBufferCommitInfoNeedsSave(Buffer buffer); extern void SetBufferCommitInfoNeedsSave(Buffer buffer);
......
...@@ -7,16 +7,33 @@ ...@@ -7,16 +7,33 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/storage/relfilenode.h,v 1.15 2008/01/01 19:45:59 momjian Exp $ * $PostgreSQL: pgsql/src/include/storage/relfilenode.h,v 1.16 2008/08/11 11:05:11 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#ifndef RELFILENODE_H #ifndef RELFILENODE_H
#define RELFILENODE_H #define RELFILENODE_H
/*
* The physical storage of a relation consists of one or more forks. The
* main fork is always created, but in addition to that there can be
* additional forks for storing various metadata. ForkNumber is used when
* we need to refer to a specific fork in a relation.
*/
typedef enum ForkNumber
{
InvalidForkNumber = -1,
MAIN_FORKNUM = 0
/* NOTE: change MAX_FORKNUM below when you add new forks */
} ForkNumber;
#define MAX_FORKNUM MAIN_FORKNUM
/* /*
* RelFileNode must provide all that we need to know to physically access * RelFileNode must provide all that we need to know to physically access
* a relation. * a relation. Note, however, that a "physical" relation is comprised of
* multiple files on the filesystem, as each fork is stored as a separate
* file, and each fork can be divided into multiple segments. See md.c.
* *
* spcNode identifies the tablespace of the relation. It corresponds to * spcNode identifies the tablespace of the relation. It corresponds to
* pg_tablespace.oid. * pg_tablespace.oid.
...@@ -57,4 +74,13 @@ typedef struct RelFileNode ...@@ -57,4 +74,13 @@ typedef struct RelFileNode
(node1).dbNode == (node2).dbNode && \ (node1).dbNode == (node2).dbNode && \
(node1).spcNode == (node2).spcNode) (node1).spcNode == (node2).spcNode)
/*
* RelFileFork identifies a particular fork of a relation.
*/
typedef struct RelFileFork
{
RelFileNode rnode;
ForkNumber forknum;
} RelFileFork;
#endif /* RELFILENODE_H */ #endif /* RELFILENODE_H */
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.62 2008/01/01 19:45:59 momjian Exp $ * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.63 2008/08/11 11:05:11 heikki Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -51,7 +51,8 @@ typedef struct SMgrRelationData ...@@ -51,7 +51,8 @@ typedef struct SMgrRelationData
*/ */
int smgr_which; /* storage manager selector */ int smgr_which; /* storage manager selector */
struct _MdfdVec *md_fd; /* for md.c; NULL if not open */ /* for md.c; NULL for forks that are not open */
struct _MdfdVec *md_fd[MAX_FORKNUM + 1];
} SMgrRelationData; } SMgrRelationData;
typedef SMgrRelationData *SMgrRelation; typedef SMgrRelationData *SMgrRelation;
...@@ -59,24 +60,29 @@ typedef SMgrRelationData *SMgrRelation; ...@@ -59,24 +60,29 @@ typedef SMgrRelationData *SMgrRelation;
extern void smgrinit(void); extern void smgrinit(void);
extern SMgrRelation smgropen(RelFileNode rnode); extern SMgrRelation smgropen(RelFileNode rnode);
extern bool smgrexists(SMgrRelation reln, ForkNumber forknum);
extern void smgrsetowner(SMgrRelation *owner, SMgrRelation reln); extern void smgrsetowner(SMgrRelation *owner, SMgrRelation reln);
extern void smgrclose(SMgrRelation reln); extern void smgrclose(SMgrRelation reln);
extern void smgrcloseall(void); extern void smgrcloseall(void);
extern void smgrclosenode(RelFileNode rnode); extern void smgrclosenode(RelFileNode rnode);
extern void smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo); extern void smgrcreate(SMgrRelation reln, ForkNumber forknum,
extern void smgrscheduleunlink(SMgrRelation reln, bool isTemp); bool isTemp, bool isRedo);
extern void smgrdounlink(SMgrRelation reln, bool isTemp, bool isRedo); extern void smgrscheduleunlink(SMgrRelation reln, ForkNumber forknum,
extern void smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp);
bool isTemp); extern void smgrdounlink(SMgrRelation reln, ForkNumber forknum,
extern void smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer); bool isTemp, bool isRedo);
extern void smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, extern void smgrextend(SMgrRelation reln, ForkNumber forknum,
bool isTemp); BlockNumber blocknum, char *buffer, bool isTemp);
extern BlockNumber smgrnblocks(SMgrRelation reln); extern void smgrread(SMgrRelation reln, ForkNumber forknum,
extern void smgrtruncate(SMgrRelation reln, BlockNumber nblocks, BlockNumber blocknum, char *buffer);
bool isTemp); extern void smgrwrite(SMgrRelation reln, ForkNumber forknum,
extern void smgrimmedsync(SMgrRelation reln); BlockNumber blocknum, char *buffer, bool isTemp);
extern BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum);
extern void smgrtruncate(SMgrRelation reln, ForkNumber forknum,
BlockNumber nblocks, bool isTemp);
extern void smgrimmedsync(SMgrRelation reln, ForkNumber forknum);
extern void smgrDoPendingDeletes(bool isCommit); extern void smgrDoPendingDeletes(bool isCommit);
extern int smgrGetPendingDeletes(bool forCommit, RelFileNode **ptr, extern int smgrGetPendingDeletes(bool forCommit, RelFileFork **ptr,
bool *haveNonTemp); bool *haveNonTemp);
extern void AtSubCommit_smgr(void); extern void AtSubCommit_smgr(void);
extern void AtSubAbort_smgr(void); extern void AtSubAbort_smgr(void);
...@@ -95,23 +101,27 @@ extern void smgr_desc(StringInfo buf, uint8 xl_info, char *rec); ...@@ -95,23 +101,27 @@ extern void smgr_desc(StringInfo buf, uint8 xl_info, char *rec);
/* in md.c */ /* in md.c */
extern void mdinit(void); extern void mdinit(void);
extern void mdclose(SMgrRelation reln); extern void mdclose(SMgrRelation reln, ForkNumber forknum);
extern void mdcreate(SMgrRelation reln, bool isRedo); extern void mdcreate(SMgrRelation reln, ForkNumber forknum, bool isRedo);
extern void mdunlink(RelFileNode rnode, bool isRedo); extern bool mdexists(SMgrRelation reln, ForkNumber forknum);
extern void mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, extern void mdunlink(RelFileNode rnode, ForkNumber forknum, bool isRedo);
bool isTemp); extern void mdextend(SMgrRelation reln, ForkNumber forknum,
extern void mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer); BlockNumber blocknum, char *buffer, bool isTemp);
extern void mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
bool isTemp); char *buffer);
extern BlockNumber mdnblocks(SMgrRelation reln); extern void mdwrite(SMgrRelation reln, ForkNumber forknum,
extern void mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp); BlockNumber blocknum, char *buffer, bool isTemp);
extern void mdimmedsync(SMgrRelation reln); extern BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum);
extern void mdtruncate(SMgrRelation reln, ForkNumber forknum,
BlockNumber nblocks, bool isTemp);
extern void mdimmedsync(SMgrRelation reln, ForkNumber forknum);
extern void mdpreckpt(void); extern void mdpreckpt(void);
extern void mdsync(void); extern void mdsync(void);
extern void mdpostckpt(void); extern void mdpostckpt(void);
extern void RememberFsyncRequest(RelFileNode rnode, BlockNumber segno); extern void RememberFsyncRequest(RelFileNode rnode, ForkNumber forknum,
extern void ForgetRelationFsyncRequests(RelFileNode rnode); BlockNumber segno);
extern void ForgetRelationFsyncRequests(RelFileNode rnode, ForkNumber forknum);
extern void ForgetDatabaseFsyncRequests(Oid dbid); extern void ForgetDatabaseFsyncRequests(Oid dbid);
/* smgrtype.c */ /* smgrtype.c */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment