Commit ef072219 authored by Tom Lane's avatar Tom Lane

Clean up smgr.c/md.c APIs as per discussion a couple months ago. Instead of

having md.c return a success/failure boolean to smgr.c, which was just going
to elog anyway, let md.c issue the elog messages itself.  This allows better
error reporting, particularly in cases such as "short read" or "short write"
which Peter was complaining of.  Also, remove the kluge of allowing mdread()
to return zeroes from a read-beyond-EOF: this is now an error condition
except when InRecovery or zero_damaged_pages = true.  (Hash indexes used to
require that behavior, but no more.)  Also, enforce that mdwrite() is to be
used for rewriting existing blocks while mdextend() is to be used for
extending the relation EOF.  This restriction lets us get rid of the old
ad-hoc defense against creating huge files by an accidental reference to
a bogus block number: we'll only create new segments in mdextend() not
mdwrite() or mdread().  (Again, when InRecovery we allow it anyway, since
we need to allow updates of blocks that were later truncated away.)
Also, clean up the original makeshift patch for bug #2737: move the
responsibility for padding relation segments to full length into md.c.
parent 990fea84
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.61 2006/11/19 21:33:23 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.62 2007/01/03 18:11:01 tgl Exp $
*
* NOTES
* Postgres hash pages look like ordinary relation pages. The opaque
......@@ -533,10 +533,8 @@ fail:
*
* This does not need to initialize the new bucket pages; we'll do that as
* each one is used by _hash_expandtable(). But we have to extend the logical
* EOF to the end of the splitpoint; otherwise the first overflow page
* allocated beyond the splitpoint will represent a noncontiguous access,
* which can confuse md.c (and will probably be forbidden by future changes
* to md.c).
* EOF to the end of the splitpoint; this keeps smgr's idea of the EOF in
* sync with ours, so that overflow-page allocation works correctly.
*
* We do this by writing a page of zeroes at the end of the splitpoint range.
* We expect that the filesystem will ensure that the intervening pages read
......@@ -559,7 +557,6 @@ _hash_alloc_buckets(Relation rel, uint32 nblocks)
{
BlockNumber firstblock;
BlockNumber lastblock;
BlockNumber endblock;
char zerobuf[BLCKSZ];
/*
......@@ -577,24 +574,9 @@ _hash_alloc_buckets(Relation rel, uint32 nblocks)
if (lastblock < firstblock || lastblock == InvalidBlockNumber)
return InvalidBlockNumber;
/* Note: we assume RelationGetNumberOfBlocks did RelationOpenSmgr for us */
MemSet(zerobuf, 0, sizeof(zerobuf));
/*
* XXX If the extension results in creation of new segment files,
* we have to make sure that each non-last file is correctly filled out to
* RELSEG_SIZE blocks. This ought to be done inside mdextend, but
* changing the smgr API seems best left for development cycle not late
* beta. Temporary fix for bug #2737.
*/
#ifndef LET_OS_MANAGE_FILESIZE
for (endblock = firstblock | (RELSEG_SIZE - 1);
endblock < lastblock;
endblock += RELSEG_SIZE)
smgrextend(rel->rd_smgr, endblock, zerobuf, rel->rd_istemp);
#endif
/* Note: we assume RelationGetNumberOfBlocks did RelationOpenSmgr for us */
smgrextend(rel->rd_smgr, lastblock, zerobuf, rel->rd_istemp);
return firstblock;
......
......@@ -36,9 +36,9 @@
* that is of no value (since other backends have no interest in them yet)
* and it created locking problems for CHECKPOINT, because the upper-level
* pages were held exclusive-locked for long periods. Now we just build
* the pages in local memory and smgrwrite() them as we finish them. They
* will need to be re-read into shared buffers on first use after the build
* finishes.
* the pages in local memory and smgrwrite or smgrextend them as we finish
* them. They will need to be re-read into shared buffers on first use after
* the build finishes.
*
* Since the index will never be used unless it is completely built,
* from a crash-recovery point of view there is no need to WAL-log the
......@@ -57,7 +57,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.107 2006/10/04 00:29:49 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.108 2007/01/03 18:11:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -309,9 +309,9 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
{
if (!wstate->btws_zeropage)
wstate->btws_zeropage = (Page) palloc0(BLCKSZ);
smgrwrite(wstate->index->rd_smgr, wstate->btws_pages_written++,
(char *) wstate->btws_zeropage,
true);
smgrextend(wstate->index->rd_smgr, wstate->btws_pages_written++,
(char *) wstate->btws_zeropage,
true);
}
/*
......@@ -319,10 +319,17 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
* index, because there's no need for smgr to schedule an fsync for this
* write; we'll do it ourselves before ending the build.
*/
smgrwrite(wstate->index->rd_smgr, blkno, (char *) page, true);
if (blkno == wstate->btws_pages_written)
{
/* extending the file... */
smgrextend(wstate->index->rd_smgr, blkno, (char *) page, true);
wstate->btws_pages_written++;
}
else
{
/* overwriting a block we zero-filled before */
smgrwrite(wstate->index->rd_smgr, blkno, (char *) page, true);
}
pfree(page);
}
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.208 2006/12/30 21:21:53 tgl Exp $
* $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.209 2007/01/03 18:11:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -6083,7 +6083,7 @@ copy_relation_data(Relation rel, SMgrRelation dst)
* rel, because there's no need for smgr to schedule an fsync for this
* write; we'll do it ourselves below.
*/
smgrwrite(dst, blkno, buf, true);
smgrextend(dst, blkno, buf, true);
}
/*
......
This diff is collapsed.
This diff is collapsed.
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.55 2006/03/24 04:32:13 tgl Exp $
* $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.56 2007/01/03 18:11:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -72,7 +72,7 @@ extern void smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer);
extern void smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer,
bool isTemp);
extern BlockNumber smgrnblocks(SMgrRelation reln);
extern BlockNumber smgrtruncate(SMgrRelation reln, BlockNumber nblocks,
extern void smgrtruncate(SMgrRelation reln, BlockNumber nblocks,
bool isTemp);
extern void smgrimmedsync(SMgrRelation reln);
extern void smgrDoPendingDeletes(bool isCommit);
......@@ -91,20 +91,19 @@ extern void smgr_desc(StringInfo buf, uint8 xl_info, char *rec);
/* internals: move me elsewhere -- ay 7/94 */
/* in md.c */
extern bool mdinit(void);
extern bool mdclose(SMgrRelation reln);
extern bool mdcreate(SMgrRelation reln, bool isRedo);
extern bool mdunlink(RelFileNode rnode, bool isRedo);
extern bool mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer,
extern void mdinit(void);
extern void mdclose(SMgrRelation reln);
extern void mdcreate(SMgrRelation reln, bool isRedo);
extern void mdunlink(RelFileNode rnode, bool isRedo);
extern void mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer,
bool isTemp);
extern bool mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer);
extern bool mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer,
extern void mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer);
extern void mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer,
bool isTemp);
extern BlockNumber mdnblocks(SMgrRelation reln);
extern BlockNumber mdtruncate(SMgrRelation reln, BlockNumber nblocks,
bool isTemp);
extern bool mdimmedsync(SMgrRelation reln);
extern bool mdsync(void);
extern void mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp);
extern void mdimmedsync(SMgrRelation reln);
extern void mdsync(void);
extern void RememberFsyncRequest(RelFileNode rnode, BlockNumber segno);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment