Commit 1b67fe17 authored by Vadim B. Mikheev

heap' logging

parent 80c64695
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.74 2000/07/02 22:00:27 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.75 2000/07/03 02:54:15 vadim Exp $
*
*
* INTERFACE ROUTINES
......@@ -1271,10 +1271,9 @@ heap_get_latest_tid(Relation relation,
Oid
heap_insert(Relation relation, HeapTuple tup)
{
/* ----------------
* increment access statistics
* ----------------
*/
Buffer buffer;
/* increment access statistics */
tup->tableOid = relation->rd_id;
IncrHeapAccessStat(local_insert);
IncrHeapAccessStat(global_insert);
......@@ -1300,7 +1299,11 @@ heap_insert(Relation relation, HeapTuple tup)
tup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
tup->t_data->t_infomask |= HEAP_XMAX_INVALID;
RelationPutHeapTupleAtEnd(relation, tup);
/* Find buffer for this tuple */
buffer = RelationGetBufferForTuple(relation, tup->t_len, InvalidBuffer);
/* NO ELOG(ERROR) from here till changes are logged */
RelationPutHeapTuple(relation, buffer, tup);
#ifdef XLOG
/* XLOG stuff */
......@@ -1308,7 +1311,8 @@ heap_insert(Relation relation, HeapTuple tup)
xl_heap_insert xlrec;
xlrec.itid.dbId = relation->rd_lockInfo.lockRelId.dbId;
xlrec.itid.relId = relation->rd_lockInfo.lockRelId.relId;
XXX xlrec.itid.tid = tp.t_self;
xlrec.itid.cid = GetCurrentCommandId();
xlrec.itid.tid = tup->t_self;
xlrec.t_natts = tup->t_data->t_natts;
xlrec.t_oid = tup->t_data->t_oid;
xlrec.t_hoff = tup->t_data->t_hoff;
......@@ -1319,10 +1323,14 @@ XXX xlrec.itid.tid = tp.t_self;
(char*) tup->t_data + offsetof(HeapTupleHeaderData, tbits),
tup->t_len - offsetof(HeapTupleHeaderData, tbits));
dp->pd_lsn = recptr;
((PageHeader) BufferGetPage(buffer))->pd_lsn = recptr;
((PageHeader) BufferGetPage(buffer))->pd_sui = ThisStartUpID;
}
#endif
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
if (IsSystemRelationName(RelationGetRelationName(relation)))
RelationMark4RollbackHeapTuple(relation, tup);
......@@ -1417,11 +1425,13 @@ l1:
xl_heap_delete xlrec;
xlrec.dtid.dbId = relation->rd_lockInfo.lockRelId.dbId;
xlrec.dtid.relId = relation->rd_lockInfo.lockRelId.relId;
xlrec.dtid.cid = GetCurrentCommandId();
xlrec.dtid.tid = tp.t_self;
XLogRecPtr recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE,
(char*) xlrec, sizeof(xlrec), NULL, 0);
dp->pd_lsn = recptr;
dp->pd_sui = ThisStartUpID;
}
#endif
......@@ -1451,7 +1461,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
ItemId lp;
HeapTupleData oldtup;
PageHeader dp;
Buffer buffer;
Buffer buffer, newbuf;
int result;
newtup->tableOid = relation->rd_id;
......@@ -1531,43 +1541,65 @@ l2:
newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
newtup->t_data->t_infomask |= (HEAP_XMAX_INVALID | HEAP_UPDATED);
/* logically delete old item */
/* Find buffer for new tuple */
if ((unsigned) MAXALIGN(newtup->t_len) <= PageGetFreeSpace((Page) dp))
newbuf = buffer;
else
newbuf = RelationGetBufferForTuple(relation, newtup->t_len, buffer);
/* NO ELOG(ERROR) from here till changes are logged */
/* insert new tuple */
RelationPutHeapTuple(relation, newbuf, newtup);
/* logically delete old tuple */
TransactionIdStore(GetCurrentTransactionId(), &(oldtup.t_data->t_xmax));
oldtup.t_data->t_cmax = GetCurrentCommandId();
oldtup.t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
HEAP_XMAX_INVALID | HEAP_MARKED_FOR_UPDATE);
/* insert new item */
if ((unsigned) MAXALIGN(newtup->t_len) <= PageGetFreeSpace((Page) dp))
RelationPutHeapTuple(relation, buffer, newtup);
else
/* record address of new tuple in t_ctid of old one */
oldtup.t_data->t_ctid = newtup->t_self;
#ifdef XLOG
/* XLOG stuff */
{
xl_heap_update xlrec;
xlrec.dtid.dbId = relation->rd_lockInfo.lockRelId.dbId;
xlrec.dtid.relId = relation->rd_lockInfo.lockRelId.relId;
xlrec.dtid.cid = GetCurrentCommandId();
xlrec.itid.tid = newtup->t_self;
xlrec.t_natts = newtup->t_data->t_natts;
xlrec.t_hoff = newtup->t_data->t_hoff;
xlrec.mask = newtup->t_data->t_infomask;
XLogRecPtr recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_UPDATE,
(char*) xlrec, sizeof(xlrec),
(char*) newtup->t_data + offsetof(HeapTupleHeaderData, tbits),
newtup->t_len - offsetof(HeapTupleHeaderData, tbits));
/*
* New item won't fit on same page as old item, have to look for a
* new place to put it. Note that we have to unlock current buffer
* context - not good but RelationPutHeapTupleAtEnd uses extend
* lock.
*/
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
RelationPutHeapTupleAtEnd(relation, newtup);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
if (newbuf != buffer)
{
((PageHeader) BufferGetPage(newbuf))->pd_lsn = recptr;
((PageHeader) BufferGetPage(newbuf))->pd_sui = ThisStartUpID;
}
((PageHeader) BufferGetPage(buffer))->pd_lsn = recptr;
((PageHeader) BufferGetPage(buffer))->pd_sui = ThisStartUpID;
}
/* mark for rollback caches */
RelationMark4RollbackHeapTuple(relation, newtup);
/*
* New item in place, now record address of new tuple in t_ctid of old
* one.
*/
oldtup.t_data->t_ctid = newtup->t_self;
#endif
if (newbuf != buffer)
{
LockBuffer(newbuf, BUFFER_LOCK_UNLOCK);
WriteBuffer(newbuf);
}
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
/* invalidate caches */
RelationInvalidateHeapTuple(relation, &oldtup);
WriteBuffer(buffer);
RelationMark4RollbackHeapTuple(relation, newtup);
return HeapTupleMayBeUpdated;
}
......@@ -1648,6 +1680,14 @@ l3:
return result;
}
#ifdef XLOG
/*
* XLOG stuff: no logging is required as long as we have no
* savepoints. For savepoints, a private log could be used...
*/
((PageHeader) BufferGetPage(*buffer))->pd_sui = ThisStartUpID;
#endif
/* store transaction information of xact marking the tuple */
TransactionIdStore(GetCurrentTransactionId(), &(tuple->t_data->t_xmax));
tuple->t_data->t_cmax = GetCurrentCommandId();
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Id: hio.c,v 1.31 2000/04/12 17:14:45 momjian Exp $
* $Id: hio.c,v 1.32 2000/07/03 02:54:15 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -19,17 +19,11 @@
#include "access/hio.h"
/*
* amputunique - place tuple at tid
* Currently on errors, calls elog. Perhaps should return -1?
* Possible errors include the addition of a tuple to the page
* between the time the linep is chosen and the page is L_UP'd.
* RelationPutHeapTuple - place tuple at specified page
*
* This should be coordinated with the B-tree code.
* Probably needs to have an amdelunique to allow for
* internal index records to be deleted and reordered as needed.
* For the heap AM, this should never be needed.
* !!! ELOG(ERROR) IS DISALLOWED HERE !!!
*
* Note - we assume that the caller holds BUFFER_LOCK_EXCLUSIVE on the buffer.
* Note - we assume that the caller holds BUFFER_LOCK_EXCLUSIVE on the buffer.
*
*/
void
......@@ -57,62 +51,41 @@ RelationPutHeapTuple(Relation relation,
offnum = PageAddItem((Page) pageHeader, (Item) tuple->t_data,
tuple->t_len, InvalidOffsetNumber, LP_USED);
if (offnum == InvalidOffsetNumber)
elog(STOP, "RelationPutHeapTuple: failed to add tuple");
itemId = PageGetItemId((Page) pageHeader, offnum);
item = PageGetItem((Page) pageHeader, itemId);
ItemPointerSet(&((HeapTupleHeader) item)->t_ctid,
BufferGetBlockNumber(buffer), offnum);
/*
* Let the caller do this!
*
* WriteBuffer(buffer);
*/
/* return an accurate tuple */
ItemPointerSet(&tuple->t_self, BufferGetBlockNumber(buffer), offnum);
}
/*
* This routine is another in the series of attempts to reduce the number
* of I/O's and system calls executed in the various benchmarks. In
* particular, this routine is used to append data to the end of a relation
* file without excessive lseeks. This code should do no more than 2 semops
* in the ideal case.
* RelationGetBufferForTuple
*
* Eventually, we should cache the number of blocks in a relation somewhere.
* Until that time, this code will have to do an lseek to determine the number
* of blocks in a relation.
* Returns (locked) buffer to add tuple with given len.
* If Ubuf is valid then no attempt to lock it should be made -
* this is for heap_update...
*
* This code should ideally do at most 4 semops, 1 lseek, and possibly 1 write
* to do an append; it's possible to eliminate 2 of the semops if we do direct
* buffer stuff (!); the lseek and the write can go if we get
* RelationGetNumberOfBlocks to be useful.
* ELOG(ERROR) is allowed here, so this routine *must* be called
* before any (unlogged) changes are made in buffer pool.
*
* NOTE: This code presumes that we have a write lock on the relation.
* Not now - we use extend locking...
*
* Also note that this routine probably shouldn't have to exist, and does
* screw up the call graph rather badly, but we are wasting so much time and
* system resources being massively general that we are losing badly in our
* performance benchmarks.
*/
void
RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple)
Buffer
RelationGetBufferForTuple(Relation relation, Size len, Buffer Ubuf)
{
Buffer buffer;
Page pageHeader;
BlockNumber lastblock;
OffsetNumber offnum;
Size len;
ItemId itemId;
Item item;
len = MAXALIGN(tuple->t_len); /* be conservative */
len = MAXALIGN(len); /* be conservative */
/*
* If we're gonna fail for oversize tuple, do it right away... this
* code should go away eventually.
* If we're gonna fail for oversize tuple, do it right away
*/
if (len > MaxTupleSize)
elog(ERROR, "Tuple is too big: size %u, max size %ld",
......@@ -152,7 +125,8 @@ RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple)
else
buffer = ReadBuffer(relation, lastblock - 1);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
if (buffer != Ubuf)
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
pageHeader = (Page) BufferGetPage(buffer);
/*
......@@ -160,7 +134,8 @@ RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple)
*/
if (len > PageGetFreeSpace(pageHeader))
{
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
if (buffer != Ubuf)
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
buffer = ReleaseAndReadBuffer(buffer, relation, P_NEW);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
pageHeader = (Page) BufferGetPage(buffer);
......@@ -168,36 +143,22 @@ RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple)
if (len > PageGetFreeSpace(pageHeader))
{
/*
* BUG: by elog'ing here, we leave the new buffer locked and
* not marked dirty, which may result in an invalid page
* header being left on disk. But we should not get here
* given the test at the top of the routine, and the whole
* deal should go away when we implement tuple splitting
* anyway...
*/
elog(ERROR, "Tuple is too big: size %u", len);
/* We should not get here given the test at the top */
elog(STOP, "Tuple is too big: size %u", len);
}
}
/*
* Caller should check space in Ubuf but...
*/
else if (buffer == Ubuf)
{
ReleaseBuffer(buffer);
buffer = Ubuf;
}
if (!relation->rd_myxactonly)
UnlockPage(relation, 0, ExclusiveLock);
offnum = PageAddItem((Page) pageHeader, (Item) tuple->t_data,
tuple->t_len, InvalidOffsetNumber, LP_USED);
itemId = PageGetItemId((Page) pageHeader, offnum);
item = PageGetItem((Page) pageHeader, itemId);
lastblock = BufferGetBlockNumber(buffer);
ItemPointerSet(&((HeapTupleHeader) item)->t_ctid, lastblock, offnum);
/* return an accurate tuple self-pointer */
ItemPointerSet(&tuple->t_self, lastblock, offnum);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer);
return(buffer);
}
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/page/bufpage.c,v 1.29 2000/04/12 17:15:40 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/storage/page/bufpage.c,v 1.30 2000/07/03 02:54:16 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -50,26 +50,13 @@ PageInit(Page page, Size pageSize, Size specialSize)
PageSetPageSize(page, pageSize);
}
/*
* PageAddItem
* Adds item to the given page.
*
* Note:
* This does not assume that the item resides on a single page.
* It is the responsibility of the caller to act appropriately
* depending on this fact. The "pskip" routines provide a
* friendlier interface, in this case.
*
* This does change the status of any of the resources passed.
* The semantics may change in the future.
*
* This routine should probably be combined with others?
*/
/* ----------------
* PageAddItem
*
* add an item to a page.
*
* !!! ELOG(ERROR) IS DISALLOWED HERE !!!
*
* Notes on interface:
* If offsetNumber is valid, shuffle ItemId's down to make room
* to use it, if PageManagerShuffle is true. If PageManagerShuffle is
......@@ -126,7 +113,7 @@ PageAddItem(Page page,
if (((*itemId).lp_flags & LP_USED) ||
((*itemId).lp_len != 0))
{
elog(ERROR, "PageAddItem: tried overwrite of used ItemId");
elog(NOTICE, "PageAddItem: tried overwrite of used ItemId");
return InvalidOffsetNumber;
}
}
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: hio.h,v 1.14 2000/01/26 05:57:50 momjian Exp $
* $Id: hio.h,v 1.15 2000/07/03 02:54:17 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -18,6 +18,6 @@
extern void RelationPutHeapTuple(Relation relation, Buffer buffer,
HeapTuple tuple);
extern void RelationPutHeapTupleAtEnd(Relation relation, HeapTuple tuple);
extern Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer Ubuf);
#endif /* HIO_H */
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: htup.h,v 1.31 2000/07/02 22:01:00 momjian Exp $
* $Id: htup.h,v 1.32 2000/07/03 02:54:17 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -69,22 +69,25 @@ typedef HeapTupleHeaderData *HeapTupleHeader;
#define XLOG_HEAP_MOVE 0x30
/*
* Everything we need to find the changed tuple (14 bytes)
* Everything we need to find the changed tuple (18 bytes)
*/
typedef struct xl_heaptid
{
Oid dbId; /* database; the heap logging code fills this from */
/* relation->rd_lockInfo.lockRelId.dbId */
Oid relId; /* relation; filled from */
/* relation->rd_lockInfo.lockRelId.relId */
CommandId cid; /* command id (GetCurrentCommandId() at the time */
/* of the change); kept for "better" tuple */
/* identification - it allows us to avoid */
/* "compensation" records for undo */
ItemPointerData tid; /* changed tuple id (the tuple's t_self) */
} xl_heaptid;
/* This is what we need to know about delete - ALIGN(14) = 16 bytes */
/* This is what we need to know about delete - ALIGN(18) = 24 bytes */
typedef struct xl_heap_delete
{
xl_heaptid dtid; /* deleted tuple id; this is the only field a */
/* delete record carries */
} xl_heap_delete;
/* This is what we need to know about insert - 22 + data */
/* This is what we need to know about insert - 26 + data */
typedef struct xl_heap_insert
{
xl_heaptid itid; /* inserted tuple id */
......@@ -108,7 +111,7 @@ typedef struct xl_heap_update
/* NEW TUPLE DATA FOLLOWS AT END OF STRUCT */
} xl_heap_update;
/* This is what we need to know about tuple move - ALIGN(20) = 24 bytes */
/* This is what we need to know about tuple move - 24 bytes */
typedef struct xl_heap_move
{
xl_heaptid ftid; /* moved from */
......
......@@ -68,6 +68,13 @@ typedef XLogPageHeaderData *XLogPageHeader;
#define XLP_FIRST_IS_SUBRECORD 0x0001
/*
* StartUpID (SUI) - system startup counter.
* It exists to allow removing pg_log after shutdown.
*/
typedef uint32 StartUpID;
extern StartUpID ThisStartUpID;
extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info,
char *hdr, uint32 hdrlen,
char *buf, uint32 buflen);
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2000, PostgreSQL, Inc
* Portions Copyright (c) 1994, Regents of the University of California
*
* $Id: bufpage.h,v 1.29 2000/06/02 10:20:27 vadim Exp $
* $Id: bufpage.h,v 1.30 2000/07/03 02:54:21 vadim Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -118,8 +118,10 @@ typedef OpaqueData *Opaque;
typedef struct PageHeaderData
{
#ifdef XLOG
XLogRecPtr pd_lsn; /* XLOG: next byte after last byte of xlog */
XLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog */
/* record for last change of this page */
StartUpID pd_sui; /* SUI of last changes (currently it's */
/* used by heap AM only) */
#endif
LocationIndex pd_lower; /* offset to start of free space */
LocationIndex pd_upper; /* offset to end of free space */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment