Commit fdf6be80 authored by Vadim B. Mikheev

1. Vacuum is updated for MVCC.

2. Much faster btree tuple deletion in the case when the first index
   tuple on a page is deleted (no movement to the left page(s)).
3. Remember the blkno of the new root page in the BTPageOpaque of the
   left/right siblings when the root page is split.
parent d4ed1784
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.41 1999/02/13 23:14:22 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/access/heap/heapam.c,v 1.42 1999/03/28 20:31:56 vadim Exp $
* *
* *
* INTERFACE ROUTINES * INTERFACE ROUTINES
...@@ -1270,7 +1270,7 @@ l2: ...@@ -1270,7 +1270,7 @@ l2:
newtup->t_data->t_cmin = GetCurrentCommandId(); newtup->t_data->t_cmin = GetCurrentCommandId();
StoreInvalidTransactionId(&(newtup->t_data->t_xmax)); StoreInvalidTransactionId(&(newtup->t_data->t_xmax));
newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK); newtup->t_data->t_infomask &= ~(HEAP_XACT_MASK);
newtup->t_data->t_infomask |= HEAP_XMAX_INVALID; newtup->t_data->t_infomask |= (HEAP_XMAX_INVALID | HEAP_UPDATED);
/* logically delete old item */ /* logically delete old item */
TransactionIdStore(GetCurrentTransactionId(), &(oldtup.t_data->t_xmax)); TransactionIdStore(GetCurrentTransactionId(), &(oldtup.t_data->t_xmax));
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.35 1999/02/13 23:14:34 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.36 1999/03/28 20:31:56 vadim Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -853,6 +853,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright) ...@@ -853,6 +853,8 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright)
lopaque->btpo_next = BufferGetBlockNumber(rbuf); lopaque->btpo_next = BufferGetBlockNumber(rbuf);
ropaque->btpo_next = oopaque->btpo_next; ropaque->btpo_next = oopaque->btpo_next;
lopaque->btpo_parent = ropaque->btpo_parent = oopaque->btpo_parent;
/* /*
* If the page we're splitting is not the rightmost page at its level * If the page we're splitting is not the rightmost page at its level
* in the tree, then the first (0) entry on the page is the high key * in the tree, then the first (0) entry on the page is the high key
...@@ -1103,6 +1105,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) ...@@ -1103,6 +1105,7 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
/* get a new root page */ /* get a new root page */
rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE); rootbuf = _bt_getbuf(rel, P_NEW, BT_WRITE);
rootpage = BufferGetPage(rootbuf); rootpage = BufferGetPage(rootbuf);
rootbknum = BufferGetBlockNumber(rootbuf);
_bt_pageinit(rootpage, BufferGetPageSize(rootbuf)); _bt_pageinit(rootpage, BufferGetPageSize(rootbuf));
/* set btree special data */ /* set btree special data */
...@@ -1119,6 +1122,10 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) ...@@ -1119,6 +1122,10 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
lpage = BufferGetPage(lbuf); lpage = BufferGetPage(lbuf);
rpage = BufferGetPage(rbuf); rpage = BufferGetPage(rbuf);
((BTPageOpaque) PageGetSpecialPointer(lpage))->btpo_parent =
((BTPageOpaque) PageGetSpecialPointer(rpage))->btpo_parent =
rootbknum;
/* /*
* step over the high key on the left page while building the left * step over the high key on the left page while building the left
* page pointer. * page pointer.
...@@ -1156,11 +1163,13 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) ...@@ -1156,11 +1163,13 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
pfree(new_item); pfree(new_item);
/* write and let go of the root buffer */ /* write and let go of the root buffer */
rootbknum = BufferGetBlockNumber(rootbuf);
_bt_wrtbuf(rel, rootbuf); _bt_wrtbuf(rel, rootbuf);
/* update metadata page with new root block number */ /* update metadata page with new root block number */
_bt_metaproot(rel, rootbknum, 0); _bt_metaproot(rel, rootbknum, 0);
WriteNoReleaseBuffer(lbuf);
WriteNoReleaseBuffer(rbuf);
} }
/* /*
...@@ -1559,6 +1568,7 @@ _bt_shift(Relation rel, Buffer buf, BTStack stack, int keysz, ...@@ -1559,6 +1568,7 @@ _bt_shift(Relation rel, Buffer buf, BTStack stack, int keysz,
pageop->btpo_flags |= BTP_CHAIN; pageop->btpo_flags |= BTP_CHAIN;
pageop->btpo_prev = npageop->btpo_prev; /* restore prev */ pageop->btpo_prev = npageop->btpo_prev; /* restore prev */
pageop->btpo_next = nbknum; /* next points to the new page */ pageop->btpo_next = nbknum; /* next points to the new page */
pageop->btpo_parent = npageop->btpo_parent;
/* init shifted page opaque */ /* init shifted page opaque */
npageop->btpo_prev = bknum = BufferGetBlockNumber(buf); npageop->btpo_prev = bknum = BufferGetBlockNumber(buf);
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.18 1999/02/13 23:14:35 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtpage.c,v 1.19 1999/03/28 20:31:57 vadim Exp $
* *
* NOTES * NOTES
* Postgres btree pages look like ordinary relation pages. The opaque * Postgres btree pages look like ordinary relation pages. The opaque
...@@ -421,6 +421,8 @@ _bt_pageinit(Page page, Size size) ...@@ -421,6 +421,8 @@ _bt_pageinit(Page page, Size size)
MemSet(page, 0, size); MemSet(page, 0, size);
PageInit(page, size, sizeof(BTPageOpaqueData)); PageInit(page, size, sizeof(BTPageOpaqueData));
((BTPageOpaque) PageGetSpecialPointer(page))->btpo_parent =
InvalidBlockNumber;
} }
/* /*
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.36 1999/02/21 03:48:27 scrappy Exp $ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.37 1999/03/28 20:31:58 vadim Exp $
* *
* NOTES * NOTES
* This file contains only the public interface routines. * This file contains only the public interface routines.
...@@ -372,11 +372,6 @@ btinsert(Relation rel, Datum *datum, char *nulls, ItemPointer ht_ctid, Relation ...@@ -372,11 +372,6 @@ btinsert(Relation rel, Datum *datum, char *nulls, ItemPointer ht_ctid, Relation
pfree(btitem); pfree(btitem);
pfree(itup); pfree(itup);
#ifdef NOT_USED
/* adjust any active scans that will be affected by this insertion */
_bt_adjscans(rel, &(res->pointerData), BT_INSERT);
#endif
return res; return res;
} }
...@@ -396,15 +391,9 @@ btgettuple(IndexScanDesc scan, ScanDirection dir) ...@@ -396,15 +391,9 @@ btgettuple(IndexScanDesc scan, ScanDirection dir)
if (ItemPointerIsValid(&(scan->currentItemData))) if (ItemPointerIsValid(&(scan->currentItemData)))
{ {
/* /*
* Now we don't adjust scans on insertion (comments in * Restore scan position using heap TID returned
* nbtscan.c:_bt_scandel()) and I hope that we will unlock current * by previous call to btgettuple().
* index page before leaving index in LLL: this means that current
* index tuple could be moved right before we get here and we have
* to restore our scan position. We save heap TID pointed by
* current index tuple and use it. This will work untill we start
* to re-use (move heap tuples) without vacuum... - vadim 07/29/98
*/ */
_bt_restscan(scan); _bt_restscan(scan);
res = _bt_next(scan, dir); res = _bt_next(scan, dir);
...@@ -612,16 +601,12 @@ void ...@@ -612,16 +601,12 @@ void
btdelete(Relation rel, ItemPointer tid) btdelete(Relation rel, ItemPointer tid)
{ {
/* adjust any active scans that will be affected by this deletion */ /* adjust any active scans that will be affected by this deletion */
_bt_adjscans(rel, tid, BT_DELETE); _bt_adjscans(rel, tid);
/* delete the data from the page */ /* delete the data from the page */
_bt_pagedel(rel, tid); _bt_pagedel(rel, tid);
} }
/*
* Reasons are in btgettuple... We have to find index item that
* points to heap tuple returned by previous call to btgettuple().
*/
static void static void
_bt_restscan(IndexScanDesc scan) _bt_restscan(IndexScanDesc scan)
{ {
...@@ -637,6 +622,20 @@ _bt_restscan(IndexScanDesc scan) ...@@ -637,6 +622,20 @@ _bt_restscan(IndexScanDesc scan)
BTItem item; BTItem item;
BlockNumber blkno; BlockNumber blkno;
/*
* We use this as flag when first index tuple on page
* is deleted but we do not move left (this would
* slowdown vacuum) - so we set current->ip_posid
* before first index tuple on the current page
* (_bt_step will move it right)...
*/
if (!ItemPointerIsValid(&target))
{
ItemPointerSetOffsetNumber(&(scan->currentItemData),
OffsetNumberPrev(P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY));
return;
}
if (maxoff >= offnum) if (maxoff >= offnum)
{ {
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/nbtscan.c,v 1.19 1999/02/13 23:14:36 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/Attic/nbtscan.c,v 1.20 1999/03/28 20:31:58 vadim Exp $
* *
* *
* NOTES * NOTES
...@@ -43,8 +43,7 @@ typedef BTScanListData *BTScanList; ...@@ -43,8 +43,7 @@ typedef BTScanListData *BTScanList;
static BTScanList BTScans = (BTScanList) NULL; static BTScanList BTScans = (BTScanList) NULL;
static void _bt_scandel(IndexScanDesc scan, int op, BlockNumber blkno, OffsetNumber offno); static void _bt_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno);
static bool _bt_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno);
/* /*
* _bt_regscan() -- register a new scan. * _bt_regscan() -- register a new scan.
...@@ -91,7 +90,7 @@ _bt_dropscan(IndexScanDesc scan) ...@@ -91,7 +90,7 @@ _bt_dropscan(IndexScanDesc scan)
* for a given deletion or insertion * for a given deletion or insertion
*/ */
void void
_bt_adjscans(Relation rel, ItemPointer tid, int op) _bt_adjscans(Relation rel, ItemPointer tid)
{ {
BTScanList l; BTScanList l;
Oid relid; Oid relid;
...@@ -100,41 +99,25 @@ _bt_adjscans(Relation rel, ItemPointer tid, int op) ...@@ -100,41 +99,25 @@ _bt_adjscans(Relation rel, ItemPointer tid, int op)
for (l = BTScans; l != (BTScanList) NULL; l = l->btsl_next) for (l = BTScans; l != (BTScanList) NULL; l = l->btsl_next)
{ {
if (relid == RelationGetRelid(l->btsl_scan->relation)) if (relid == RelationGetRelid(l->btsl_scan->relation))
_bt_scandel(l->btsl_scan, op, _bt_scandel(l->btsl_scan,
ItemPointerGetBlockNumber(tid), ItemPointerGetBlockNumber(tid),
ItemPointerGetOffsetNumber(tid)); ItemPointerGetOffsetNumber(tid));
} }
} }
/* /*
* _bt_scandel() -- adjust a single scan * _bt_scandel() -- adjust a single scan on deletion
* *
* because each index page is always maintained as an ordered array of
* index tuples, the index tuples on a given page shift beneath any
* given scan. an index modification "behind" a scan position (i.e.,
* same page, lower or equal offset number) will therefore force us to
* adjust the scan in the following ways:
*
* - on insertion, we shift the scan forward by one item.
* - on deletion, we shift the scan backward by one item.
*
* note that:
*
* - we need not worry about the actual ScanDirection of the scan
* itself, since the problem is that the "current" scan position has
* shifted.
* - modifications "ahead" of our scan position do not change the
* array index of the current scan position and so can be ignored.
*/ */
static void static void
_bt_scandel(IndexScanDesc scan, int op, BlockNumber blkno, OffsetNumber offno) _bt_scandel(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
{ {
ItemPointer current; ItemPointer current;
Buffer buf; Buffer buf;
BTScanOpaque so; BTScanOpaque so;
OffsetNumber start;
if (!_bt_scantouched(scan, blkno, offno)) Page page;
return; BTPageOpaque opaque;
so = (BTScanOpaque) scan->opaque; so = (BTScanOpaque) scan->opaque;
buf = so->btso_curbuf; buf = so->btso_curbuf;
...@@ -144,33 +127,23 @@ _bt_scandel(IndexScanDesc scan, int op, BlockNumber blkno, OffsetNumber offno) ...@@ -144,33 +127,23 @@ _bt_scandel(IndexScanDesc scan, int op, BlockNumber blkno, OffsetNumber offno)
&& ItemPointerGetBlockNumber(current) == blkno && ItemPointerGetBlockNumber(current) == blkno
&& ItemPointerGetOffsetNumber(current) >= offno) && ItemPointerGetOffsetNumber(current) >= offno)
{ {
switch (op) page = BufferGetPage(buf);
{ opaque = (BTPageOpaque) PageGetSpecialPointer(page);
/* start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
* Problems occure when current scan page is splitted! if (ItemPointerGetOffsetNumber(current) == start)
* We saw "Non-functional updates" (ie index tuples were read twice) ItemPointerSetInvalid(&(so->curHeapIptr));
* and partial updates ("good" tuples were not read at all) - due to else
* losing scan position here. Look @ nbtree.c:btgettuple()
* what we do now... - vadim 07/29/98
case BT_INSERT:
_bt_step(scan, &buf, ForwardScanDirection);
break;
*/
case BT_DELETE:
_bt_step(scan, &buf, BackwardScanDirection);
break;
default:
elog(ERROR, "_bt_scandel: bad operation '%d'", op);
/* NOTREACHED */
}
so->btso_curbuf = buf;
if (ItemPointerIsValid(current))
{ {
Page page = BufferGetPage(buf); _bt_step(scan, &buf, BackwardScanDirection);
BTItem btitem = (BTItem) PageGetItem(page, so->btso_curbuf = buf;
PageGetItemId(page, ItemPointerGetOffsetNumber(current))); if (ItemPointerIsValid(current))
{
so->curHeapIptr = btitem->bti_itup.t_tid; Page pg = BufferGetPage(buf);
BTItem btitem = (BTItem) PageGetItem(pg,
PageGetItemId(pg, ItemPointerGetOffsetNumber(current)));
so->curHeapIptr = btitem->bti_itup.t_tid;
}
} }
} }
...@@ -179,65 +152,39 @@ _bt_scandel(IndexScanDesc scan, int op, BlockNumber blkno, OffsetNumber offno) ...@@ -179,65 +152,39 @@ _bt_scandel(IndexScanDesc scan, int op, BlockNumber blkno, OffsetNumber offno)
&& ItemPointerGetBlockNumber(current) == blkno && ItemPointerGetBlockNumber(current) == blkno
&& ItemPointerGetOffsetNumber(current) >= offno) && ItemPointerGetOffsetNumber(current) >= offno)
{ {
ItemPointerData tmp;
tmp = *current;
*current = scan->currentItemData;
scan->currentItemData = tmp;
so->btso_curbuf = so->btso_mrkbuf;
so->btso_mrkbuf = buf;
buf = so->btso_curbuf;
switch (op)
{
/*
* ...comments are above...
case BT_INSERT:
_bt_step(scan, &buf, ForwardScanDirection);
break;
*/
case BT_DELETE:
_bt_step(scan, &buf, BackwardScanDirection);
break;
default:
elog(ERROR, "_bt_scandel: bad operation '%d'", op);
/* NOTREACHED */
}
so->btso_curbuf = so->btso_mrkbuf;
so->btso_mrkbuf = buf;
tmp = *current;
*current = scan->currentItemData;
scan->currentItemData = tmp;
if (ItemPointerIsValid(current))
{
Page page = BufferGetPage(buf);
BTItem btitem = (BTItem) PageGetItem(page,
PageGetItemId(page, ItemPointerGetOffsetNumber(current)));
so->mrkHeapIptr = btitem->bti_itup.t_tid; page = BufferGetPage(so->btso_mrkbuf);
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
start = P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY;
if (ItemPointerGetOffsetNumber(current) == start)
ItemPointerSetInvalid(&(so->mrkHeapIptr));
else
{
ItemPointerData tmp;
tmp = *current;
*current = scan->currentItemData;
scan->currentItemData = tmp;
so->btso_curbuf = so->btso_mrkbuf;
so->btso_mrkbuf = buf;
buf = so->btso_curbuf;
_bt_step(scan, &buf, BackwardScanDirection);
so->btso_curbuf = so->btso_mrkbuf;
so->btso_mrkbuf = buf;
tmp = *current;
*current = scan->currentItemData;
scan->currentItemData = tmp;
if (ItemPointerIsValid(current))
{
Page pg = BufferGetPage(buf);
BTItem btitem = (BTItem) PageGetItem(pg,
PageGetItemId(pg, ItemPointerGetOffsetNumber(current)));
so->mrkHeapIptr = btitem->bti_itup.t_tid;
}
} }
} }
} }
/*
* _bt_scantouched() -- check to see if a scan is affected by a given
* change to the index
*/
static bool
_bt_scantouched(IndexScanDesc scan, BlockNumber blkno, OffsetNumber offno)
{
ItemPointer current;
current = &(scan->currentItemData);
if (ItemPointerIsValid(current)
&& ItemPointerGetBlockNumber(current) == blkno
&& ItemPointerGetOffsetNumber(current) >= offno)
return true;
current = &(scan->currentMarkData);
if (ItemPointerIsValid(current)
&& ItemPointerGetBlockNumber(current) == blkno
&& ItemPointerGetOffsetNumber(current) >= offno)
return true;
return false;
}
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.41 1999/02/21 03:48:27 scrappy Exp $ * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtsearch.c,v 1.42 1999/03/28 20:31:58 vadim Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -706,15 +706,7 @@ _bt_next(IndexScanDesc scan, ScanDirection dir) ...@@ -706,15 +706,7 @@ _bt_next(IndexScanDesc scan, ScanDirection dir)
so = (BTScanOpaque) scan->opaque; so = (BTScanOpaque) scan->opaque;
current = &(scan->currentItemData); current = &(scan->currentItemData);
/* Assert (BufferIsValid(so->btso_curbuf));
* XXX 10 may 91: somewhere there's a bug in our management of the
* cached buffer for this scan. wei discovered it. the following is
* a workaround so he can work until i figure out what's going on.
*/
if (!BufferIsValid(so->btso_curbuf))
so->btso_curbuf = _bt_getbuf(rel, ItemPointerGetBlockNumber(current),
BT_READ);
/* we still have the buffer pinned and locked */ /* we still have the buffer pinned and locked */
buf = so->btso_curbuf; buf = so->btso_curbuf;
...@@ -1069,7 +1061,11 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) ...@@ -1069,7 +1061,11 @@ _bt_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
rel = scan->relation; rel = scan->relation;
current = &(scan->currentItemData); current = &(scan->currentItemData);
offnum = ItemPointerGetOffsetNumber(current); /*
* Don't use ItemPointerGetOffsetNumber or you risk to get
* assertion due to ability of ip_posid to be equal 0.
*/
offnum = current->ip_posid;
page = BufferGetPage(*bufP); page = BufferGetPage(*bufP);
opaque = (BTPageOpaque) PageGetSpecialPointer(page); opaque = (BTPageOpaque) PageGetSpecialPointer(page);
so = (BTScanOpaque) scan->opaque; so = (BTScanOpaque) scan->opaque;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.32 1999/02/13 23:14:49 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.33 1999/03/28 20:31:59 vadim Exp $
* *
* NOTES * NOTES
* Transaction aborts can now occur two ways: * Transaction aborts can now occur two ways:
...@@ -933,7 +933,10 @@ CommitTransaction() ...@@ -933,7 +933,10 @@ CommitTransaction()
* 11/26/96 * 11/26/96
*/ */
if (MyProc != (PROC *) NULL) if (MyProc != (PROC *) NULL)
{
MyProc->xid = InvalidTransactionId; MyProc->xid = InvalidTransactionId;
MyProc->xmin = InvalidTransactionId;
}
} }
/* -------------------------------- /* --------------------------------
...@@ -951,7 +954,10 @@ AbortTransaction() ...@@ -951,7 +954,10 @@ AbortTransaction()
* 11/26/96 * 11/26/96
*/ */
if (MyProc != (PROC *) NULL) if (MyProc != (PROC *) NULL)
{
MyProc->xid = InvalidTransactionId; MyProc->xid = InvalidTransactionId;
MyProc->xmin = InvalidTransactionId;
}
/* ---------------- /* ----------------
* check the current transaction state * check the current transaction state
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.96 1999/02/21 03:48:33 scrappy Exp $ * $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.97 1999/03/28 20:32:01 vadim Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -65,6 +65,9 @@ static Portal vc_portal; ...@@ -65,6 +65,9 @@ static Portal vc_portal;
static int MESSAGE_LEVEL; /* message level */ static int MESSAGE_LEVEL; /* message level */
static TransactionId XmaxRecent;
extern void GetXmaxRecent(TransactionId *xid);
#define swapLong(a,b) {long tmp; tmp=a; a=b; b=tmp;} #define swapLong(a,b) {long tmp; tmp=a; a=b; b=tmp;}
#define swapInt(a,b) {int tmp; tmp=a; a=b; b=tmp;} #define swapInt(a,b) {int tmp; tmp=a; a=b; b=tmp;}
#define swapDatum(a,b) {Datum tmp; tmp=a; a=b; b=tmp;} #define swapDatum(a,b) {Datum tmp; tmp=a; a=b; b=tmp;}
...@@ -98,9 +101,11 @@ static void vc_free(VRelList vrl); ...@@ -98,9 +101,11 @@ static void vc_free(VRelList vrl);
static void vc_getindices(Oid relid, int *nindices, Relation **Irel); static void vc_getindices(Oid relid, int *nindices, Relation **Irel);
static void vc_clsindices(int nindices, Relation *Irel); static void vc_clsindices(int nindices, Relation *Irel);
static void vc_mkindesc(Relation onerel, int nindices, Relation *Irel, IndDesc **Idesc); static void vc_mkindesc(Relation onerel, int nindices, Relation *Irel, IndDesc **Idesc);
static char *vc_find_eq(char *bot, int nelem, int size, char *elm, int (*compar) (char *, char *)); static void *vc_find_eq(void *bot, int nelem, int size, void *elm,
static int vc_cmp_blk(char *left, char *right); int (*compar) (const void *, const void *));
static int vc_cmp_offno(char *left, char *right); static int vc_cmp_blk(const void *left, const void *right);
static int vc_cmp_offno(const void *left, const void *right);
static int vc_cmp_vtlinks(const void *left, const void *right);
static bool vc_enough_space(VPageDescr vpd, Size len); static bool vc_enough_space(VPageDescr vpd, Size len);
void void
...@@ -502,6 +507,8 @@ vc_vacone(Oid relid, bool analyze, List *va_cols) ...@@ -502,6 +507,8 @@ vc_vacone(Oid relid, bool analyze, List *va_cols)
/* we require the relation to be locked until the indices are cleaned */ /* we require the relation to be locked until the indices are cleaned */
LockRelation(onerel, AccessExclusiveLock); LockRelation(onerel, AccessExclusiveLock);
GetXmaxRecent(&XmaxRecent);
/* scan it */ /* scan it */
vacuum_pages.vpl_num_pages = fraged_pages.vpl_num_pages = 0; vacuum_pages.vpl_num_pages = fraged_pages.vpl_num_pages = 0;
vc_scanheap(vacrelstats, onerel, &vacuum_pages, &fraged_pages); vc_scanheap(vacrelstats, onerel, &vacuum_pages, &fraged_pages);
...@@ -595,6 +602,7 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel, ...@@ -595,6 +602,7 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel,
vp; vp;
uint32 tups_vacuumed, uint32 tups_vacuumed,
num_tuples, num_tuples,
nkeep,
nunused, nunused,
ncrash, ncrash,
empty_pages, empty_pages,
...@@ -609,22 +617,24 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel, ...@@ -609,22 +617,24 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel,
struct rusage ru0, struct rusage ru0,
ru1; ru1;
bool do_shrinking = true; bool do_shrinking = true;
VTupleLink vtlinks = (VTupleLink) palloc(100 * sizeof(VTupleLinkData));
int num_vtlinks = 0;
int free_vtlinks = 100;
getrusage(RUSAGE_SELF, &ru0); getrusage(RUSAGE_SELF, &ru0);
tups_vacuumed = num_tuples = nunused = ncrash = empty_pages = relname = (RelationGetRelationName(onerel))->data;
elog(MESSAGE_LEVEL, "--Relation %s--", relname);
tups_vacuumed = num_tuples = nkeep = nunused = ncrash = empty_pages =
new_pages = changed_pages = empty_end_pages = 0; new_pages = changed_pages = empty_end_pages = 0;
free_size = usable_free_size = 0; free_size = usable_free_size = 0;
relname = (RelationGetRelationName(onerel))->data;
nblocks = RelationGetNumberOfBlocks(onerel); nblocks = RelationGetNumberOfBlocks(onerel);
vpc = (VPageDescr) palloc(sizeof(VPageDescrData) + MaxOffsetNumber * sizeof(OffsetNumber)); vpc = (VPageDescr) palloc(sizeof(VPageDescrData) + MaxOffsetNumber * sizeof(OffsetNumber));
vpc->vpd_offsets_used = 0; vpc->vpd_offsets_used = 0;
elog(MESSAGE_LEVEL, "--Relation %s--", relname);
for (blkno = 0; blkno < nblocks; blkno++) for (blkno = 0; blkno < nblocks; blkno++)
{ {
buf = ReadBuffer(onerel, blkno); buf = ReadBuffer(onerel, blkno);
...@@ -686,6 +696,34 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel, ...@@ -686,6 +696,34 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel,
{ {
if (tuple.t_data->t_infomask & HEAP_XMIN_INVALID) if (tuple.t_data->t_infomask & HEAP_XMIN_INVALID)
tupgone = true; tupgone = true;
else if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
{
if (TransactionIdDidCommit((TransactionId)
tuple.t_data->t_cmin))
{
tuple.t_data->t_infomask |= HEAP_XMIN_INVALID;
tupgone = true;
}
else
{
tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED;
pgchanged = true;
}
}
else if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
{
if (!TransactionIdDidCommit((TransactionId)
tuple.t_data->t_cmin))
{
tuple.t_data->t_infomask |= HEAP_XMIN_INVALID;
tupgone = true;
}
else
{
tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED;
pgchanged = true;
}
}
else else
{ {
if (TransactionIdDidAbort(tuple.t_data->t_xmin)) if (TransactionIdDidAbort(tuple.t_data->t_xmin))
...@@ -722,22 +760,37 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel, ...@@ -722,22 +760,37 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel,
!(tuple.t_data->t_infomask & HEAP_XMAX_INVALID)) !(tuple.t_data->t_infomask & HEAP_XMAX_INVALID))
{ {
if (tuple.t_data->t_infomask & HEAP_XMAX_COMMITTED) if (tuple.t_data->t_infomask & HEAP_XMAX_COMMITTED)
tupgone = true; {
if (tuple.t_data->t_infomask & HEAP_MARKED_FOR_UPDATE)
{
pgchanged = true;
tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
}
else
tupgone = true;
}
else if (TransactionIdDidAbort(tuple.t_data->t_xmax)) else if (TransactionIdDidAbort(tuple.t_data->t_xmax))
{ {
tuple.t_data->t_infomask |= HEAP_XMAX_INVALID; tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
pgchanged = true; pgchanged = true;
} }
else if (TransactionIdDidCommit(tuple.t_data->t_xmax)) else if (TransactionIdDidCommit(tuple.t_data->t_xmax))
tupgone = true; {
if (tuple.t_data->t_infomask & HEAP_MARKED_FOR_UPDATE)
{
tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
pgchanged = true;
}
else
tupgone = true;
}
else if (!TransactionIdIsInProgress(tuple.t_data->t_xmax)) else if (!TransactionIdIsInProgress(tuple.t_data->t_xmax))
{ {
/* /*
* Not Aborted, Not Committed, Not in Progress - so it * Not Aborted, Not Committed, Not in Progress - so it
* from crashed process. - vadim 06/02/97 * from crashed process. - vadim 06/02/97
*/ */
tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;; tuple.t_data->t_infomask |= HEAP_XMAX_INVALID;
pgchanged = true; pgchanged = true;
} }
else else
...@@ -746,6 +799,41 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel, ...@@ -746,6 +799,41 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel,
relname, blkno, offnum, tuple.t_data->t_xmax); relname, blkno, offnum, tuple.t_data->t_xmax);
do_shrinking = false; do_shrinking = false;
} }
/*
* If tuple is recently deleted then
* we must not remove it from relation.
*/
if (tupgone && tuple.t_data->t_xmax >= XmaxRecent &&
tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED)
{
tupgone = false;
nkeep++;
if (!(tuple.t_data->t_infomask & HEAP_XMAX_COMMITTED))
{
tuple.t_data->t_infomask |= HEAP_XMAX_COMMITTED;
pgchanged = true;
}
/*
* If we do shrinking and this tuple is updated one
* then remember it to construct updated tuple
* dependencies.
*/
if (do_shrinking && !(ItemPointerEquals(&(tuple.t_self),
&(tuple.t_data->t_ctid))))
{
if (free_vtlinks == 0)
{
free_vtlinks = 1000;
vtlinks = (VTupleLink) repalloc(vtlinks,
(free_vtlinks + num_vtlinks) *
sizeof(VTupleLinkData));
}
vtlinks[num_vtlinks].new_tid = tuple.t_data->t_ctid;
vtlinks[num_vtlinks].this_tid = tuple.t_self;
free_vtlinks--;
num_vtlinks++;
}
}
} }
/* /*
...@@ -859,13 +947,31 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel, ...@@ -859,13 +947,31 @@ vc_scanheap(VRelStats *vacrelstats, Relation onerel,
} }
} }
if (usable_free_size > 0 && num_vtlinks > 0)
{
qsort((char *) vtlinks, num_vtlinks, sizeof (VTupleLinkData),
vc_cmp_vtlinks);
vacrelstats->vtlinks = vtlinks;
vacrelstats->num_vtlinks = num_vtlinks;
}
else
{
vacrelstats->vtlinks = NULL;
vacrelstats->num_vtlinks = 0;
pfree(vtlinks);
}
getrusage(RUSAGE_SELF, &ru1); getrusage(RUSAGE_SELF, &ru1);
elog(MESSAGE_LEVEL, "Pages %u: Changed %u, Reapped %u, Empty %u, New %u; \ elog(MESSAGE_LEVEL, "Pages %u: Changed %u, Reapped %u, Empty %u, New %u; \
Tup %u: Vac %u, Crash %u, UnUsed %u, MinLen %u, MaxLen %u; Re-using: Free/Avail. Space %u/%u; EndEmpty/Avail. Pages %u/%u. Elapsed %u/%u sec.", Tup %u: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %u, MaxLen %u; \
nblocks, changed_pages, vacuum_pages->vpl_num_pages, empty_pages, new_pages, Re-using: Free/Avail. Space %u/%u; EndEmpty/Avail. Pages %u/%u. \
num_tuples, tups_vacuumed, ncrash, nunused, min_tlen, max_tlen, Elapsed %u/%u sec.",
free_size, usable_free_size, empty_end_pages, fraged_pages->vpl_num_pages, nblocks, changed_pages, vacuum_pages->vpl_num_pages, empty_pages,
new_pages, num_tuples, tups_vacuumed,
nkeep, vacrelstats->num_vtlinks, ncrash,
nunused, min_tlen, max_tlen, free_size, usable_free_size,
empty_end_pages, fraged_pages->vpl_num_pages,
ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec, ru1.ru_stime.tv_sec - ru0.ru_stime.tv_sec,
ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec); ru1.ru_utime.tv_sec - ru0.ru_utime.tv_sec);
...@@ -917,7 +1023,7 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel, ...@@ -917,7 +1023,7 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
*idcur; *idcur;
int last_fraged_block, int last_fraged_block,
last_vacuum_block, last_vacuum_block,
i; i = 0;
Size tuple_len; Size tuple_len;
int num_moved, int num_moved,
num_fraged_pages, num_fraged_pages,
...@@ -1022,6 +1128,280 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel, ...@@ -1022,6 +1128,280 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
tuple_len = tuple.t_len = ItemIdGetLength(itemid); tuple_len = tuple.t_len = ItemIdGetLength(itemid);
ItemPointerSet(&(tuple.t_self), blkno, offnum); ItemPointerSet(&(tuple.t_self), blkno, offnum);
if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
{
if ((TransactionId)tuple.t_data->t_cmin != myXID)
elog(ERROR, "Invalid XID in t_cmin");
if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
continue; /* already removed by me */
if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
break;
elog(ERROR, "HEAP_MOVED_OFF/HEAP_MOVED_IN was expected");
}
/*
* If this tuple is in the chain of tuples created in
* updates by "recent" transactions then we have to
* move all chain of tuples to another places.
*/
if ((tuple.t_data->t_infomask & HEAP_UPDATED &&
tuple.t_data->t_xmin >= XmaxRecent) ||
(!(tuple.t_data->t_infomask & HEAP_XMAX_INVALID) &&
!(ItemPointerEquals(&(tuple.t_self), &(tuple.t_data->t_ctid)))))
{
Buffer Cbuf = buf;
Page Cpage;
ItemId Citemid;
ItemPointerData Ctid;
HeapTupleData tp = tuple;
Size tlen = tuple_len;
VTupleMove vtmove = (VTupleMove)
palloc(100 * sizeof(VTupleMoveData));
int num_vtmove = 0;
int free_vtmove = 100;
VPageDescr to_vpd = fraged_pages->vpl_pagedesc[0];
int to_item = 0;
bool freeCbuf = false;
int ti;
if (vacrelstats->vtlinks == NULL)
elog(ERROR, "No one parent tuple was found");
if (cur_buffer != InvalidBuffer)
{
WriteBuffer(cur_buffer);
cur_buffer = InvalidBuffer;
}
/*
* If this tuple is at the beginning or in the middle of the
* chain, we first have to walk forward to the end of the chain.
*/
while (!(tp.t_data->t_infomask & HEAP_XMAX_INVALID) &&
!(ItemPointerEquals(&(tp.t_self), &(tp.t_data->t_ctid))))
{
Ctid = tp.t_data->t_ctid;
if (freeCbuf)
ReleaseBuffer(Cbuf);
freeCbuf = true;
Cbuf = ReadBuffer(onerel,
ItemPointerGetBlockNumber(&Ctid));
Cpage = BufferGetPage(Cbuf);
Citemid = PageGetItemId(Cpage,
ItemPointerGetOffsetNumber(&Ctid));
if (!ItemIdIsUsed(Citemid))
elog(ERROR, "Child itemid marked as unused");
tp.t_data = (HeapTupleHeader) PageGetItem(Cpage, Citemid);
tp.t_self = Ctid;
tlen = tp.t_len = ItemIdGetLength(Citemid);
}
/* first, can chain be moved ? */
for ( ; ; )
{
if (!vc_enough_space(to_vpd, tlen))
{
if (to_vpd != last_fraged_page &&
!vc_enough_space(to_vpd, vacrelstats->min_tlen))
{
Assert(num_fraged_pages > to_item + 1);
memmove(fraged_pages->vpl_pagedesc + to_item,
fraged_pages->vpl_pagedesc + to_item + 1,
sizeof(VPageDescr *) * (num_fraged_pages - to_item - 1));
num_fraged_pages--;
Assert(last_fraged_page == fraged_pages->vpl_pagedesc[num_fraged_pages - 1]);
}
for (i = 0; i < num_fraged_pages; i++)
{
if (vc_enough_space(fraged_pages->vpl_pagedesc[i], tlen))
break;
}
if (i == num_fraged_pages) /* can't move item anywhere */
{
for (i = 0; i < num_vtmove; i++)
{
Assert(vtmove[i].vpd->vpd_offsets_used > 0);
(vtmove[i].vpd->vpd_offsets_used)--;
}
num_vtmove = 0;
break;
}
to_item = i;
to_vpd = fraged_pages->vpl_pagedesc[to_item];
}
to_vpd->vpd_free -= DOUBLEALIGN(tlen);
if (to_vpd->vpd_offsets_used >= to_vpd->vpd_offsets_free)
to_vpd->vpd_free -= DOUBLEALIGN(sizeof(ItemIdData));
(to_vpd->vpd_offsets_used)++;
if (free_vtmove == 0)
{
free_vtmove = 1000;
vtmove = (VTupleMove) repalloc(vtmove,
(free_vtmove + num_vtmove) *
sizeof(VTupleMoveData));
}
vtmove[num_vtmove].tid = tp.t_self;
vtmove[num_vtmove].vpd = to_vpd;
if (to_vpd->vpd_offsets_used == 1)
vtmove[num_vtmove].cleanVpd = true;
else
vtmove[num_vtmove].cleanVpd = false;
free_vtmove--;
num_vtmove++;
/*
* All done ?
*/
if (!(tp.t_data->t_infomask & HEAP_UPDATED) ||
tp.t_data->t_xmin < XmaxRecent)
break;
/*
* Well, try to find tuple with old row version
*/
for ( ; ; )
{
Buffer Pbuf;
Page Ppage;
ItemId Pitemid;
HeapTupleData Ptp;
VTupleLinkData vtld,
*vtlp;
vtld.new_tid = tp.t_self;
vtlp = (VTupleLink)
vc_find_eq((void *) (vacrelstats->vtlinks),
vacrelstats->num_vtlinks,
sizeof(VTupleLinkData),
(void *) &vtld,
vc_cmp_vtlinks);
if (vtlp == NULL)
elog(ERROR, "Parent tuple was not found");
tp.t_self = vtlp->this_tid;
Pbuf = ReadBuffer(onerel,
ItemPointerGetBlockNumber(&(tp.t_self)));
Ppage = BufferGetPage(Pbuf);
Pitemid = PageGetItemId(Ppage,
ItemPointerGetOffsetNumber(&(tp.t_self)));
if (!ItemIdIsUsed(Pitemid))
elog(ERROR, "Parent itemid marked as unused");
Ptp.t_data = (HeapTupleHeader) PageGetItem(Ppage, Pitemid);
Assert(Ptp.t_data->t_xmax == tp.t_data->t_xmin);
/*
* If this tuple is itself an updated version of the row
* and it was created and superseded by the same
* transaction, then no one is interested in this tuple -
* mark it as removed.
*/
if (Ptp.t_data->t_infomask & HEAP_UPDATED &&
Ptp.t_data->t_xmin == Ptp.t_data->t_xmax)
{
TransactionIdStore(myXID,
(TransactionId*) &(Ptp.t_data->t_cmin));
Ptp.t_data->t_infomask &=
~(HEAP_XMIN_COMMITTED|HEAP_XMIN_INVALID|HEAP_MOVED_IN);
Ptp.t_data->t_infomask |= HEAP_MOVED_OFF;
WriteBuffer(Pbuf);
continue;
}
tp.t_data = Ptp.t_data;
tlen = tp.t_len = ItemIdGetLength(Pitemid);
if (freeCbuf)
ReleaseBuffer(Cbuf);
Cbuf = Pbuf;
freeCbuf = true;
break;
}
}
if (freeCbuf)
ReleaseBuffer(Cbuf);
if (num_vtmove == 0) /* chain can't be moved */
{
pfree(vtmove);
break;
}
ItemPointerSetInvalid(&Ctid);
/*
 * Move every tuple recorded in vtmove[] to the page chosen for it.
 *
 * NOTE: everything that refers to the current chain entry must be
 * indexed with "ti".  The variable "i" is reused by the index-insert
 * loop below (and by the fraged-pages search earlier), so it does not
 * identify the current entry.
 */
for (ti = 0; ti < num_vtmove; ti++)
{
	/* Get tuple from chain */
	tuple.t_self = vtmove[ti].tid;
	Cbuf = ReadBuffer(onerel,
					  ItemPointerGetBlockNumber(&(tuple.t_self)));
	Cpage = BufferGetPage(Cbuf);
	Citemid = PageGetItemId(Cpage,
							ItemPointerGetOffsetNumber(&(tuple.t_self)));
	tuple.t_data = (HeapTupleHeader) PageGetItem(Cpage, Citemid);
	tuple_len = tuple.t_len = ItemIdGetLength(Citemid);
	/* Get page to move in */
	cur_buffer = ReadBuffer(onerel, vtmove[ti].vpd->vpd_blkno);
	ToPage = BufferGetPage(cur_buffer);
	/* if this page was not used before - clean it */
	if (!PageIsEmpty(ToPage) && vtmove[ti].cleanVpd)
		vc_vacpage(ToPage, vtmove[ti].vpd);
	heap_copytuple_with_tuple(&tuple, &newtup);
	RelationInvalidateHeapTuple(onerel, &tuple);
	/* mark the copy as moved in by vacuum; vacuum XID goes into t_cmin */
	TransactionIdStore(myXID, (TransactionId*) &(newtup.t_data->t_cmin));
	newtup.t_data->t_infomask &=
		~(HEAP_XMIN_COMMITTED|HEAP_XMIN_INVALID|HEAP_MOVED_OFF);
	newtup.t_data->t_infomask |= HEAP_MOVED_IN;
	newoff = PageAddItem(ToPage, (Item) newtup.t_data, tuple_len,
						 InvalidOffsetNumber, LP_USED);
	if (newoff == InvalidOffsetNumber)
	{
		elog(ERROR, "\
moving chain: failed to add item with len = %u to page %u",
			 tuple_len, vtmove[ti].vpd->vpd_blkno);
	}
	newitemid = PageGetItemId(ToPage, newoff);
	pfree(newtup.t_data);
	newtup.t_data = (HeapTupleHeader) PageGetItem(ToPage, newitemid);
	/* the new TID lives on the page this entry was assigned to */
	ItemPointerSet(&(newtup.t_self), vtmove[ti].vpd->vpd_blkno, newoff);
	/*
	 * Set t_ctid pointing to itself for last tuple in
	 * chain and to next tuple in chain otherwise.
	 */
	if (!ItemPointerIsValid(&Ctid))
		newtup.t_data->t_ctid = newtup.t_self;
	else
		newtup.t_data->t_ctid = Ctid;
	Ctid = newtup.t_self;
	/* mark the old copy as moved off by vacuum */
	TransactionIdStore(myXID, (TransactionId*) &(tuple.t_data->t_cmin));
	tuple.t_data->t_infomask &=
		~(HEAP_XMIN_COMMITTED|HEAP_XMIN_INVALID|HEAP_MOVED_IN);
	tuple.t_data->t_infomask |= HEAP_MOVED_OFF;
	num_moved++;
	/* remember offsets freed on the page currently being scanned */
	if (Cbuf == buf)
		vpc->vpd_offsets[vpc->vpd_offsets_free++] =
			ItemPointerGetOffsetNumber(&(tuple.t_self));
	if (Irel != (Relation *) NULL)
	{
		/* insert index entries pointing at the tuple's new location */
		for (i = 0, idcur = Idesc; i < nindices; i++, idcur++)
		{
			FormIndexDatum(idcur->natts,
						   (AttrNumber *) &(idcur->tform->indkey[0]),
						   &newtup,
						   tupdesc,
						   idatum,
						   inulls,
						   idcur->finfoP);
			iresult = index_insert(Irel[i],
								   idatum,
								   inulls,
								   &newtup.t_self,
								   onerel);
			if (iresult)
				pfree(iresult);
		}
	}
	WriteBuffer(cur_buffer);
	if (Cbuf == buf)
		ReleaseBuffer(Cbuf);
	else
		WriteBuffer(Cbuf);
}
cur_buffer = InvalidBuffer;
pfree(vtmove);
continue;
}
/* try to find new page for this tuple */ /* try to find new page for this tuple */
if (cur_buffer == InvalidBuffer || if (cur_buffer == InvalidBuffer ||
!vc_enough_space(cur_page, tuple_len)) !vc_enough_space(cur_page, tuple_len))
...@@ -1070,13 +1450,14 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel, ...@@ -1070,13 +1450,14 @@ vc_rpfheap(VRelStats *vacrelstats, Relation onerel,
RelationInvalidateHeapTuple(onerel, &tuple); RelationInvalidateHeapTuple(onerel, &tuple);
/* store transaction information */ /*
TransactionIdStore(myXID, &(newtup.t_data->t_xmin)); * Mark new tuple as moved_in by vacuum and
newtup.t_data->t_cmin = myCID; * store vacuum XID in t_cmin !!!
StoreInvalidTransactionId(&(newtup.t_data->t_xmax)); */
/* set xmin to unknown and xmax to invalid */ TransactionIdStore(myXID, (TransactionId*) &(newtup.t_data->t_cmin));
newtup.t_data->t_infomask &= ~(HEAP_XACT_MASK); newtup.t_data->t_infomask &=
newtup.t_data->t_infomask |= HEAP_XMAX_INVALID; ~(HEAP_XMIN_COMMITTED|HEAP_XMIN_INVALID|HEAP_MOVED_OFF);
newtup.t_data->t_infomask |= HEAP_MOVED_IN;
/* add tuple to the page */ /* add tuple to the page */
newoff = PageAddItem(ToPage, (Item) newtup.t_data, tuple_len, newoff = PageAddItem(ToPage, (Item) newtup.t_data, tuple_len,
...@@ -1094,11 +1475,14 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)", ...@@ -1094,11 +1475,14 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
ItemPointerSet(&(newtup.t_data->t_ctid), cur_page->vpd_blkno, newoff); ItemPointerSet(&(newtup.t_data->t_ctid), cur_page->vpd_blkno, newoff);
newtup.t_self = newtup.t_data->t_ctid; newtup.t_self = newtup.t_data->t_ctid;
/* now logically delete end-tuple */ /*
TransactionIdStore(myXID, &(tuple.t_data->t_xmax)); * Mark old tuple as moved_off by vacuum and
tuple.t_data->t_cmax = myCID; * store vacuum XID in t_cmin !!!
/* set xmax to unknown */ */
tuple.t_data->t_infomask &= ~(HEAP_XMAX_INVALID | HEAP_XMAX_COMMITTED); TransactionIdStore(myXID, (TransactionId*) &(tuple.t_data->t_cmin));
tuple.t_data->t_infomask &=
~(HEAP_XMIN_COMMITTED|HEAP_XMIN_INVALID|HEAP_MOVED_IN);
tuple.t_data->t_infomask |= HEAP_MOVED_OFF;
cur_page->vpd_offsets_used++; cur_page->vpd_offsets_used++;
num_moved++; num_moved++;
...@@ -1131,6 +1515,8 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)", ...@@ -1131,6 +1515,8 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
if (vpc->vpd_offsets_free > 0) /* some tuples were moved */ if (vpc->vpd_offsets_free > 0) /* some tuples were moved */
{ {
qsort((char *) (vpc->vpd_offsets), vpc->vpd_offsets_free,
sizeof(OffsetNumber), vc_cmp_offno);
vc_reappage(&Nvpl, vpc); vc_reappage(&Nvpl, vpc);
WriteBuffer(buf); WriteBuffer(buf);
} }
...@@ -1167,7 +1553,7 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)", ...@@ -1167,7 +1553,7 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
} }
/* /*
* Clean uncleaned reapped pages from vacuum_pages list and set xmin * Clean uncleaned reapped pages from vacuum_pages list and set xmin
* committed for inserted tuples * committed for inserted tuples
*/ */
checked_moved = 0; checked_moved = 0;
...@@ -1178,16 +1564,10 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)", ...@@ -1178,16 +1564,10 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
page = BufferGetPage(buf); page = BufferGetPage(buf);
if ((*vpp)->vpd_offsets_used == 0) /* this page was not used */ if ((*vpp)->vpd_offsets_used == 0) /* this page was not used */
{ {
if (!PageIsEmpty(page))
/* vc_vacpage(page, *vpp);
* noff == 0 in empty pages only - such pages should be
* re-used
*/
Assert((*vpp)->vpd_offsets_free > 0);
vc_vacpage(page, *vpp);
} }
else else /* this page was used */
/* this page was used */
{ {
num_tuples = 0; num_tuples = 0;
max_offset = PageGetMaxOffsetNumber(page); max_offset = PageGetMaxOffsetNumber(page);
...@@ -1199,10 +1579,19 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)", ...@@ -1199,10 +1579,19 @@ failed to add item with len = %u to page %u (free space %u, nusd %u, noff %u)",
if (!ItemIdIsUsed(itemid)) if (!ItemIdIsUsed(itemid))
continue; continue;
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
if (TransactionIdEquals((TransactionId) tuple.t_data->t_xmin, myXID)) if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
{ {
tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED; if ((TransactionId)tuple.t_data->t_cmin != myXID)
num_tuples++; elog(ERROR, "Invalid XID in t_cmin (2)");
if (tuple.t_data->t_infomask & HEAP_MOVED_IN)
{
tuple.t_data->t_infomask |= HEAP_XMIN_COMMITTED;
num_tuples++;
}
else if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
tuple.t_data->t_infomask |= HEAP_XMIN_INVALID;
else
elog(ERROR, "HEAP_MOVED_OFF/HEAP_MOVED_IN was expected (2)");
} }
} }
Assert((*vpp)->vpd_offsets_used == num_tuples); Assert((*vpp)->vpd_offsets_used == num_tuples);
...@@ -1244,16 +1633,13 @@ Elapsed %u/%u sec.", ...@@ -1244,16 +1633,13 @@ Elapsed %u/%u sec.",
} }
/* /*
* clean moved tuples from last page in Nvpl list if some tuples * clean moved tuples from last page in Nvpl list
* left there
*/ */
if (vpc->vpd_offsets_free > 0 && offnum <= maxoff) if (vpc->vpd_blkno == blkno - 1 && vpc->vpd_offsets_free > 0)
{ {
Assert(vpc->vpd_blkno == blkno - 1);
buf = ReadBuffer(onerel, vpc->vpd_blkno); buf = ReadBuffer(onerel, vpc->vpd_blkno);
page = BufferGetPage(buf); page = BufferGetPage(buf);
num_tuples = 0; num_tuples = 0;
maxoff = offnum;
for (offnum = FirstOffsetNumber; for (offnum = FirstOffsetNumber;
offnum < maxoff; offnum < maxoff;
offnum = OffsetNumberNext(offnum)) offnum = OffsetNumberNext(offnum))
...@@ -1262,9 +1648,20 @@ Elapsed %u/%u sec.", ...@@ -1262,9 +1648,20 @@ Elapsed %u/%u sec.",
if (!ItemIdIsUsed(itemid)) if (!ItemIdIsUsed(itemid))
continue; continue;
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
Assert(TransactionIdEquals((TransactionId) tuple.t_data->t_xmax, myXID));
itemid->lp_flags &= ~LP_USED; if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
num_tuples++; {
if ((TransactionId)tuple.t_data->t_cmin != myXID)
elog(ERROR, "Invalid XID in t_cmin (3)");
if (tuple.t_data->t_infomask & HEAP_MOVED_OFF)
{
itemid->lp_flags &= ~LP_USED;
num_tuples++;
}
else
elog(ERROR, "HEAP_MOVED_OFF was expected");
}
} }
Assert(vpc->vpd_offsets_free == num_tuples); Assert(vpc->vpd_offsets_free == num_tuples);
PageRepairFragmentation(page); PageRepairFragmentation(page);
...@@ -1298,6 +1695,8 @@ Elapsed %u/%u sec.", ...@@ -1298,6 +1695,8 @@ Elapsed %u/%u sec.",
} }
pfree(vpc); pfree(vpc);
if (vacrelstats->vtlinks != NULL)
pfree(vacrelstats->vtlinks);
} /* vc_rpfheap */ } /* vc_rpfheap */
...@@ -1522,8 +1921,8 @@ vc_tidreapped(ItemPointer itemptr, VPageList vpl) ...@@ -1522,8 +1921,8 @@ vc_tidreapped(ItemPointer itemptr, VPageList vpl)
ioffno = ItemPointerGetOffsetNumber(itemptr); ioffno = ItemPointerGetOffsetNumber(itemptr);
vp = &vpd; vp = &vpd;
vpp = (VPageDescr *) vc_find_eq((char *) (vpl->vpl_pagedesc), vpp = (VPageDescr *) vc_find_eq((void *) (vpl->vpl_pagedesc),
vpl->vpl_num_pages, sizeof(VPageDescr), (char *) &vp, vpl->vpl_num_pages, sizeof(VPageDescr), (void *) &vp,
vc_cmp_blk); vc_cmp_blk);
if (vpp == (VPageDescr *) NULL) if (vpp == (VPageDescr *) NULL)
...@@ -1537,8 +1936,8 @@ vc_tidreapped(ItemPointer itemptr, VPageList vpl) ...@@ -1537,8 +1936,8 @@ vc_tidreapped(ItemPointer itemptr, VPageList vpl)
return vp; return vp;
} }
voff = (OffsetNumber *) vc_find_eq((char *) (vp->vpd_offsets), voff = (OffsetNumber *) vc_find_eq((void *) (vp->vpd_offsets),
vp->vpd_offsets_free, sizeof(OffsetNumber), (char *) &ioffno, vp->vpd_offsets_free, sizeof(OffsetNumber), (void *) &ioffno,
vc_cmp_offno); vc_cmp_offno);
if (voff == (OffsetNumber *) NULL) if (voff == (OffsetNumber *) NULL)
...@@ -1998,8 +2397,9 @@ vc_free(VRelList vrl) ...@@ -1998,8 +2397,9 @@ vc_free(VRelList vrl)
MemoryContextSwitchTo(old); MemoryContextSwitchTo(old);
} }
static char * static void *
vc_find_eq(char *bot, int nelem, int size, char *elm, int (*compar) (char *, char *)) vc_find_eq(void *bot, int nelem, int size, void *elm,
int (*compar) (const void *, const void *))
{ {
int res; int res;
int last = nelem - 1; int last = nelem - 1;
...@@ -2053,7 +2453,7 @@ vc_find_eq(char *bot, int nelem, int size, char *elm, int (*compar) (char *, cha ...@@ -2053,7 +2453,7 @@ vc_find_eq(char *bot, int nelem, int size, char *elm, int (*compar) (char *, cha
} /* vc_find_eq */ } /* vc_find_eq */
static int static int
vc_cmp_blk(char *left, char *right) vc_cmp_blk(const void *left, const void *right)
{ {
BlockNumber lblk, BlockNumber lblk,
rblk; rblk;
...@@ -2070,7 +2470,7 @@ vc_cmp_blk(char *left, char *right) ...@@ -2070,7 +2470,7 @@ vc_cmp_blk(char *left, char *right)
} /* vc_cmp_blk */ } /* vc_cmp_blk */
static int static int
vc_cmp_offno(char *left, char *right) vc_cmp_offno(const void *left, const void *right)
{ {
if (*(OffsetNumber *) left < *(OffsetNumber *) right) if (*(OffsetNumber *) left < *(OffsetNumber *) right)
...@@ -2081,6 +2481,33 @@ vc_cmp_offno(char *left, char *right) ...@@ -2081,6 +2481,33 @@ vc_cmp_offno(char *left, char *right)
} /* vc_cmp_offno */ } /* vc_cmp_offno */
static int
vc_cmp_vtlinks(const void *left, const void *right)
{
if (((VTupleLink)left)->new_tid.ip_blkid.bi_hi <
((VTupleLink)right)->new_tid.ip_blkid.bi_hi)
return -1;
if (((VTupleLink)left)->new_tid.ip_blkid.bi_hi >
((VTupleLink)right)->new_tid.ip_blkid.bi_hi)
return 1;
/* bi_hi-es are equal */
if (((VTupleLink)left)->new_tid.ip_blkid.bi_lo <
((VTupleLink)right)->new_tid.ip_blkid.bi_lo)
return -1;
if (((VTupleLink)left)->new_tid.ip_blkid.bi_lo >
((VTupleLink)right)->new_tid.ip_blkid.bi_lo)
return 1;
/* bi_lo-es are equal */
if (((VTupleLink)left)->new_tid.ip_posid <
((VTupleLink)right)->new_tid.ip_posid)
return -1;
if (((VTupleLink)left)->new_tid.ip_posid >
((VTupleLink)right)->new_tid.ip_posid)
return 1;
return 0;
}
static void static void
vc_getindices(Oid relid, int *nindices, Relation **Irel) vc_getindices(Oid relid, int *nindices, Relation **Irel)
...@@ -2230,7 +2657,7 @@ vc_enough_space(VPageDescr vpd, Size len) ...@@ -2230,7 +2657,7 @@ vc_enough_space(VPageDescr vpd, Size len)
return true; /* and len <= free_space */ return true; /* and len <= free_space */
/* ok. noff_usd >= noff_free and so we'll have to allocate new itemid */ /* ok. noff_usd >= noff_free and so we'll have to allocate new itemid */
if (len <= vpd->vpd_free - sizeof(ItemIdData)) if (len + DOUBLEALIGN(sizeof(ItemIdData)) <= vpd->vpd_free)
return true; return true;
return false; return false;
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.49 1999/02/21 03:49:21 scrappy Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/buffer/bufmgr.c,v 1.50 1999/03/28 20:32:17 vadim Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -95,7 +95,7 @@ static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum, ...@@ -95,7 +95,7 @@ static BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
static int FlushBuffer(Buffer buffer, bool release); static int FlushBuffer(Buffer buffer, bool release);
static void BufferSync(void); static void BufferSync(void);
static int BufferReplace(BufferDesc *bufHdr, bool bufferLockHeld); static int BufferReplace(BufferDesc *bufHdr, bool bufferLockHeld);
static void PrintBufferDescs(void); void PrintBufferDescs(void);
/* not static but used by vacuum only ... */ /* not static but used by vacuum only ... */
int BlowawayRelationBuffers(Relation rel, BlockNumber block); int BlowawayRelationBuffers(Relation rel, BlockNumber block);
...@@ -1208,23 +1208,24 @@ int ...@@ -1208,23 +1208,24 @@ int
BufferPoolCheckLeak() BufferPoolCheckLeak()
{ {
int i; int i;
int error = 0; int result = 0;
for (i = 1; i <= NBuffers; i++) for (i = 1; i <= NBuffers; i++)
{ {
if (BufferIsValid(i)) if (BufferIsValid(i))
{ {
BufferDesc *buf = &(BufferDescriptors[i - 1]);
elog(NOTICE, elog(NOTICE,
"buffer leak [%d] detected in BufferPoolCheckLeak()", i - 1); "Buffer Leak: [%03d] (freeNext=%d, freePrev=%d, \
error = 1; relname=%s, blockNum=%d, flags=0x%x, refcount=%d %d)",
i - 1, buf->freeNext, buf->freePrev,
buf->sb_relname, buf->tag.blockNum, buf->flags,
buf->refcount, PrivateRefCount[i - 1]);
result = 1;
} }
} }
if (error) return (result);
{
PrintBufferDescs();
return 1;
}
return 0;
} }
/* ------------------------------------------------ /* ------------------------------------------------
...@@ -1465,7 +1466,7 @@ DropBuffers(Oid dbid) ...@@ -1465,7 +1466,7 @@ DropBuffers(Oid dbid)
* use only. * use only.
* ----------------------------------------------------------------- * -----------------------------------------------------------------
*/ */
static void void
PrintBufferDescs() PrintBufferDescs()
{ {
int i; int i;
...@@ -1474,16 +1475,14 @@ PrintBufferDescs() ...@@ -1474,16 +1475,14 @@ PrintBufferDescs()
if (IsUnderPostmaster) if (IsUnderPostmaster)
{ {
SpinAcquire(BufMgrLock); SpinAcquire(BufMgrLock);
#ifdef NOT_USED
for (i = 0; i < NBuffers; ++i, ++buf) for (i = 0; i < NBuffers; ++i, ++buf)
{ {
elog(NOTICE, "[%02d] (freeNext=%d, freePrev=%d, relname=%s, \ elog(DEBUG, "[%02d] (freeNext=%d, freePrev=%d, relname=%s, \
blockNum=%d, flags=0x%x, refcount=%d %d)", blockNum=%d, flags=0x%x, refcount=%d %d)",
i, buf->freeNext, buf->freePrev, i, buf->freeNext, buf->freePrev,
buf->sb_relname, buf->tag.blockNum, buf->flags, buf->sb_relname, buf->tag.blockNum, buf->flags,
buf->refcount, PrivateRefCount[i]); buf->refcount, PrivateRefCount[i]);
} }
#endif
SpinRelease(BufMgrLock); SpinRelease(BufMgrLock);
} }
else else
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.37 1999/02/22 06:16:48 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/ipc/shmem.c,v 1.38 1999/03/28 20:32:22 vadim Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -636,12 +636,13 @@ TransactionIdIsInProgress(TransactionId xid) ...@@ -636,12 +636,13 @@ TransactionIdIsInProgress(TransactionId xid)
Snapshot Snapshot
GetSnapshotData(bool serializable) GetSnapshotData(bool serializable)
{ {
Snapshot snapshot = (Snapshot) malloc(sizeof(SnapshotData)); Snapshot snapshot = (Snapshot) malloc(sizeof(SnapshotData));
ShmemIndexEnt *result; ShmemIndexEnt *result;
PROC *proc; PROC *proc;
TransactionId cid = GetCurrentTransactionId(); TransactionId cid = GetCurrentTransactionId();
uint32 count = 0; TransactionId xid;
uint32 have = 32; uint32 count = 0;
uint32 have = 32;
Assert(ShmemIndex); Assert(ShmemIndex);
...@@ -669,19 +670,20 @@ GetSnapshotData(bool serializable) ...@@ -669,19 +670,20 @@ GetSnapshotData(bool serializable)
strncmp(result->key, "PID ", 4) != 0) strncmp(result->key, "PID ", 4) != 0)
continue; continue;
proc = (PROC *) MAKE_PTR(result->location); proc = (PROC *) MAKE_PTR(result->location);
if (proc == MyProc || proc->xid < FirstTransactionId) xid = proc->xid; /* we don't use spin-locking in xact.c ! */
if (proc == MyProc || xid < FirstTransactionId)
continue; continue;
if (proc->xid < snapshot->xmin) if (xid < snapshot->xmin)
snapshot->xmin = proc->xid; snapshot->xmin = xid;
else if (proc->xid > snapshot->xmax) else if (xid > snapshot->xmax)
snapshot->xmax = proc->xid; snapshot->xmax = xid;
if (have == 0) if (have == 0)
{ {
snapshot->xip = (TransactionId *) realloc(snapshot->xip, snapshot->xip = (TransactionId *) realloc(snapshot->xip,
(count + 32) * sizeof(TransactionId)); (count + 32) * sizeof(TransactionId));
have = 32; have = 32;
} }
snapshot->xip[count] = proc->xid; snapshot->xip[count] = xid;
have--; have--;
count++; count++;
} }
...@@ -692,3 +694,48 @@ GetSnapshotData(bool serializable) ...@@ -692,3 +694,48 @@ GetSnapshotData(bool serializable)
elog(ERROR, "GetSnapshotData: ShmemIndex corrupted"); elog(ERROR, "GetSnapshotData: ShmemIndex corrupted");
return NULL; return NULL;
} }
/*
 * GetXmaxRecent -- compute the oldest xmin advertised by any other backend.
 *
 *		*XmaxRecent is first set via ReadNewTransactionId() and is then
 *		lowered to the smallest proc->xmin found among the "PID " entries
 *		of ShmemIndex.  Our own PROC and entries whose xmin is below
 *		FirstTransactionId are ignored.
 *		It's used by vacuum to decide what deleted
 *		tuples must be preserved in a table.
 *
 *		The result is delivered through the XmaxRecent pointer; the
 *		function itself returns nothing.
 *
 * And yet another strange func for this place... - vadim 03/18/99
 */
void
GetXmaxRecent(TransactionId *XmaxRecent)
{
	ShmemIndexEnt *result;
	PROC	   *proc;
	TransactionId xmin;

	Assert(ShmemIndex);

	/* start from the next XID to be assigned as the upper bound */
	ReadNewTransactionId(XmaxRecent);

	SpinAcquire(ShmemIndexLock);

	/* presumably resets the sequential scan state -- TODO confirm */
	hash_seq((HTAB *) NULL);
	while ((result = (ShmemIndexEnt *) hash_seq(ShmemIndex)) != NULL)
	{
		/* TRUE marks the normal end of the scan: we are done */
		if (result == (ShmemIndexEnt *) TRUE)
		{
			SpinRelease(ShmemIndexLock);
			return;
		}
		/* only entries describing backend PROC structures are of interest */
		if (result->location == INVALID_OFFSET ||
			strncmp(result->key, "PID ", 4) != 0)
			continue;
		proc = (PROC *) MAKE_PTR(result->location);
		xmin = proc->xmin;		/* we don't use spin-locking in xact.c ! */
		if (proc == MyProc || xmin < FirstTransactionId)
			continue;
		if (xmin < *XmaxRecent)
			*XmaxRecent = xmin;
	}

	/* hash_seq returned NULL before signalling end of scan */
	SpinRelease(ShmemIndexLock);
	elog(ERROR, "GetXmaxRecent: ShmemIndex corrupted");
	/* no value is returned: this function is declared void */
}
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Attic/multi.c,v 1.27 1999/02/13 23:18:27 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/Attic/multi.c,v 1.28 1999/03/28 20:32:25 vadim Exp $
* *
* NOTES: * NOTES:
* (1) The lock.c module assumes that the caller here is doing * (1) The lock.c module assumes that the caller here is doing
...@@ -34,55 +34,6 @@ static bool MultiAcquire(LOCKMETHOD lockmethod, LOCKTAG *tag, ...@@ -34,55 +34,6 @@ static bool MultiAcquire(LOCKMETHOD lockmethod, LOCKTAG *tag,
static bool MultiRelease(LOCKMETHOD lockmethod, LOCKTAG *tag, static bool MultiRelease(LOCKMETHOD lockmethod, LOCKTAG *tag,
LOCKMODE lockmode, PG_LOCK_LEVEL level); LOCKMODE lockmode, PG_LOCK_LEVEL level);
#ifdef LowLevelLocking
static MASK MultiConflicts[] = {
(int) NULL,
/* RowShareLock */
(1 << ExclusiveLock),
/* RowExclusiveLock */
(1 << ExclusiveLock) | (1 << ShareRowExclusiveLock) | (1 << ShareLock),
/* ShareLock */
(1 << ExclusiveLock) | (1 << ShareRowExclusiveLock) |
(1 << RowExclusiveLock),
/* ShareRowExclusiveLock */
(1 << ExclusiveLock) | (1 << ShareRowExclusiveLock) |
(1 << ShareLock) | (1 << RowExclusiveLock),
/* ExclusiveLock */
(1 << ExclusiveLock) | (1 << ShareRowExclusiveLock) | (1 << ShareLock) |
(1 << RowExclusiveLock) | (1 << RowShareLock),
/* ObjShareLock */
(1 << ObjExclusiveLock),
/* ObjExclusiveLock */
(1 << ObjExclusiveLock) | (1 << ObjShareLock),
/* ExtendLock */
(1 << ExtendLock)
};
/*
* write locks have higher priority than read locks and extend locks. May
* want to treat INTENT locks differently.
*/
static int MultiPrios[] = {
(int) NULL,
2,
1,
2,
1,
1
};
#else
/* /*
* INTENT indicates to higher level that a lower level lock has been * INTENT indicates to higher level that a lower level lock has been
* set. For example, a write lock on a tuple conflicts with a write * set. For example, a write lock on a tuple conflicts with a write
...@@ -121,8 +72,6 @@ static int MultiPrios[] = { ...@@ -121,8 +72,6 @@ static int MultiPrios[] = {
1 1
}; };
#endif /* !LowLevelLocking */
/* /*
* Lock table identifier for this lock table. The multi-level * Lock table identifier for this lock table. The multi-level
* lock table is ONE lock table, not three. * lock table is ONE lock table, not three.
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.51 1999/02/21 01:41:45 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.52 1999/03/28 20:32:26 vadim Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -46,7 +46,7 @@ ...@@ -46,7 +46,7 @@
* This is so that we can support more backends. (system-wide semaphore * This is so that we can support more backends. (system-wide semaphore
* sets run out pretty fast.) -ay 4/95 * sets run out pretty fast.) -ay 4/95
* *
* $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.51 1999/02/21 01:41:45 tgl Exp $ * $Header: /cvsroot/pgsql/src/backend/storage/lmgr/proc.c,v 1.52 1999/03/28 20:32:26 vadim Exp $
*/ */
#include <sys/time.h> #include <sys/time.h>
#include <unistd.h> #include <unistd.h>
...@@ -300,9 +300,7 @@ InitProcess(IPCKey key) ...@@ -300,9 +300,7 @@ InitProcess(IPCKey key)
MyProc->pid = MyProcPid; MyProc->pid = MyProcPid;
MyProc->xid = InvalidTransactionId; MyProc->xid = InvalidTransactionId;
#ifdef LowLevelLocking
MyProc->xmin = InvalidTransactionId; MyProc->xmin = InvalidTransactionId;
#endif
/* ---------------- /* ----------------
* Start keeping spin lock stats from here on. Any botch before * Start keeping spin lock stats from here on. Any botch before
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.25 1999/02/13 23:20:19 momjian Exp $ * $Header: /cvsroot/pgsql/src/backend/utils/time/tqual.c,v 1.26 1999/03/28 20:32:29 vadim Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -86,11 +86,26 @@ HeapTupleSatisfiesItself(HeapTupleHeader tuple) ...@@ -86,11 +86,26 @@ HeapTupleSatisfiesItself(HeapTupleHeader tuple)
if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED)) if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
{ {
if (tuple->t_infomask & HEAP_XMIN_INVALID) /* xid invalid or if (tuple->t_infomask & HEAP_XMIN_INVALID)
* aborted */
return false; return false;
if (TransactionIdIsCurrentTransactionId(tuple->t_xmin)) if (tuple->t_infomask & HEAP_MOVED_OFF)
{
if (TransactionIdDidCommit((TransactionId)tuple->t_cmin))
{
tuple->t_infomask |= HEAP_XMIN_INVALID;
return false;
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
{
if (!TransactionIdDidCommit((TransactionId)tuple->t_cmin))
{
tuple->t_infomask |= HEAP_XMIN_INVALID;
return false;
}
}
else if (TransactionIdIsCurrentTransactionId(tuple->t_xmin))
{ {
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
return true; return true;
...@@ -98,14 +113,12 @@ HeapTupleSatisfiesItself(HeapTupleHeader tuple) ...@@ -98,14 +113,12 @@ HeapTupleSatisfiesItself(HeapTupleHeader tuple)
return true; return true;
return false; return false;
} }
else if (!TransactionIdDidCommit(tuple->t_xmin))
if (!TransactionIdDidCommit(tuple->t_xmin))
{ {
if (TransactionIdDidAbort(tuple->t_xmin)) if (TransactionIdDidAbort(tuple->t_xmin))
tuple->t_infomask |= HEAP_XMIN_INVALID; /* aborted */ tuple->t_infomask |= HEAP_XMIN_INVALID; /* aborted */
return false; return false;
} }
tuple->t_infomask |= HEAP_XMIN_COMMITTED; tuple->t_infomask |= HEAP_XMIN_COMMITTED;
} }
/* the tuple was inserted validly */ /* the tuple was inserted validly */
...@@ -200,11 +213,26 @@ HeapTupleSatisfiesNow(HeapTupleHeader tuple) ...@@ -200,11 +213,26 @@ HeapTupleSatisfiesNow(HeapTupleHeader tuple)
if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED)) if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
{ {
if (tuple->t_infomask & HEAP_XMIN_INVALID) /* xid invalid or if (tuple->t_infomask & HEAP_XMIN_INVALID)
* aborted */
return false; return false;
if (TransactionIdIsCurrentTransactionId(tuple->t_xmin)) if (tuple->t_infomask & HEAP_MOVED_OFF)
{
if (TransactionIdDidCommit((TransactionId)tuple->t_cmin))
{
tuple->t_infomask |= HEAP_XMIN_INVALID;
return false;
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
{
if (!TransactionIdDidCommit((TransactionId)tuple->t_cmin))
{
tuple->t_infomask |= HEAP_XMIN_INVALID;
return false;
}
}
else if (TransactionIdIsCurrentTransactionId(tuple->t_xmin))
{ {
if (CommandIdGEScanCommandId(tuple->t_cmin)) if (CommandIdGEScanCommandId(tuple->t_cmin))
return false; /* inserted after scan started */ return false; /* inserted after scan started */
...@@ -222,18 +250,12 @@ HeapTupleSatisfiesNow(HeapTupleHeader tuple) ...@@ -222,18 +250,12 @@ HeapTupleSatisfiesNow(HeapTupleHeader tuple)
else else
return false; /* deleted before scan started */ return false; /* deleted before scan started */
} }
else if (!TransactionIdDidCommit(tuple->t_xmin))
/*
* this call is VERY expensive - requires a log table lookup.
*/
if (!TransactionIdDidCommit(tuple->t_xmin))
{ {
if (TransactionIdDidAbort(tuple->t_xmin)) if (TransactionIdDidAbort(tuple->t_xmin))
tuple->t_infomask |= HEAP_XMIN_INVALID; /* aborted */ tuple->t_infomask |= HEAP_XMIN_INVALID; /* aborted */
return false; return false;
} }
tuple->t_infomask |= HEAP_XMIN_COMMITTED; tuple->t_infomask |= HEAP_XMIN_COMMITTED;
} }
...@@ -288,7 +310,23 @@ HeapTupleSatisfiesUpdate(HeapTuple tuple) ...@@ -288,7 +310,23 @@ HeapTupleSatisfiesUpdate(HeapTuple tuple)
if (th->t_infomask & HEAP_XMIN_INVALID) /* xid invalid or aborted */ if (th->t_infomask & HEAP_XMIN_INVALID) /* xid invalid or aborted */
return HeapTupleInvisible; return HeapTupleInvisible;
if (TransactionIdIsCurrentTransactionId(th->t_xmin)) if (th->t_infomask & HEAP_MOVED_OFF)
{
if (TransactionIdDidCommit((TransactionId)th->t_cmin))
{
th->t_infomask |= HEAP_XMIN_INVALID;
return HeapTupleInvisible;
}
}
else if (th->t_infomask & HEAP_MOVED_IN)
{
if (!TransactionIdDidCommit((TransactionId)th->t_cmin))
{
th->t_infomask |= HEAP_XMIN_INVALID;
return HeapTupleInvisible;
}
}
else if (TransactionIdIsCurrentTransactionId(th->t_xmin))
{ {
if (CommandIdGEScanCommandId(th->t_cmin) && !heapisoverride()) if (CommandIdGEScanCommandId(th->t_cmin) && !heapisoverride())
return HeapTupleInvisible; /* inserted after scan started */ return HeapTupleInvisible; /* inserted after scan started */
...@@ -306,19 +344,12 @@ HeapTupleSatisfiesUpdate(HeapTuple tuple) ...@@ -306,19 +344,12 @@ HeapTupleSatisfiesUpdate(HeapTuple tuple)
else else
return HeapTupleInvisible; /* updated before scan started */ return HeapTupleInvisible; /* updated before scan started */
} }
else if (!TransactionIdDidCommit(th->t_xmin))
/*
* This call is VERY expensive - requires a log table lookup.
* Actually, this should be done by query before...
*/
if (!TransactionIdDidCommit(th->t_xmin))
{ {
if (TransactionIdDidAbort(th->t_xmin)) if (TransactionIdDidAbort(th->t_xmin))
th->t_infomask |= HEAP_XMIN_INVALID; /* aborted */ th->t_infomask |= HEAP_XMIN_INVALID; /* aborted */
return HeapTupleInvisible; return HeapTupleInvisible;
} }
th->t_infomask |= HEAP_XMIN_COMMITTED; th->t_infomask |= HEAP_XMIN_COMMITTED;
} }
...@@ -375,10 +406,26 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple) ...@@ -375,10 +406,26 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple)
if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED)) if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
{ {
if (tuple->t_infomask & HEAP_XMIN_INVALID) /* xid invalid or aborted */ if (tuple->t_infomask & HEAP_XMIN_INVALID)
return false; return false;
if (TransactionIdIsCurrentTransactionId(tuple->t_xmin)) if (tuple->t_infomask & HEAP_MOVED_OFF)
{
if (TransactionIdDidCommit((TransactionId)tuple->t_cmin))
{
tuple->t_infomask |= HEAP_XMIN_INVALID;
return false;
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
{
if (!TransactionIdDidCommit((TransactionId)tuple->t_cmin))
{
tuple->t_infomask |= HEAP_XMIN_INVALID;
return false;
}
}
else if (TransactionIdIsCurrentTransactionId(tuple->t_xmin))
{ {
if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */ if (tuple->t_infomask & HEAP_XMAX_INVALID) /* xid invalid */
return true; return true;
...@@ -390,8 +437,7 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple) ...@@ -390,8 +437,7 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple)
return false; return false;
} }
else if (!TransactionIdDidCommit(tuple->t_xmin))
if (!TransactionIdDidCommit(tuple->t_xmin))
{ {
if (TransactionIdDidAbort(tuple->t_xmin)) if (TransactionIdDidAbort(tuple->t_xmin))
{ {
...@@ -401,7 +447,6 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple) ...@@ -401,7 +447,6 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple)
SnapshotDirty->xmin = tuple->t_xmin; SnapshotDirty->xmin = tuple->t_xmin;
return true; /* in insertion by other */ return true; /* in insertion by other */
} }
tuple->t_infomask |= HEAP_XMIN_COMMITTED; tuple->t_infomask |= HEAP_XMIN_COMMITTED;
} }
...@@ -451,11 +496,26 @@ HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot) ...@@ -451,11 +496,26 @@ HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot)
if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED)) if (!(tuple->t_infomask & HEAP_XMIN_COMMITTED))
{ {
if (tuple->t_infomask & HEAP_XMIN_INVALID) /* xid invalid or if (tuple->t_infomask & HEAP_XMIN_INVALID)
* aborted */
return false; return false;
if (TransactionIdIsCurrentTransactionId(tuple->t_xmin)) if (tuple->t_infomask & HEAP_MOVED_OFF)
{
if (TransactionIdDidCommit((TransactionId)tuple->t_cmin))
{
tuple->t_infomask |= HEAP_XMIN_INVALID;
return false;
}
}
else if (tuple->t_infomask & HEAP_MOVED_IN)
{
if (!TransactionIdDidCommit((TransactionId)tuple->t_cmin))
{
tuple->t_infomask |= HEAP_XMIN_INVALID;
return false;
}
}
else if (TransactionIdIsCurrentTransactionId(tuple->t_xmin))
{ {
if (CommandIdGEScanCommandId(tuple->t_cmin)) if (CommandIdGEScanCommandId(tuple->t_cmin))
return false; /* inserted after scan started */ return false; /* inserted after scan started */
...@@ -473,18 +533,12 @@ HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot) ...@@ -473,18 +533,12 @@ HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot)
else else
return false; /* deleted before scan started */ return false; /* deleted before scan started */
} }
else if (!TransactionIdDidCommit(tuple->t_xmin))
/*
* this call is VERY expensive - requires a log table lookup.
*/
if (!TransactionIdDidCommit(tuple->t_xmin))
{ {
if (TransactionIdDidAbort(tuple->t_xmin)) if (TransactionIdDidAbort(tuple->t_xmin))
tuple->t_infomask |= HEAP_XMIN_INVALID; /* aborted */ tuple->t_infomask |= HEAP_XMIN_INVALID; /* aborted */
return false; return false;
} }
tuple->t_infomask |= HEAP_XMIN_COMMITTED; tuple->t_infomask |= HEAP_XMIN_COMMITTED;
} }
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* *
* Copyright (c) 1994, Regents of the University of California * Copyright (c) 1994, Regents of the University of California
* *
* $Id: htup.h,v 1.13 1999/02/13 23:20:54 momjian Exp $ * $Id: htup.h,v 1.14 1999/03/28 20:32:30 vadim Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -117,8 +117,11 @@ typedef HeapTupleData *HeapTuple; ...@@ -117,8 +117,11 @@ typedef HeapTupleData *HeapTuple;
#define HEAP_XMAX_COMMITTED 0x0400 /* t_xmax committed */ #define HEAP_XMAX_COMMITTED 0x0400 /* t_xmax committed */
#define HEAP_XMAX_INVALID 0x0800 /* t_xmax invalid/aborted */ #define HEAP_XMAX_INVALID 0x0800 /* t_xmax invalid/aborted */
#define HEAP_MARKED_FOR_UPDATE 0x1000 /* marked for UPDATE */ #define HEAP_MARKED_FOR_UPDATE 0x1000 /* marked for UPDATE */
#define HEAP_UPDATED 0x2000 /* this is UPDATEd version of row */
#define HEAP_MOVED_OFF 0x4000 /* removed or moved to another place by vacuum */
#define HEAP_MOVED_IN 0x8000 /* moved from another place by vacuum */
#define HEAP_XACT_MASK 0x0F00 /* */ #define HEAP_XACT_MASK 0xFF00 /* */
#define HeapTupleNoNulls(tuple) \ #define HeapTupleNoNulls(tuple) \
(!(((HeapTuple) (tuple))->t_data->t_infomask & HEAP_HASNULL)) (!(((HeapTuple) (tuple))->t_data->t_infomask & HEAP_HASNULL))
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* *
* Copyright (c) 1994, Regents of the University of California * Copyright (c) 1994, Regents of the University of California
* *
* $Id: nbtree.h,v 1.23 1999/02/13 23:20:55 momjian Exp $ * $Id: nbtree.h,v 1.24 1999/03/28 20:32:34 vadim Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -42,6 +42,7 @@ typedef struct BTPageOpaqueData ...@@ -42,6 +42,7 @@ typedef struct BTPageOpaqueData
{ {
BlockNumber btpo_prev; BlockNumber btpo_prev;
BlockNumber btpo_next; BlockNumber btpo_next;
BlockNumber btpo_parent;
uint16 btpo_flags; uint16 btpo_flags;
#define BTP_LEAF (1 << 0) #define BTP_LEAF (1 << 0)
...@@ -176,13 +177,6 @@ typedef struct BTPageState ...@@ -176,13 +177,6 @@ typedef struct BTPageState
#define BT_INSERTION 0 #define BT_INSERTION 0
#define BT_DESCENT 1 #define BT_DESCENT 1
/*
* We must classify index modification types for the benefit of
* _bt_adjscans.
*/
#define BT_INSERT 0
#define BT_DELETE 1
/* /*
* In general, the btree code tries to localize its knowledge about * In general, the btree code tries to localize its knowledge about
* page layout to a couple of routines. However, we need a special * page layout to a couple of routines. However, we need a special
...@@ -268,7 +262,7 @@ extern void btdelete(Relation rel, ItemPointer tid); ...@@ -268,7 +262,7 @@ extern void btdelete(Relation rel, ItemPointer tid);
*/ */
extern void _bt_regscan(IndexScanDesc scan); extern void _bt_regscan(IndexScanDesc scan);
extern void _bt_dropscan(IndexScanDesc scan); extern void _bt_dropscan(IndexScanDesc scan);
extern void _bt_adjscans(Relation rel, ItemPointer tid, int op); extern void _bt_adjscans(Relation rel, ItemPointer tid);
/* /*
* prototypes for functions in nbtsearch.c * prototypes for functions in nbtsearch.c
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* *
* Copyright (c) 1994, Regents of the University of California * Copyright (c) 1994, Regents of the University of California
* *
* $Id: vacuum.h,v 1.17 1999/02/13 23:21:20 momjian Exp $ * $Id: vacuum.h,v 1.18 1999/03/28 20:32:38 vadim Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -95,16 +95,35 @@ typedef struct VRelListData ...@@ -95,16 +95,35 @@ typedef struct VRelListData
typedef VRelListData *VRelList; typedef VRelListData *VRelList;
/*
 * VTupleLinkData - a pair of tuple identifiers (this_tid, new_tid).
 *
 * NOTE(review): presumably links a tuple at this_tid to its newer version
 * at new_tid so vacuum can follow update chains -- confirm against the
 * code in vacuum.c that fills VRelStats.vtlinks.
 */
typedef struct VTupleLinkData
{
ItemPointerData new_tid;	/* the "new" tuple ID of the pair */
ItemPointerData this_tid;	/* the tuple ID this entry is recorded for */
} VTupleLinkData;
/* Pointer form of VTupleLinkData. */
typedef VTupleLinkData *VTupleLink;
/*
 * VTupleMoveData - describes one planned tuple move: which tuple (tid),
 * the destination page descriptor (vpd), and whether that descriptor must
 * be cleaned first (cleanVpd).
 *
 * NOTE(review): presumably built by vacuum while deciding where to
 * relocate tuples -- confirm against vacuum.c.
 */
typedef struct VTupleMoveData
{
ItemPointerData tid; /* tuple ID */
VPageDescr vpd; /* where to move */
bool cleanVpd; /* clean vpd before using */
} VTupleMoveData;
/* Pointer form of VTupleMoveData. */
typedef VTupleMoveData *VTupleMove;
typedef struct VRelStats typedef struct VRelStats
{ {
Oid relid; Oid relid;
int num_tuples; int num_tuples;
int num_pages; int num_pages;
Size min_tlen; Size min_tlen;
Size max_tlen; Size max_tlen;
bool hasindex; bool hasindex;
int va_natts; /* number of attrs being analyzed */ int va_natts; /* number of attrs being analyzed */
VacAttrStats *vacattrstats; VacAttrStats *vacattrstats;
int num_vtlinks;
VTupleLink vtlinks;
} VRelStats; } VRelStats;
extern bool VacuumRunning; extern bool VacuumRunning;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment