Commit 47ad7912 authored by Heikki Linnakangas's avatar Heikki Linnakangas

Fix bugs in Serializable Snapshot Isolation.

Change the way UPDATEs are handled. Instead of maintaining a chain of
tuple-level locks in shared memory, copy any existing locks on the old
tuple to the new tuple at UPDATE. Any existing page-level lock needs to
be duplicated too, as a lock on the new tuple. That was neglected
previously.

Store xmin on tuple-level predicate locks, to distinguish a lock on an old
already-recycled tuple from a new tuple at the same physical location.
Failure to distinguish them caused loops in the tuple-lock chains, as
reported by YAMAMOTO Takashi. Although we don't use the chain representation
of UPDATEs anymore, it seems like a good idea to store the xmin to avoid
some false positives if no other reason.

CheckSingleTargetForConflictsIn now correctly handles the case where a lock
that's being held is not reflected in the local lock table. That happens
if another backend acquires a lock on our behalf due to an UPDATE or a page
split.

PredicateLockPageCombine now retains locks for the page that is being
removed, rather than removing them. This prevents a potentially dangerous
false-positive inconsistency where the local lock table believes that a lock
is held, but it is actually not.

Dan Ports and Kevin Grittner
parent 16143d64
......@@ -824,7 +824,6 @@ restart:
if (_bt_page_recyclable(page))
{
/* Okay to recycle this page */
Assert(!PageIsPredicateLocked(rel, blkno));
RecordFreeIndexPage(rel, blkno);
vstate->totFreePages++;
stats->pages_deleted++;
......
This diff is collapsed.
......@@ -78,7 +78,6 @@ typedef enum LWLockId
SerializableFinishedListLock,
SerializablePredicateLockListLock,
OldSerXidLock,
PredicateLockNextRowLinkLock,
/* Individual lock IDs end here */
FirstBufMappingLock,
FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
......
......@@ -150,8 +150,6 @@ typedef struct PredXactListData
SerCommitSeqNo HavePartialClearedThrough; /* have cleared through this
* seq no */
SERIALIZABLEXACT *OldCommittedSxact; /* shared copy of dummy sxact */
bool NeedTargetLinkCleanup; /* to save cleanup effort for rare
* case */
PredXactListElement element;
} PredXactListData;
......@@ -231,9 +229,13 @@ typedef struct SERIALIZABLEXID
/*
* The PREDICATELOCKTARGETTAG struct identifies a database object which can
* be the target of predicate locks. It is designed to fit into 16 bytes
* with no padding. Note that this would need adjustment if we widen Oid or
* BlockNumber to more than 32 bits.
* be the target of predicate locks.
*
* Note that the hash function being used doesn't properly respect tag
* length -- it will go to a four byte boundary past the end of the tag.
* If you change this struct, make sure any slack space is initialized,
* so that any random bytes in the middle or at the end are not included
* in the hash.
*
* TODO SSI: If we always use the same fields for the same type of value, we
* should rename these. Holding off until it's clear there are no exceptions.
......@@ -247,8 +249,8 @@ typedef struct PREDICATELOCKTARGETTAG
uint32 locktag_field1; /* a 32-bit ID field */
uint32 locktag_field2; /* a 32-bit ID field */
uint32 locktag_field3; /* a 32-bit ID field */
uint16 locktag_field4; /* a 16-bit ID field */
uint16 locktag_field5; /* a 16-bit ID field */
uint32 locktag_field4; /* a 32-bit ID field */
uint32 locktag_field5; /* a 32-bit ID field */
} PREDICATELOCKTARGETTAG;
/*
......@@ -260,12 +262,11 @@ typedef struct PREDICATELOCKTARGETTAG
* already have one. An entry is removed when the last lock is removed from
* its list.
*
* Because a check for predicate locks on a tuple target should also find
* locks on previous versions of the same row, if there are any created by
* overlapping transactions, we keep a pointer to the target for the prior
* version of the row. We also keep a pointer to the next version of the
* row, so that when we no longer have any predicate locks and the back
* pointer is clear, we can clean up the prior pointer for the next version.
* Because a particular target might become obsolete, due to update to a new
* version, before the reading transaction is obsolete, we need some way to
* prevent errors from reuse of a tuple ID. Rather than attempting to clean
* up the targets as the related tuples are pruned or vacuumed, we check the
* xmin on access. This should be far less costly.
*/
typedef struct PREDICATELOCKTARGET PREDICATELOCKTARGET;
......@@ -277,15 +278,6 @@ struct PREDICATELOCKTARGET
/* data */
SHM_QUEUE predicateLocks; /* list of PREDICATELOCK objects assoc. with
* predicate lock target */
/*
* The following two pointers are only used for tuple locks, and are only
* consulted for conflict detection and cleanup; not for granularity
* promotion.
*/
PREDICATELOCKTARGET *priorVersionOfRow; /* what other locks to check */
PREDICATELOCKTARGET *nextVersionOfRow; /* who has pointer here for
* more targets */
};
......@@ -387,30 +379,32 @@ typedef struct PredicateLockData
(locktag).locktag_field2 = (reloid), \
(locktag).locktag_field3 = InvalidBlockNumber, \
(locktag).locktag_field4 = InvalidOffsetNumber, \
(locktag).locktag_field5 = 0)
(locktag).locktag_field5 = InvalidTransactionId)
#define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
((locktag).locktag_field1 = (dboid), \
(locktag).locktag_field2 = (reloid), \
(locktag).locktag_field3 = (blocknum), \
(locktag).locktag_field4 = InvalidOffsetNumber, \
(locktag).locktag_field5 = 0)
(locktag).locktag_field5 = InvalidTransactionId)
#define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
#define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum,xmin) \
((locktag).locktag_field1 = (dboid), \
(locktag).locktag_field2 = (reloid), \
(locktag).locktag_field3 = (blocknum), \
(locktag).locktag_field4 = (offnum), \
(locktag).locktag_field5 = 0)
(locktag).locktag_field5 = (xmin))
#define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
((locktag).locktag_field1)
((Oid) (locktag).locktag_field1)
#define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \
((locktag).locktag_field2)
((Oid) (locktag).locktag_field2)
#define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \
((locktag).locktag_field3)
((BlockNumber) (locktag).locktag_field3)
#define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
((locktag).locktag_field4)
((OffsetNumber) (locktag).locktag_field4)
#define GET_PREDICATELOCKTARGETTAG_XMIN(locktag) \
((TransactionId) (locktag).locktag_field5)
#define GET_PREDICATELOCKTARGETTAG_TYPE(locktag) \
(((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \
(((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE : \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment