Commit 47ad7912 authored by Heikki Linnakangas

Fix bugs in Serializable Snapshot Isolation.

Change the way UPDATEs are handled. Instead of maintaining a chain of
tuple-level locks in shared memory, copy any existing locks on the old
tuple to the new tuple at UPDATE. Any existing page-level lock needs to
be duplicated too, as a lock on the new tuple. That was neglected
previously.
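
Roughly, the copy-at-UPDATE approach can be pictured as follows. This is an
illustrative sketch only, not the code in predicate.c: the helper
CopyPredicateLocks() and the function name are invented for the example,
while the tag-building macros are the ones touched by this commit.

    static void
    SketchTransferLocksOnUpdate(Relation relation,
                                HeapTuple oldtup, HeapTuple newtup)
    {
        PREDICATELOCKTARGETTAG oldtupletag;
        PREDICATELOCKTARGETTAG oldpagetag;
        PREDICATELOCKTARGETTAG newtupletag;

        /* Tag for the old row version, including its xmin (see below). */
        SET_PREDICATELOCKTARGETTAG_TUPLE(oldtupletag,
                                         relation->rd_node.dbNode,
                                         relation->rd_id,
                                         ItemPointerGetBlockNumber(&(oldtup->t_self)),
                                         ItemPointerGetOffsetNumber(&(oldtup->t_self)),
                                         HeapTupleHeaderGetXmin(oldtup->t_data));

        /* Any page-level lock covering the old tuple must be duplicated too. */
        SET_PREDICATELOCKTARGETTAG_PAGE(oldpagetag,
                                        relation->rd_node.dbNode,
                                        relation->rd_id,
                                        ItemPointerGetBlockNumber(&(oldtup->t_self)));

        /* Tag for the new row version at its new physical location. */
        SET_PREDICATELOCKTARGETTAG_TUPLE(newtupletag,
                                         relation->rd_node.dbNode,
                                         relation->rd_id,
                                         ItemPointerGetBlockNumber(&(newtup->t_self)),
                                         ItemPointerGetOffsetNumber(&(newtup->t_self)),
                                         HeapTupleHeaderGetXmin(newtup->t_data));

        /* Hypothetical helper: copy locks held on the old targets to the new tuple. */
        CopyPredicateLocks(&oldtupletag, &newtupletag);
        CopyPredicateLocks(&oldpagetag, &newtupletag);
    }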

Store xmin on tuple-level predicate locks, to distinguish a lock on an old,
already-recycled tuple from a lock on a new tuple at the same physical
location. Failure to distinguish them caused loops in the tuple-lock chains,
as reported by YAMAMOTO Takashi. Although we no longer use the chain
representation of UPDATEs, it still seems like a good idea to store the xmin,
if for no other reason than to avoid some false positives.
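
For example (illustrative, using the new six-argument form of the tuple tag
macro shown in the diff below), a tuple-level lock target is now built with
the tuple's xmin, so a lock left behind on a pruned tuple can never match an
unrelated tuple that later reuses the same (block, offset) slot:

    PREDICATELOCKTARGETTAG tag;

    SET_PREDICATELOCKTARGETTAG_TUPLE(tag,
                                     relation->rd_node.dbNode,   /* database OID */
                                     relation->rd_id,            /* relation OID */
                                     ItemPointerGetBlockNumber(&(tuple->t_self)),
                                     ItemPointerGetOffsetNumber(&(tuple->t_self)),
                                     HeapTupleHeaderGetXmin(tuple->t_data));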

CheckSingleTargetForConflictsIn now correctly handles the case where a lock
that's being held is not reflected in the local lock table. That happens
if another backend acquires a lock on our behalf due to an UPDATE or a page
split.
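
In other words, a lock found in shared memory may legitimately have no entry
in the backend-local lock table, and that case must be tolerated rather than
treated as corruption. A minimal sketch of that assumption follows
(LocalPredicateLockHash and LOCALPREDICATELOCK are the names used in
predicate.c, but the exact shape here is illustrative, not the committed code):

    LOCALPREDICATELOCK *locallock;

    locallock = (LOCALPREDICATELOCK *) hash_search(LocalPredicateLockHash,
                                                   &targettag,
                                                   HASH_FIND, NULL);
    if (locallock == NULL)
    {
        /*
         * Another backend took this lock on our behalf (UPDATE or page
         * split), so it is not in our local table.  Tolerate the mismatch;
         * the shared-memory state is authoritative.
         */
    }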

PredicateLockPageCombine now retains locks for the page that is being
removed, rather than removing them. This prevents a potentially dangerous
false-positive inconsistency where the local lock table believes that a lock
is held, but it is actually not.
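
The local lock tables of other backends are not updated when locks are moved
in shared memory, so dropping the removed page's locks could leave a backend
believing it still holds a lock that no longer exists, and it would then skip
re-acquiring it. A minimal sketch of the retained-lock behaviour, assuming
(not verified against this patch) that page combine can simply reuse the
page-split transfer path, which copies locks without removing the originals:

    void
    PredicateLockPageCombine(Relation relation, BlockNumber oldblkno,
                             BlockNumber newblkno)
    {
        /*
         * Sketch: transfer the old page's locks to the surviving page but
         * keep the originals, so no backend-local table is left claiming a
         * lock that shared memory has dropped.
         */
        PredicateLockPageSplit(relation, oldblkno, newblkno);
    }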

Dan Ports and Kevin Grittner
parent 16143d64
@@ -824,7 +824,6 @@ restart:
 		if (_bt_page_recyclable(page))
 		{
 			/* Okay to recycle this page */
-			Assert(!PageIsPredicateLocked(rel, blkno));
 			RecordFreeIndexPage(rel, blkno);
 			vstate->totFreePages++;
 			stats->pages_deleted++;
This diff is collapsed.
@@ -78,7 +78,6 @@ typedef enum LWLockId
 	SerializableFinishedListLock,
 	SerializablePredicateLockListLock,
 	OldSerXidLock,
-	PredicateLockNextRowLinkLock,
 	/* Individual lock IDs end here */
 	FirstBufMappingLock,
 	FirstLockMgrLock = FirstBufMappingLock + NUM_BUFFER_PARTITIONS,
@@ -150,8 +150,6 @@ typedef struct PredXactListData
 	SerCommitSeqNo HavePartialClearedThrough;	/* have cleared through this
 												 * seq no */
 	SERIALIZABLEXACT *OldCommittedSxact;	/* shared copy of dummy sxact */
-	bool		NeedTargetLinkCleanup;	/* to save cleanup effort for rare
-										 * case */
 	PredXactListElement element;
 } PredXactListData;
 
@@ -231,9 +229,13 @@ typedef struct SERIALIZABLEXID
 
 /*
  * The PREDICATELOCKTARGETTAG struct identifies a database object which can
- * be the target of predicate locks. It is designed to fit into 16 bytes
- * with no padding. Note that this would need adjustment if we widen Oid or
- * BlockNumber to more than 32 bits.
+ * be the target of predicate locks.
+ *
+ * Note that the hash function being used doesn't properly respect tag
+ * length -- it will go to a four byte boundary past the end of the tag.
+ * If you change this struct, make sure any slack space is initialized,
+ * so that any random bytes in the middle or at the end are not included
+ * in the hash.
  *
  * TODO SSI: If we always use the same fields for the same type of value, we
  * should rename these.  Holding off until it's clear there are no exceptions.
@@ -247,8 +249,8 @@ typedef struct PREDICATELOCKTARGETTAG
 	uint32		locktag_field1; /* a 32-bit ID field */
 	uint32		locktag_field2; /* a 32-bit ID field */
 	uint32		locktag_field3; /* a 32-bit ID field */
-	uint16		locktag_field4; /* a 16-bit ID field */
-	uint16		locktag_field5; /* a 16-bit ID field */
+	uint32		locktag_field4; /* a 32-bit ID field */
+	uint32		locktag_field5; /* a 32-bit ID field */
 } PREDICATELOCKTARGETTAG;
 
 /*
@@ -260,12 +262,11 @@ typedef struct PREDICATELOCKTARGETTAG
  * already have one.  An entry is removed when the last lock is removed from
 * its list.
 *
- * Because a check for predicate locks on a tuple target should also find
- * locks on previous versions of the same row, if there are any created by
- * overlapping transactions, we keep a pointer to the target for the prior
- * version of the row.  We also keep a pointer to the next version of the
- * row, so that when we no longer have any predicate locks and the back
- * pointer is clear, we can clean up the prior pointer for the next version.
+ * Because a particular target might become obsolete, due to update to a new
+ * version, before the reading transaction is obsolete, we need some way to
+ * prevent errors from reuse of a tuple ID.  Rather than attempting to clean
+ * up the targets as the related tuples are pruned or vacuumed, we check the
+ * xmin on access.  This should be far less costly.
 */
 typedef struct PREDICATELOCKTARGET PREDICATELOCKTARGET;
 
@@ -277,15 +278,6 @@ struct PREDICATELOCKTARGET
 	/* data */
 	SHM_QUEUE	predicateLocks; /* list of PREDICATELOCK objects assoc. with
 								 * predicate lock target */
-
-	/*
-	 * The following two pointers are only used for tuple locks, and are only
-	 * consulted for conflict detection and cleanup; not for granularity
-	 * promotion.
-	 */
-	PREDICATELOCKTARGET *priorVersionOfRow;		/* what other locks to check */
-	PREDICATELOCKTARGET *nextVersionOfRow;		/* who has pointer here for
-												 * more targets */
 };
 
 /*
@@ -387,30 +379,32 @@ typedef struct PredicateLockData
 	 (locktag).locktag_field2 = (reloid), \
 	 (locktag).locktag_field3 = InvalidBlockNumber, \
 	 (locktag).locktag_field4 = InvalidOffsetNumber, \
-	 (locktag).locktag_field5 = 0)
+	 (locktag).locktag_field5 = InvalidTransactionId)
 
 #define SET_PREDICATELOCKTARGETTAG_PAGE(locktag,dboid,reloid,blocknum) \
 	((locktag).locktag_field1 = (dboid), \
 	 (locktag).locktag_field2 = (reloid), \
 	 (locktag).locktag_field3 = (blocknum), \
 	 (locktag).locktag_field4 = InvalidOffsetNumber, \
-	 (locktag).locktag_field5 = 0)
+	 (locktag).locktag_field5 = InvalidTransactionId)
 
-#define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum) \
+#define SET_PREDICATELOCKTARGETTAG_TUPLE(locktag,dboid,reloid,blocknum,offnum,xmin) \
 	((locktag).locktag_field1 = (dboid), \
 	 (locktag).locktag_field2 = (reloid), \
 	 (locktag).locktag_field3 = (blocknum), \
 	 (locktag).locktag_field4 = (offnum), \
-	 (locktag).locktag_field5 = 0)
+	 (locktag).locktag_field5 = (xmin))
 
 #define GET_PREDICATELOCKTARGETTAG_DB(locktag) \
-	((locktag).locktag_field1)
+	((Oid) (locktag).locktag_field1)
 #define GET_PREDICATELOCKTARGETTAG_RELATION(locktag) \
-	((locktag).locktag_field2)
+	((Oid) (locktag).locktag_field2)
 #define GET_PREDICATELOCKTARGETTAG_PAGE(locktag) \
-	((locktag).locktag_field3)
+	((BlockNumber) (locktag).locktag_field3)
 #define GET_PREDICATELOCKTARGETTAG_OFFSET(locktag) \
-	((locktag).locktag_field4)
+	((OffsetNumber) (locktag).locktag_field4)
+#define GET_PREDICATELOCKTARGETTAG_XMIN(locktag) \
+	((TransactionId) (locktag).locktag_field5)
 #define GET_PREDICATELOCKTARGETTAG_TYPE(locktag) \
 	(((locktag).locktag_field4 != InvalidOffsetNumber) ? PREDLOCKTAG_TUPLE : \
 	 (((locktag).locktag_field3 != InvalidBlockNumber) ? PREDLOCKTAG_PAGE : \
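
As a usage note for the new tag layout (illustrative only, not code from the
patch): the xmin carried in a tuple-level target lets readers cheaply decide
whether the target still describes the tuple currently stored at that slot,
along these lines:

    if (GET_PREDICATELOCKTARGETTAG_TYPE(targettag) == PREDLOCKTAG_TUPLE &&
        !TransactionIdEquals(GET_PREDICATELOCKTARGETTAG_XMIN(targettag),
                             HeapTupleHeaderGetXmin(tuple->t_data)))
    {
        /* Stale target: it described an older tuple that occupied this slot. */
    }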