Commit 04645bbc authored by Amit Kapila's avatar Amit Kapila

WAL log unchanged toasted replica identity key attributes.

Currently, during UPDATE, the unchanged replica identity key attributes
are not logged separately because they are getting logged as part of the
new tuple. But if they are stored externally then the untoasted values are
not getting logged as part of the new tuple and logical replication won't
be able to replicate such UPDATEs. So we need to log such attributes as
part of the old_key_tuple during UPDATE.

Reported-by: Haiying Tang
Author: Dilip Kumar and Amit Kapila
Reviewed-by: Alvaro Herrera, Haiying Tang, Andres Freund
Backpatch-through: 10
Discussion: https://postgr.es/m/OS0PR01MB611342D0A92D4F4BF26C0F47FB229@OS0PR01MB6113.jpnprd01.prod.outlook.com
parent c76665ed
...@@ -77,7 +77,7 @@ SELECT substr(data, 1, 200) FROM pg_logical_slot_get_changes('regression_slot', ...@@ -77,7 +77,7 @@ SELECT substr(data, 1, 200) FROM pg_logical_slot_get_changes('regression_slot',
table public.toasted_key: INSERT: id[integer]:1 toasted_key[text]:'1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123 table public.toasted_key: INSERT: id[integer]:1 toasted_key[text]:'1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123
COMMIT COMMIT
BEGIN BEGIN
table public.toasted_key: UPDATE: id[integer]:1 toasted_key[text]:unchanged-toast-datum toasted_col1[text]:unchanged-toast-datum toasted_col2[text]:'987654321098765432109876543210987654321098765432109 table public.toasted_key: UPDATE: old-key: toasted_key[text]:'123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
COMMIT COMMIT
BEGIN BEGIN
table public.toasted_key: UPDATE: old-key: toasted_key[text]:'123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678 table public.toasted_key: UPDATE: old-key: toasted_key[text]:'123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678
......
...@@ -850,10 +850,11 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM ...@@ -850,10 +850,11 @@ WITH ( MODULUS <replaceable class="parameter">numeric_literal</replaceable>, REM
<listitem> <listitem>
<para> <para>
This form changes the information which is written to the write-ahead log This form changes the information which is written to the write-ahead log
to identify rows which are updated or deleted. This option has no effect to identify rows which are updated or deleted.
except when logical replication is in use. In most cases, the old value of each column is only logged if it differs
In all cases, no old values are logged unless at least one of the columns from the new value; however, if the old value is stored externally, it is
that would be logged differs between the old and new versions of the row. always logged regardless of whether it changed.
This option has no effect except when logical replication is in use.
<variablelist> <variablelist>
<varlistentry> <varlistentry>
<term><literal>DEFAULT</literal></term> <term><literal>DEFAULT</literal></term>
......
...@@ -78,9 +78,11 @@ static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, ...@@ -78,9 +78,11 @@ static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
Buffer newbuf, HeapTuple oldtup, Buffer newbuf, HeapTuple oldtup,
HeapTuple newtup, HeapTuple old_key_tuple, HeapTuple newtup, HeapTuple old_key_tuple,
bool all_visible_cleared, bool new_all_visible_cleared); bool all_visible_cleared, bool new_all_visible_cleared);
static Bitmapset *HeapDetermineModifiedColumns(Relation relation, static Bitmapset *HeapDetermineColumnsInfo(Relation relation,
Bitmapset *interesting_cols, Bitmapset *interesting_cols,
HeapTuple oldtup, HeapTuple newtup); Bitmapset *external_cols,
HeapTuple oldtup, HeapTuple newtup,
bool *has_external);
static bool heap_acquire_tuplock(Relation relation, ItemPointer tid, static bool heap_acquire_tuplock(Relation relation, ItemPointer tid,
LockTupleMode mode, LockWaitPolicy wait_policy, LockTupleMode mode, LockWaitPolicy wait_policy,
bool *have_tuple_lock); bool *have_tuple_lock);
...@@ -106,7 +108,7 @@ static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status ...@@ -106,7 +108,7 @@ static bool ConditionalMultiXactIdWait(MultiXactId multi, MultiXactStatus status
static void index_delete_sort(TM_IndexDeleteOp *delstate); static void index_delete_sort(TM_IndexDeleteOp *delstate);
static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate); static int bottomup_sort_and_shrink(TM_IndexDeleteOp *delstate);
static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup); static XLogRecPtr log_heap_new_cid(Relation relation, HeapTuple tup);
static HeapTuple ExtractReplicaIdentity(Relation rel, HeapTuple tup, bool key_changed, static HeapTuple ExtractReplicaIdentity(Relation rel, HeapTuple tup, bool key_required,
bool *copy); bool *copy);
...@@ -3185,6 +3187,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, ...@@ -3185,6 +3187,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
bool all_visible_cleared_new = false; bool all_visible_cleared_new = false;
bool checked_lockers; bool checked_lockers;
bool locker_remains; bool locker_remains;
bool id_has_external = false;
TransactionId xmax_new_tuple, TransactionId xmax_new_tuple,
xmax_old_tuple; xmax_old_tuple;
uint16 infomask_old_tuple, uint16 infomask_old_tuple,
...@@ -3269,7 +3272,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, ...@@ -3269,7 +3272,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
Assert(ItemIdIsNormal(lp)); Assert(ItemIdIsNormal(lp));
/* /*
* Fill in enough data in oldtup for HeapDetermineModifiedColumns to work * Fill in enough data in oldtup for HeapDetermineColumnsInfo to work
* properly. * properly.
*/ */
oldtup.t_tableOid = RelationGetRelid(relation); oldtup.t_tableOid = RelationGetRelid(relation);
...@@ -3280,9 +3283,17 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, ...@@ -3280,9 +3283,17 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
/* the new tuple is ready, except for this: */ /* the new tuple is ready, except for this: */
newtup->t_tableOid = RelationGetRelid(relation); newtup->t_tableOid = RelationGetRelid(relation);
/* Determine columns modified by the update. */ /*
modified_attrs = HeapDetermineModifiedColumns(relation, interesting_attrs, * Determine columns modified by the update. Additionally, identify
&oldtup, newtup); * whether any of the unmodified replica identity key attributes in the
* old tuple is externally stored or not. This is required because for
* such attributes the flattened value won't be WAL logged as part of the
* new tuple so we must include it as part of the old_key_tuple. See
* ExtractReplicaIdentity.
*/
modified_attrs = HeapDetermineColumnsInfo(relation, interesting_attrs,
id_attrs, &oldtup,
newtup, &id_has_external);
/* /*
* If we're not updating any "key" column, we can grab a weaker lock type. * If we're not updating any "key" column, we can grab a weaker lock type.
...@@ -3883,10 +3894,12 @@ l2: ...@@ -3883,10 +3894,12 @@ l2:
* Compute replica identity tuple before entering the critical section so * Compute replica identity tuple before entering the critical section so
* we don't PANIC upon a memory allocation failure. * we don't PANIC upon a memory allocation failure.
* ExtractReplicaIdentity() will return NULL if nothing needs to be * ExtractReplicaIdentity() will return NULL if nothing needs to be
* logged. * logged. Pass old key required as true only if the replica identity key
* columns are modified or it has external data.
*/ */
old_key_tuple = ExtractReplicaIdentity(relation, &oldtup, old_key_tuple = ExtractReplicaIdentity(relation, &oldtup,
bms_overlap(modified_attrs, id_attrs), bms_overlap(modified_attrs, id_attrs) ||
id_has_external,
&old_key_copied); &old_key_copied);
/* NO EREPORT(ERROR) from here till changes are logged */ /* NO EREPORT(ERROR) from here till changes are logged */
...@@ -4042,47 +4055,15 @@ l2: ...@@ -4042,47 +4055,15 @@ l2:
} }
/* /*
* Check if the specified attribute's value is same in both given tuples. * Check if the specified attribute's values are the same. Subroutine for
* Subroutine for HeapDetermineModifiedColumns. * HeapDetermineColumnsInfo.
*/ */
static bool static bool
heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum, heap_attr_equals(TupleDesc tupdesc, int attrnum, Datum value1, Datum value2,
HeapTuple tup1, HeapTuple tup2) bool isnull1, bool isnull2)
{ {
Datum value1,
value2;
bool isnull1,
isnull2;
Form_pg_attribute att; Form_pg_attribute att;
/*
* If it's a whole-tuple reference, say "not equal". It's not really
* worth supporting this case, since it could only succeed after a no-op
* update, which is hardly a case worth optimizing for.
*/
if (attrnum == 0)
return false;
/*
* Likewise, automatically say "not equal" for any system attribute other
* than tableOID; we cannot expect these to be consistent in a HOT chain,
* or even to be set correctly yet in the new tuple.
*/
if (attrnum < 0)
{
if (attrnum != TableOidAttributeNumber)
return false;
}
/*
* Extract the corresponding values. XXX this is pretty inefficient if
* there are many indexed columns. Should HeapDetermineModifiedColumns do
* a single heap_deform_tuple call on each tuple, instead? But that
* doesn't work for system columns ...
*/
value1 = heap_getattr(tup1, attrnum, tupdesc, &isnull1);
value2 = heap_getattr(tup2, attrnum, tupdesc, &isnull2);
/* /*
* If one value is NULL and other is not, then they are certainly not * If one value is NULL and other is not, then they are certainly not
* equal * equal
...@@ -4124,24 +4105,96 @@ heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum, ...@@ -4124,24 +4105,96 @@ heap_tuple_attr_equals(TupleDesc tupdesc, int attrnum,
* Given an updated tuple, determine (and return into the output bitmapset), * Given an updated tuple, determine (and return into the output bitmapset),
* from those listed as interesting, the set of columns that changed. * from those listed as interesting, the set of columns that changed.
* *
* The input bitmapset is destructively modified; that is OK since this is * has_external indicates if any of the unmodified attributes (from those
* invoked at most once in heap_update. * listed as interesting) of the old tuple is a member of external_cols and is
* stored externally.
*
* The input interesting_cols bitmapset is destructively modified; that is OK
* since this is invoked at most once in heap_update.
*/ */
static Bitmapset * static Bitmapset *
HeapDetermineModifiedColumns(Relation relation, Bitmapset *interesting_cols, HeapDetermineColumnsInfo(Relation relation,
HeapTuple oldtup, HeapTuple newtup) Bitmapset *interesting_cols,
Bitmapset *external_cols,
HeapTuple oldtup, HeapTuple newtup,
bool *has_external)
{ {
int attnum; int attrnum;
Bitmapset *modified = NULL; Bitmapset *modified = NULL;
TupleDesc tupdesc = RelationGetDescr(relation);
while ((attnum = bms_first_member(interesting_cols)) >= 0) while ((attrnum = bms_first_member(interesting_cols)) >= 0)
{ {
attnum += FirstLowInvalidHeapAttributeNumber; Datum value1,
value2;
bool isnull1,
isnull2;
attrnum += FirstLowInvalidHeapAttributeNumber;
if (!heap_tuple_attr_equals(RelationGetDescr(relation), /*
attnum, oldtup, newtup)) * If it's a whole-tuple reference, say "not equal". It's not really
* worth supporting this case, since it could only succeed after a
* no-op update, which is hardly a case worth optimizing for.
*/
if (attrnum == 0)
{
modified = bms_add_member(modified,
attrnum -
FirstLowInvalidHeapAttributeNumber);
continue;
}
/*
* Likewise, automatically say "not equal" for any system attribute
* other than tableOID; we cannot expect these to be consistent in a
* HOT chain, or even to be set correctly yet in the new tuple.
*/
if (attrnum < 0)
{
if (attrnum != TableOidAttributeNumber)
{
modified = bms_add_member(modified, modified = bms_add_member(modified,
attnum - FirstLowInvalidHeapAttributeNumber); attrnum -
FirstLowInvalidHeapAttributeNumber);
continue;
}
}
/*
* Extract the corresponding values. XXX this is pretty inefficient
* if there are many indexed columns. Should we do a single
* heap_deform_tuple call on each tuple, instead? But that doesn't
* work for system columns ...
*/
value1 = heap_getattr(oldtup, attrnum, tupdesc, &isnull1);
value2 = heap_getattr(newtup, attrnum, tupdesc, &isnull2);
if (!heap_attr_equals(tupdesc, attrnum, value1,
value2, isnull1, isnull2))
{
modified = bms_add_member(modified,
attrnum -
FirstLowInvalidHeapAttributeNumber);
continue;
}
/*
* No need to check attributes that can't be stored externally. Note
* that system attributes can't be stored externally.
*/
if (attrnum < 0 || isnull1 ||
TupleDescAttr(tupdesc, attrnum - 1)->attlen != -1)
continue;
/*
* Check if the old tuple's attribute is stored externally and is a
* member of external_cols.
*/
if (VARATT_IS_EXTERNAL((struct varlena *) DatumGetPointer(value1)) &&
bms_is_member(attrnum - FirstLowInvalidHeapAttributeNumber,
external_cols))
*has_external = true;
} }
return modified; return modified;
...@@ -8306,14 +8359,14 @@ log_heap_new_cid(Relation relation, HeapTuple tup) ...@@ -8306,14 +8359,14 @@ log_heap_new_cid(Relation relation, HeapTuple tup)
* Returns NULL if there's no need to log an identity or if there's no suitable * Returns NULL if there's no need to log an identity or if there's no suitable
* key defined. * key defined.
* *
* key_changed should be false if caller knows that no replica identity * Pass key_required true if any replica identity columns changed value, or if
* columns changed value. It's always true in the DELETE case. * any of them have any external data. Delete must always pass true.
* *
* *copy is set to true if the returned tuple is a modified copy rather than * *copy is set to true if the returned tuple is a modified copy rather than
* the same tuple that was passed in. * the same tuple that was passed in.
*/ */
static HeapTuple static HeapTuple
ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_changed, ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_required,
bool *copy) bool *copy)
{ {
TupleDesc desc = RelationGetDescr(relation); TupleDesc desc = RelationGetDescr(relation);
...@@ -8345,8 +8398,8 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_changed, ...@@ -8345,8 +8398,8 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_changed,
return tp; return tp;
} }
/* if the key hasn't changed and we're only logging the key, we're done */ /* if the key isn't required and we're only logging the key, we're done */
if (!key_changed) if (!key_required)
return NULL; return NULL;
/* find out the replica identity columns */ /* find out the replica identity columns */
...@@ -8354,10 +8407,10 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_changed, ...@@ -8354,10 +8407,10 @@ ExtractReplicaIdentity(Relation relation, HeapTuple tp, bool key_changed,
INDEX_ATTR_BITMAP_IDENTITY_KEY); INDEX_ATTR_BITMAP_IDENTITY_KEY);
/* /*
* If there's no defined replica identity columns, treat as !key_changed. * If there's no defined replica identity columns, treat as !key_required.
* (This case should not be reachable from heap_update, since that should * (This case should not be reachable from heap_update, since that should
* calculate key_changed accurately. But heap_delete just passes constant * calculate key_required accurately. But heap_delete just passes
* true for key_changed, so we can hit this case in deletes.) * constant true for key_required, so we can hit this case in deletes.)
*/ */
if (bms_is_empty(idattrs)) if (bms_is_empty(idattrs))
return NULL; return NULL;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment