Commit 7db0cd21 authored by Tomas Vondra

Set PD_ALL_VISIBLE and visibility map bits in COPY FREEZE

Make sure COPY FREEZE marks the pages as PD_ALL_VISIBLE and updates the
visibility map. Until now we only marked individual tuples as frozen,
but page-level flags were not updated, so the first VACUUM after the
COPY FREEZE had to rewrite the whole table.

This is a fairly old patch, and multiple people worked on it. The first
version was written by Jeff Janes, and then reworked by Pavan Deolasee
and Anastasia Lubennikova.

Author: Anastasia Lubennikova, Pavan Deolasee, Jeff Janes
Reviewed-by: Kuntal Ghosh, Jeff Janes, Tomas Vondra, Masahiko Sawada,
             Andres Freund, Ibrar Ahmed, Robert Haas, Tatsuro Ishii,
             Darafei Praliaskouski
Discussion: https://postgr.es/m/CABOikdN-ptGv0mZntrK2Q8OtfUuAjqaYMGmkdU1dCKFtUxVLrg@mail.gmail.com
Discussion: https://postgr.es/m/CAMkU%3D1w3osJJ2FneELhhNRLxfZitDgp9FPHee08NT2FQFmz_pQ%40mail.gmail.com
parent 0c7d3bb9
...@@ -179,6 +179,69 @@ select pg_truncate_visibility_map('test_partition'); ...@@ -179,6 +179,69 @@ select pg_truncate_visibility_map('test_partition');
(1 row) (1 row)
-- test copy freeze
create table copyfreeze (a int, b char(1500));
-- load all rows via COPY FREEZE and ensure that all pages are set all-visible
-- and all-frozen.
begin;
truncate copyfreeze;
copy copyfreeze from stdin freeze;
commit;
select * from pg_visibility_map('copyfreeze');
blkno | all_visible | all_frozen
-------+-------------+------------
0 | t | t
1 | t | t
2 | t | t
(3 rows)
select * from pg_check_frozen('copyfreeze');
t_ctid
--------
(0 rows)
-- load half the rows via regular COPY and rest via COPY FREEZE. The pages
-- which are touched by regular COPY must not be set all-visible/all-frozen. On
-- the other hand, pages allocated by COPY FREEZE should be marked
-- all-frozen/all-visible.
begin;
truncate copyfreeze;
copy copyfreeze from stdin;
copy copyfreeze from stdin freeze;
commit;
select * from pg_visibility_map('copyfreeze');
blkno | all_visible | all_frozen
-------+-------------+------------
0 | f | f
1 | f | f
2 | t | t
(3 rows)
select * from pg_check_frozen('copyfreeze');
t_ctid
--------
(0 rows)
-- Try a mix of regular COPY and COPY FREEZE.
begin;
truncate copyfreeze;
copy copyfreeze from stdin freeze;
copy copyfreeze from stdin;
copy copyfreeze from stdin freeze;
commit;
select * from pg_visibility_map('copyfreeze');
blkno | all_visible | all_frozen
-------+-------------+------------
0 | t | t
1 | f | f
2 | t | t
(3 rows)
select * from pg_check_frozen('copyfreeze');
t_ctid
--------
(0 rows)
-- cleanup -- cleanup
drop table test_partitioned; drop table test_partitioned;
drop view test_view; drop view test_view;
...@@ -188,3 +251,4 @@ drop server dummy_server; ...@@ -188,3 +251,4 @@ drop server dummy_server;
drop foreign data wrapper dummy; drop foreign data wrapper dummy;
drop materialized view matview_visibility_test; drop materialized view matview_visibility_test;
drop table regular_table; drop table regular_table;
drop table copyfreeze;
...@@ -94,6 +94,82 @@ select count(*) > 0 from pg_visibility_map_summary('test_partition'); ...@@ -94,6 +94,82 @@ select count(*) > 0 from pg_visibility_map_summary('test_partition');
select * from pg_check_frozen('test_partition'); -- hopefully none select * from pg_check_frozen('test_partition'); -- hopefully none
select pg_truncate_visibility_map('test_partition'); select pg_truncate_visibility_map('test_partition');
-- test copy freeze
create table copyfreeze (a int, b char(1500));
-- load all rows via COPY FREEZE and ensure that all pages are set all-visible
-- and all-frozen.
begin;
truncate copyfreeze;
copy copyfreeze from stdin freeze;
1 '1'
2 '2'
3 '3'
4 '4'
5 '5'
6 '6'
7 '7'
8 '8'
9 '9'
10 '10'
11 '11'
12 '12'
\.
commit;
select * from pg_visibility_map('copyfreeze');
select * from pg_check_frozen('copyfreeze');
-- load half the rows via regular COPY and rest via COPY FREEZE. The pages
-- which are touched by regular COPY must not be set all-visible/all-frozen. On
-- the other hand, pages allocated by COPY FREEZE should be marked
-- all-frozen/all-visible.
begin;
truncate copyfreeze;
copy copyfreeze from stdin;
1 '1'
2 '2'
3 '3'
4 '4'
5 '5'
6 '6'
\.
copy copyfreeze from stdin freeze;
7 '7'
8 '8'
9 '9'
10 '10'
11 '11'
12 '12'
\.
commit;
select * from pg_visibility_map('copyfreeze');
select * from pg_check_frozen('copyfreeze');
-- Try a mix of regular COPY and COPY FREEZE.
begin;
truncate copyfreeze;
copy copyfreeze from stdin freeze;
1 '1'
2 '2'
3 '3'
4 '4'
5 '5'
\.
copy copyfreeze from stdin;
6 '6'
\.
copy copyfreeze from stdin freeze;
7 '7'
8 '8'
9 '9'
10 '10'
11 '11'
12 '12'
\.
commit;
select * from pg_visibility_map('copyfreeze');
select * from pg_check_frozen('copyfreeze');
-- cleanup -- cleanup
drop table test_partitioned; drop table test_partitioned;
drop view test_view; drop view test_view;
...@@ -103,3 +179,4 @@ drop server dummy_server; ...@@ -103,3 +179,4 @@ drop server dummy_server;
drop foreign data wrapper dummy; drop foreign data wrapper dummy;
drop materialized view matview_visibility_test; drop materialized view matview_visibility_test;
drop table regular_table; drop table regular_table;
drop table copyfreeze;
...@@ -2121,6 +2121,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, ...@@ -2121,6 +2121,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
int ndone; int ndone;
PGAlignedBlock scratch; PGAlignedBlock scratch;
Page page; Page page;
Buffer vmbuffer = InvalidBuffer;
bool needwal; bool needwal;
Size saveFreeSpace; Size saveFreeSpace;
bool need_tuple_data = RelationIsLogicallyLogged(relation); bool need_tuple_data = RelationIsLogicallyLogged(relation);
...@@ -2175,8 +2176,9 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, ...@@ -2175,8 +2176,9 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
while (ndone < ntuples) while (ndone < ntuples)
{ {
Buffer buffer; Buffer buffer;
Buffer vmbuffer = InvalidBuffer; bool starting_with_empty_page;
bool all_visible_cleared = false; bool all_visible_cleared = false;
bool all_frozen_set = false;
int nthispage; int nthispage;
CHECK_FOR_INTERRUPTS(); CHECK_FOR_INTERRUPTS();
...@@ -2184,12 +2186,20 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, ...@@ -2184,12 +2186,20 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
/* /*
* Find buffer where at least the next tuple will fit. If the page is * Find buffer where at least the next tuple will fit. If the page is
* all-visible, this will also pin the requisite visibility map page. * all-visible, this will also pin the requisite visibility map page.
*
* Also pin visibility map page if COPY FREEZE inserts tuples into an
* empty page. See all_frozen_set below.
*/ */
buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len, buffer = RelationGetBufferForTuple(relation, heaptuples[ndone]->t_len,
InvalidBuffer, options, bistate, InvalidBuffer, options, bistate,
&vmbuffer, NULL); &vmbuffer, NULL);
page = BufferGetPage(buffer); page = BufferGetPage(buffer);
starting_with_empty_page = PageGetMaxOffsetNumber(page) == 0;
if (starting_with_empty_page && (options & HEAP_INSERT_FROZEN))
all_frozen_set = true;
/* NO EREPORT(ERROR) from here till changes are logged */ /* NO EREPORT(ERROR) from here till changes are logged */
START_CRIT_SECTION(); START_CRIT_SECTION();
...@@ -2223,7 +2233,14 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, ...@@ -2223,7 +2233,14 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
log_heap_new_cid(relation, heaptup); log_heap_new_cid(relation, heaptup);
} }
if (PageIsAllVisible(page)) /*
* If the page is all visible, need to clear that, unless we're only
* going to add further frozen rows to it.
*
* If we're only adding already frozen rows to a previously empty
* page, mark it as all-visible.
*/
if (PageIsAllVisible(page) && !(options & HEAP_INSERT_FROZEN))
{ {
all_visible_cleared = true; all_visible_cleared = true;
PageClearAllVisible(page); PageClearAllVisible(page);
...@@ -2231,6 +2248,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, ...@@ -2231,6 +2248,8 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
BufferGetBlockNumber(buffer), BufferGetBlockNumber(buffer),
vmbuffer, VISIBILITYMAP_VALID_BITS); vmbuffer, VISIBILITYMAP_VALID_BITS);
} }
else if (all_frozen_set)
PageSetAllVisible(page);
/* /*
* XXX Should we set PageSetPrunable on this page ? See heap_insert() * XXX Should we set PageSetPrunable on this page ? See heap_insert()
...@@ -2254,8 +2273,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, ...@@ -2254,8 +2273,7 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
* If the page was previously empty, we can reinit the page * If the page was previously empty, we can reinit the page
* instead of restoring the whole thing. * instead of restoring the whole thing.
*/ */
init = (ItemPointerGetOffsetNumber(&(heaptuples[ndone]->t_self)) == FirstOffsetNumber && init = starting_with_empty_page;
PageGetMaxOffsetNumber(page) == FirstOffsetNumber + nthispage - 1);
/* allocate xl_heap_multi_insert struct from the scratch area */ /* allocate xl_heap_multi_insert struct from the scratch area */
xlrec = (xl_heap_multi_insert *) scratchptr; xlrec = (xl_heap_multi_insert *) scratchptr;
...@@ -2273,7 +2291,15 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, ...@@ -2273,7 +2291,15 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
/* the rest of the scratch space is used for tuple data */ /* the rest of the scratch space is used for tuple data */
tupledata = scratchptr; tupledata = scratchptr;
xlrec->flags = all_visible_cleared ? XLH_INSERT_ALL_VISIBLE_CLEARED : 0; /* check that the mutually exclusive flags are not both set */
Assert (!(all_visible_cleared && all_frozen_set));
xlrec->flags = 0;
if (all_visible_cleared)
xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED;
if (all_frozen_set)
xlrec->flags = XLH_INSERT_ALL_FROZEN_SET;
xlrec->ntuples = nthispage; xlrec->ntuples = nthispage;
/* /*
...@@ -2347,13 +2373,40 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples, ...@@ -2347,13 +2373,40 @@ heap_multi_insert(Relation relation, TupleTableSlot **slots, int ntuples,
END_CRIT_SECTION(); END_CRIT_SECTION();
UnlockReleaseBuffer(buffer); /*
if (vmbuffer != InvalidBuffer) * If we've frozen everything on the page, update the visibilitymap.
ReleaseBuffer(vmbuffer); * We're already holding pin on the vmbuffer.
*/
if (all_frozen_set)
{
Assert(PageIsAllVisible(page));
Assert(visibilitymap_pin_ok(BufferGetBlockNumber(buffer), vmbuffer));
/*
* It's fine to use InvalidTransactionId here - this is only used
* when HEAP_INSERT_FROZEN is specified, which intentionally
* violates visibility rules.
*/
visibilitymap_set(relation, BufferGetBlockNumber(buffer), buffer,
InvalidXLogRecPtr, vmbuffer,
InvalidTransactionId,
VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN);
}
UnlockReleaseBuffer(buffer);
ndone += nthispage; ndone += nthispage;
/*
* NB: Only release vmbuffer after inserting all tuples - the subsequent
* heap pages we insert into are fairly likely to be covered by the same
* vm page.
*/
} }
/* We're done with inserting all tuples, so release the last vmbuffer. */
if (vmbuffer != InvalidBuffer)
ReleaseBuffer(vmbuffer);
/* /*
* We're done with the actual inserts. Check for conflicts again, to * We're done with the actual inserts. Check for conflicts again, to
* ensure that all rw-conflicts in to these inserts are detected. Without * ensure that all rw-conflicts in to these inserts are detected. Without
...@@ -8725,6 +8778,10 @@ heap_xlog_insert(XLogReaderState *record) ...@@ -8725,6 +8778,10 @@ heap_xlog_insert(XLogReaderState *record)
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
PageClearAllVisible(page); PageClearAllVisible(page);
/* XLH_INSERT_ALL_FROZEN_SET implies that all tuples are visible */
if (xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)
PageSetAllVisible(page);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
} }
if (BufferIsValid(buffer)) if (BufferIsValid(buffer))
...@@ -8775,6 +8832,10 @@ heap_xlog_multi_insert(XLogReaderState *record) ...@@ -8775,6 +8832,10 @@ heap_xlog_multi_insert(XLogReaderState *record)
XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno); XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
/* check that the mutually exclusive flags are not both set */
Assert (!((xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) &&
(xlrec->flags & XLH_INSERT_ALL_FROZEN_SET)));
/* /*
* The visibility map may need to be fixed even if the heap page is * The visibility map may need to be fixed even if the heap page is
* already up-to-date. * already up-to-date.
......
...@@ -433,6 +433,14 @@ loop: ...@@ -433,6 +433,14 @@ loop:
buffer = ReadBufferBI(relation, targetBlock, RBM_NORMAL, bistate); buffer = ReadBufferBI(relation, targetBlock, RBM_NORMAL, bistate);
if (PageIsAllVisible(BufferGetPage(buffer))) if (PageIsAllVisible(BufferGetPage(buffer)))
visibilitymap_pin(relation, targetBlock, vmbuffer); visibilitymap_pin(relation, targetBlock, vmbuffer);
/*
* If the page is empty, pin vmbuffer to set all_frozen bit later.
*/
if ((options & HEAP_INSERT_FROZEN) &&
(PageGetMaxOffsetNumber(BufferGetPage(buffer)) == 0))
visibilitymap_pin(relation, targetBlock, vmbuffer);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
} }
else if (otherBlock == targetBlock) else if (otherBlock == targetBlock)
...@@ -619,6 +627,15 @@ loop: ...@@ -619,6 +627,15 @@ loop:
PageInit(page, BufferGetPageSize(buffer), 0); PageInit(page, BufferGetPageSize(buffer), 0);
MarkBufferDirty(buffer); MarkBufferDirty(buffer);
/*
* The page is empty, pin vmbuffer to set all_frozen bit.
*/
if (options & HEAP_INSERT_FROZEN)
{
Assert(PageGetMaxOffsetNumber(BufferGetPage(buffer)) == 0);
visibilitymap_pin(relation, BufferGetBlockNumber(buffer), vmbuffer);
}
/* /*
* Release the file-extension lock; it's now OK for someone else to extend * Release the file-extension lock; it's now OK for someone else to extend
* the relation some more. * the relation some more.
......
...@@ -69,6 +69,9 @@ ...@@ -69,6 +69,9 @@
#define XLH_INSERT_CONTAINS_NEW_TUPLE (1<<3) #define XLH_INSERT_CONTAINS_NEW_TUPLE (1<<3)
#define XLH_INSERT_ON_TOAST_RELATION (1<<4) #define XLH_INSERT_ON_TOAST_RELATION (1<<4)
/* all_frozen_set always implies all_visible_set */
#define XLH_INSERT_ALL_FROZEN_SET (1<<5)
/* /*
* xl_heap_update flag values, 8 bits are available. * xl_heap_update flag values, 8 bits are available.
*/ */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment