Commit 9727c583 authored by Tom Lane's avatar Tom Lane

Restructure CLUSTER/newstyle VACUUM FULL/ALTER TABLE support so that swapping

of old and new toast tables can be done either at the logical level (by
swapping the heaps' reltoastrelid links) or at the physical level (by swapping
the relfilenodes of the toast tables and their indexes).  This is necessary
infrastructure for upcoming changes to support CLUSTER/VAC FULL on shared
system catalogs, where we cannot change reltoastrelid.  The physical swap
saves a few catalog updates too.

We unfortunately have to keep the logical-level swap logic because in some
cases we will be adding or deleting a toast table, so there's no possibility
of a physical swap.  However, that only happens as a consequence of schema
changes in the table, which we do not need to support for system catalogs,
so such cases aren't an obstacle for that.

In passing, refactor the cluster support functions a little bit to eliminate
unnecessarily-duplicated code; and fix the problem that while CLUSTER had
been taught to rename the final toast table at need, ALTER TABLE had not.
parent fdac8cf9
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.96 2010/01/02 16:57:35 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.97 2010/02/04 00:09:13 tgl Exp $
*
*
* INTERFACE ROUTINES
......@@ -1185,10 +1185,25 @@ toast_save_datum(Relation rel, Datum value, int options)
toast_pointer.va_extsize = data_todo;
}
/*
* Insert the correct table OID into the result TOAST pointer.
*
* Normally this is the actual OID of the target toast table, but during
* table-rewriting operations such as CLUSTER, we have to insert the OID
* of the table's real permanent toast table instead. rd_toastoid is
* set if we have to substitute such an OID.
*/
if (OidIsValid(rel->rd_toastoid))
toast_pointer.va_toastrelid = rel->rd_toastoid;
else
toast_pointer.va_toastrelid = RelationGetRelid(toastrel);
/*
* Choose an unused OID within the toast table for this toast value.
*/
toast_pointer.va_valueid = GetNewOidWithIndex(toastrel,
RelationGetRelid(toastidx),
(AttrNumber) 1);
toast_pointer.va_toastrelid = rel->rd_rel->reltoastrelid;
/*
* Initialize constant parts of the tuple data
......
......@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.196 2010/02/02 19:12:29 tgl Exp $
* $PostgreSQL: pgsql/src/backend/commands/cluster.c,v 1.197 2010/02/04 00:09:14 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -63,8 +63,9 @@ typedef struct
static void rebuild_relation(Relation OldHeap, Oid indexOid,
int freeze_min_age, int freeze_table_age);
static TransactionId copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap,
Oid OIDOldIndex, int freeze_min_age, int freeze_table_age);
static void copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
int freeze_min_age, int freeze_table_age,
bool *pSwapToastByContent, TransactionId *pFreezeXid);
static List *get_tables_to_cluster(MemoryContext cluster_context);
......@@ -584,10 +585,8 @@ rebuild_relation(Relation OldHeap, Oid indexOid,
Oid tableOid = RelationGetRelid(OldHeap);
Oid tableSpace = OldHeap->rd_rel->reltablespace;
Oid OIDNewHeap;
char NewHeapName[NAMEDATALEN];
bool swap_toast_by_content;
TransactionId frozenXid;
ObjectAddress object;
Relation newrel;
/* Mark the correct index as clustered */
if (OidIsValid(indexOid))
......@@ -596,98 +595,39 @@ rebuild_relation(Relation OldHeap, Oid indexOid,
/* Close relcache entry, but keep lock until transaction commit */
heap_close(OldHeap, NoLock);
/*
* Create the new heap, using a temporary name in the same namespace as
* the existing table. NOTE: there is some risk of collision with user
* relnames. Working around this seems more trouble than it's worth; in
* particular, we can't create the new heap in a different namespace from
* the old, or we will have problems with the TEMP status of temp tables.
*/
snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", tableOid);
OIDNewHeap = make_new_heap(tableOid, NewHeapName, tableSpace);
/*
* We don't need CommandCounterIncrement() because make_new_heap did it.
*/
/*
* Copy the heap data into the new table in the desired order.
*/
frozenXid = copy_heap_data(OIDNewHeap, tableOid, indexOid,
freeze_min_age, freeze_table_age);
/* To make the new heap's data visible (probably not needed?). */
CommandCounterIncrement();
/* Swap the physical files of the old and new heaps. */
swap_relation_files(tableOid, OIDNewHeap, frozenXid);
CommandCounterIncrement();
/* Destroy new heap with old filenode */
object.classId = RelationRelationId;
object.objectId = OIDNewHeap;
object.objectSubId = 0;
/*
* The new relation is local to our transaction and we know nothing
* depends on it, so DROP_RESTRICT should be OK.
*/
performDeletion(&object, DROP_RESTRICT);
/* Create the transient table that will receive the re-ordered data */
OIDNewHeap = make_new_heap(tableOid, tableSpace);
/* performDeletion does CommandCounterIncrement at end */
/* Copy the heap data into the new table in the desired order */
copy_heap_data(OIDNewHeap, tableOid, indexOid,
freeze_min_age, freeze_table_age,
&swap_toast_by_content, &frozenXid);
/*
* Rebuild each index on the relation (but not the toast table, which is
* all-new at this point). We do not need CommandCounterIncrement()
* because reindex_relation does it.
*/
reindex_relation(tableOid, false);
/* Swap the physical files of the old and new heaps */
swap_relation_files(tableOid, OIDNewHeap,
swap_toast_by_content, frozenXid);
/*
* At this point, everything is kosher except that the toast table's name
* corresponds to the temporary table. The name is irrelevant to the
* backend because it's referenced by OID, but users looking at the
* catalogs could be confused. Rename it to prevent this problem.
*
* Note no lock required on the relation, because we already hold an
* exclusive lock on it.
*/
newrel = heap_open(tableOid, NoLock);
if (OidIsValid(newrel->rd_rel->reltoastrelid))
{
Relation toastrel;
Oid toastidx;
Oid toastnamespace;
char NewToastName[NAMEDATALEN];
toastrel = relation_open(newrel->rd_rel->reltoastrelid, AccessShareLock);
toastidx = toastrel->rd_rel->reltoastidxid;
toastnamespace = toastrel->rd_rel->relnamespace;
relation_close(toastrel, AccessShareLock);
/* rename the toast table ... */
snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u", tableOid);
RenameRelationInternal(newrel->rd_rel->reltoastrelid, NewToastName,
toastnamespace);
/* ... and its index too */
snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index", tableOid);
RenameRelationInternal(toastidx, NewToastName,
toastnamespace);
}
relation_close(newrel, NoLock);
/* Destroy the new heap, removing the old data along with it */
cleanup_heap_swap(tableOid, OIDNewHeap, swap_toast_by_content);
}
/*
* Create the new table that we will fill with correctly-ordered data.
* Create the transient table that will be filled with new data during
* CLUSTER, ALTER TABLE, and similar operations. The transient table
* duplicates the logical structure of the OldHeap, but is placed in
* NewTableSpace which might be different from OldHeap's.
*
* After this, the caller should load the new heap with transferred/modified
* data, then call swap_relation_files, and finally call cleanup_heap_swap to
* remove the debris.
*/
Oid
make_new_heap(Oid OIDOldHeap, const char *NewName, Oid NewTableSpace)
make_new_heap(Oid OIDOldHeap, Oid NewTableSpace)
{
TupleDesc OldHeapDesc,
tupdesc;
char NewHeapName[NAMEDATALEN];
Oid OIDNewHeap;
Oid toastid;
Relation OldHeap;
......@@ -708,7 +648,7 @@ make_new_heap(Oid OIDOldHeap, const char *NewName, Oid NewTableSpace)
tupdesc = CreateTupleDescCopy(OldHeapDesc);
/*
* Use options of the old heap for new heap.
* But we do want to use reloptions of the old heap for new heap.
*/
tuple = SearchSysCache(RELOID,
ObjectIdGetDatum(OIDOldHeap),
......@@ -720,7 +660,16 @@ make_new_heap(Oid OIDOldHeap, const char *NewName, Oid NewTableSpace)
if (isNull)
reloptions = (Datum) 0;
OIDNewHeap = heap_create_with_catalog(NewName,
/*
* Create the new heap, using a temporary name in the same namespace as
* the existing table. NOTE: there is some risk of collision with user
* relnames. Working around this seems more trouble than it's worth; in
* particular, we can't create the new heap in a different namespace from
* the old, or we will have problems with the TEMP status of temp tables.
*/
snprintf(NewHeapName, sizeof(NewHeapName), "pg_temp_%u", OIDOldHeap);
OIDNewHeap = heap_create_with_catalog(NewHeapName,
RelationGetNamespace(OldHeap),
NewTableSpace,
InvalidOid,
......@@ -776,12 +725,16 @@ make_new_heap(Oid OIDOldHeap, const char *NewName, Oid NewTableSpace)
}
/*
* Do the physical copying of heap data. Returns the TransactionId used as
* freeze cutoff point for the tuples.
* Do the physical copying of heap data.
*
* There are two output parameters:
* *pSwapToastByContent is set true if toast tables must be swapped by content.
* *pFreezeXid receives the TransactionId used as freeze cutoff point.
*/
static TransactionId
static void
copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
int freeze_min_age, int freeze_table_age)
int freeze_min_age, int freeze_table_age,
bool *pSwapToastByContent, TransactionId *pFreezeXid)
{
Relation NewHeap,
OldHeap,
......@@ -842,13 +795,41 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
/* use_wal off requires rd_targblock be initially invalid */
Assert(NewHeap->rd_targblock == InvalidBlockNumber);
/*
* If both tables have TOAST tables, perform toast swap by content. It is
* possible that the old table has a toast table but the new one doesn't,
* if toastable columns have been dropped. In that case we have to do
* swap by links. This is okay because swap by content is only essential
* for system catalogs, and we don't support schema changes for them.
*/
if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
{
*pSwapToastByContent = true;
/*
* When doing swap by content, any toast pointers written into NewHeap
* must use the old toast table's OID, because that's where the toast
* data will eventually be found. Set this up by setting rd_toastoid.
* Note that we must hold NewHeap open until we are done writing data,
* since the relcache will not guarantee to remember this setting once
* the relation is closed. Also, this technique depends on the fact
* that no one will try to read from the NewHeap until after we've
* finished writing it and swapping the rels --- otherwise they could
* follow the toast pointers to the wrong place.
*/
NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
}
else
*pSwapToastByContent = false;
/*
* compute xids used to freeze and weed out dead tuples. We use -1
* freeze_min_age to avoid having CLUSTER freeze tuples earlier than a
* plain VACUUM would.
*/
vacuum_set_xid_limits(freeze_min_age, freeze_table_age,
OldHeap->rd_rel->relisshared, &OldestXmin, &FreezeXid, NULL);
OldHeap->rd_rel->relisshared,
&OldestXmin, &FreezeXid, NULL);
/*
* FreezeXid will become the table's new relfrozenxid, and that mustn't go
......@@ -857,20 +838,23 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
if (TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid))
FreezeXid = OldHeap->rd_rel->relfrozenxid;
/* return selected value to caller */
*pFreezeXid = FreezeXid;
/* Initialize the rewrite operation */
rwstate = begin_heap_rewrite(NewHeap, OldestXmin, FreezeXid, use_wal);
/*
* Scan through the OldHeap in OldIndex order and copy each tuple into the
* NewHeap. To ensure we see recently-dead tuples that still need to be
* copied, we scan with SnapshotAny and use HeapTupleSatisfiesVacuum for
* the visibility test.
* Scan through the OldHeap, either in OldIndex order or sequentially,
* and copy each tuple into the NewHeap. To ensure we see recently-dead
* tuples that still need to be copied, we scan with SnapshotAny and use
* HeapTupleSatisfiesVacuum for the visibility test.
*/
if (OldIndex != NULL)
{
heapScan = NULL;
indexScan = index_beginscan(OldHeap, OldIndex,
SnapshotAny, 0, (ScanKey) NULL);
SnapshotAny, 0, (ScanKey) NULL);
}
else
{
......@@ -1005,6 +989,10 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
/* Write out any remaining tuples, and fsync if needed */
end_heap_rewrite(rwstate);
/* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
NewHeap->rd_toastoid = InvalidOid;
/* Clean up */
pfree(values);
pfree(isnull);
......@@ -1012,8 +1000,6 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
index_close(OldIndex, NoLock);
heap_close(OldHeap, NoLock);
heap_close(NewHeap, NoLock);
return FreezeXid;
}
/*
......@@ -1022,18 +1008,23 @@ copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
* We swap the physical identity (reltablespace and relfilenode) while
* keeping the same logical identities of the two relations.
*
* Also swap any TOAST links, so that the toast data moves along with
* the main-table data.
* We can swap associated TOAST data in either of two ways: recursively swap
* the physical content of the toast tables (and their indexes), or swap the
* TOAST links in the given relations' pg_class entries. The former is needed
* to manage rewrites of shared catalogs (where we cannot change the pg_class
* links) while the latter is the only way to handle cases in which a toast
* table is added or removed altogether.
*
* Additionally, the first relation is marked with relfrozenxid set to
* frozenXid. It seems a bit ugly to have this here, but all callers would
* have to do it anyway, so having it here saves a heap_update. Note: the
* TOAST table needs no special handling, because since we swapped the links,
* the entry for the TOAST table will now contain RecentXmin in relfrozenxid,
* which is the correct value.
* have to do it anyway, so having it here saves a heap_update. Note: in
* the swap-toast-links case, we assume we don't need to change the toast
* table's relfrozenxid: the new version of the toast table should already
* have relfrozenxid set to RecentXmin, which is good enough.
*/
void
swap_relation_files(Oid r1, Oid r2, TransactionId frozenXid)
swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content,
TransactionId frozenXid)
{
Relation relRelation;
HeapTuple reltup1,
......@@ -1071,15 +1062,26 @@ swap_relation_files(Oid r1, Oid r2, TransactionId frozenXid)
relform1->reltablespace = relform2->reltablespace;
relform2->reltablespace = swaptemp;
swaptemp = relform1->reltoastrelid;
relform1->reltoastrelid = relform2->reltoastrelid;
relform2->reltoastrelid = swaptemp;
if (!swap_toast_by_content)
{
swaptemp = relform1->reltoastrelid;
relform1->reltoastrelid = relform2->reltoastrelid;
relform2->reltoastrelid = swaptemp;
/* we should not swap reltoastidxid */
/* we should not swap reltoastidxid */
}
/*
* In the case of a shared catalog, these next few steps only affect our
* own database's pg_class row; but that's okay.
*/
/* set rel1's frozen Xid */
Assert(TransactionIdIsNormal(frozenXid));
relform1->relfrozenxid = frozenXid;
if (relform1->relkind != RELKIND_INDEX)
{
Assert(TransactionIdIsNormal(frozenXid));
relform1->relfrozenxid = frozenXid;
}
/* swap size statistics too, since new rel has freshly-updated stats */
{
......@@ -1107,63 +1109,96 @@ swap_relation_files(Oid r1, Oid r2, TransactionId frozenXid)
/*
* If we have toast tables associated with the relations being swapped,
* change their dependency links to re-associate them with their new
* owning relations. Otherwise the wrong one will get dropped ...
*
* NOTE: it is possible that only one table has a toast table; this can
* happen in CLUSTER if there were dropped columns in the old table, and
* in ALTER TABLE when adding or changing type of columns.
*
* NOTE: at present, a TOAST table's only dependency is the one on its
* owning table. If more are ever created, we'd need to use something
* more selective than deleteDependencyRecordsFor() to get rid of only the
* link we want.
* deal with them too.
*/
if (relform1->reltoastrelid || relform2->reltoastrelid)
{
ObjectAddress baseobject,
toastobject;
long count;
/* Delete old dependencies */
if (relform1->reltoastrelid)
if (swap_toast_by_content)
{
count = deleteDependencyRecordsFor(RelationRelationId,
relform1->reltoastrelid);
if (count != 1)
elog(ERROR, "expected one dependency record for TOAST table, found %ld",
count);
if (relform1->reltoastrelid && relform2->reltoastrelid)
{
/* Recursively swap the contents of the toast tables */
swap_relation_files(relform1->reltoastrelid,
relform2->reltoastrelid,
true,
frozenXid);
}
else
{
/* caller messed up */
elog(ERROR, "cannot swap toast files by content when there's only one");
}
}
if (relform2->reltoastrelid)
else
{
count = deleteDependencyRecordsFor(RelationRelationId,
relform2->reltoastrelid);
if (count != 1)
elog(ERROR, "expected one dependency record for TOAST table, found %ld",
count);
}
/*
* We swapped the ownership links, so we need to change dependency
* data to match.
*
* NOTE: it is possible that only one table has a toast table.
*
* NOTE: at present, a TOAST table's only dependency is the one on
* its owning table. If more are ever created, we'd need to use
* something more selective than deleteDependencyRecordsFor() to
* get rid of just the link we want.
*/
ObjectAddress baseobject,
toastobject;
long count;
/* Delete old dependencies */
if (relform1->reltoastrelid)
{
count = deleteDependencyRecordsFor(RelationRelationId,
relform1->reltoastrelid);
if (count != 1)
elog(ERROR, "expected one dependency record for TOAST table, found %ld",
count);
}
if (relform2->reltoastrelid)
{
count = deleteDependencyRecordsFor(RelationRelationId,
relform2->reltoastrelid);
if (count != 1)
elog(ERROR, "expected one dependency record for TOAST table, found %ld",
count);
}
/* Register new dependencies */
baseobject.classId = RelationRelationId;
baseobject.objectSubId = 0;
toastobject.classId = RelationRelationId;
toastobject.objectSubId = 0;
/* Register new dependencies */
baseobject.classId = RelationRelationId;
baseobject.objectSubId = 0;
toastobject.classId = RelationRelationId;
toastobject.objectSubId = 0;
if (relform1->reltoastrelid)
{
baseobject.objectId = r1;
toastobject.objectId = relform1->reltoastrelid;
recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
}
if (relform1->reltoastrelid)
{
baseobject.objectId = r1;
toastobject.objectId = relform1->reltoastrelid;
recordDependencyOn(&toastobject, &baseobject,
DEPENDENCY_INTERNAL);
}
if (relform2->reltoastrelid)
{
baseobject.objectId = r2;
toastobject.objectId = relform2->reltoastrelid;
recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
if (relform2->reltoastrelid)
{
baseobject.objectId = r2;
toastobject.objectId = relform2->reltoastrelid;
recordDependencyOn(&toastobject, &baseobject,
DEPENDENCY_INTERNAL);
}
}
}
/*
* If we're swapping two toast tables by content, do the same for their
* indexes.
*/
if (swap_toast_by_content &&
relform1->reltoastidxid && relform2->reltoastidxid)
swap_relation_files(relform1->reltoastidxid,
relform2->reltoastidxid,
true,
InvalidTransactionId);
/*
* Blow away the old relcache entries now. We need this kluge because
* relcache.c keeps a link to the smgr relation for the physical file, and
......@@ -1187,6 +1222,85 @@ swap_relation_files(Oid r1, Oid r2, TransactionId frozenXid)
heap_close(relRelation, RowExclusiveLock);
}
/*
* Remove the transient table that was built by make_new_heap, and finish
* cleaning up (including rebuilding all indexes on the old heap).
*/
void
cleanup_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap, bool swap_toast_by_content)
{
ObjectAddress object;
/* Make swap_relation_files' changes visible in the catalogs. */
CommandCounterIncrement();
/* Destroy new heap with old filenode */
object.classId = RelationRelationId;
object.objectId = OIDNewHeap;
object.objectSubId = 0;
/*
* The new relation is local to our transaction and we know nothing
* depends on it, so DROP_RESTRICT should be OK.
*/
performDeletion(&object, DROP_RESTRICT);
/* performDeletion does CommandCounterIncrement at end */
/*
* Rebuild each index on the relation (but not the toast table, which is
* all-new at this point). We do not need CommandCounterIncrement()
* because reindex_relation does it.
*/
reindex_relation(OIDOldHeap, false);
/*
* At this point, everything is kosher except that, if we did toast swap
* by links, the toast table's name corresponds to the transient table.
* The name is irrelevant to the backend because it's referenced by OID,
* but users looking at the catalogs could be confused. Rename it to
* prevent this problem.
*
* Note no lock required on the relation, because we already hold an
* exclusive lock on it.
*/
if (!swap_toast_by_content)
{
Relation newrel;
newrel = heap_open(OIDOldHeap, NoLock);
if (OidIsValid(newrel->rd_rel->reltoastrelid))
{
Relation toastrel;
Oid toastidx;
Oid toastnamespace;
char NewToastName[NAMEDATALEN];
toastrel = relation_open(newrel->rd_rel->reltoastrelid,
AccessShareLock);
toastidx = toastrel->rd_rel->reltoastidxid;
toastnamespace = toastrel->rd_rel->relnamespace;
relation_close(toastrel, AccessShareLock);
/* rename the toast table ... */
snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u",
OIDOldHeap);
RenameRelationInternal(newrel->rd_rel->reltoastrelid,
NewToastName,
toastnamespace);
/* ... and its index too */
snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index",
OIDOldHeap);
RenameRelationInternal(toastidx,
NewToastName,
toastnamespace);
}
relation_close(newrel, NoLock);
}
}
/*
* Get a list of tables that the current user owns and
* have indisclustered set. Return the list in a List * of rvsToCluster
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.323 2010/02/03 10:01:29 heikki Exp $
* $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.324 2010/02/04 00:09:14 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -2866,11 +2866,9 @@ ATRewriteTables(List **wqueue)
if (tab->newvals != NIL || tab->new_changeoids)
{
/* Build a temporary relation and copy data */
Relation OldHeap;
Oid OIDNewHeap;
char NewHeapName[NAMEDATALEN];
Oid NewTableSpace;
Relation OldHeap;
ObjectAddress object;
OldHeap = heap_open(tab->relid, NoLock);
......@@ -2905,18 +2903,8 @@ ATRewriteTables(List **wqueue)
heap_close(OldHeap, NoLock);
/*
* Create the new heap, using a temporary name in the same
* namespace as the existing table. NOTE: there is some risk of
* collision with user relnames. Working around this seems more
* trouble than it's worth; in particular, we can't create the new
* heap in a different namespace from the old, or we will have
* problems with the TEMP status of temp tables.
*/
snprintf(NewHeapName, sizeof(NewHeapName),
"pg_temp_%u", tab->relid);
OIDNewHeap = make_new_heap(tab->relid, NewHeapName, NewTableSpace);
/* Create transient table that will receive the modified data */
OIDNewHeap = make_new_heap(tab->relid, NewTableSpace);
/*
* Copy the heap data into the new table with the desired
......@@ -2929,30 +2917,14 @@ ATRewriteTables(List **wqueue)
* Swap the physical files of the old and new heaps. Since we are
* generating a new heap, we can use RecentXmin for the table's
* new relfrozenxid because we rewrote all the tuples on
* ATRewriteTable, so no older Xid remains on the table.
*/
swap_relation_files(tab->relid, OIDNewHeap, RecentXmin);
CommandCounterIncrement();
/* Destroy new heap with old filenode */
object.classId = RelationRelationId;
object.objectId = OIDNewHeap;
object.objectSubId = 0;
/*
* The new relation is local to our transaction and we know
* nothing depends on it, so DROP_RESTRICT should be OK.
* ATRewriteTable, so no older Xid remains in the table. Also,
* we never try to swap toast tables by content, since we have
* no interest in letting this code work on system catalogs.
*/
performDeletion(&object, DROP_RESTRICT);
/* performDeletion does CommandCounterIncrement at end */
swap_relation_files(tab->relid, OIDNewHeap, false, RecentXmin);
/*
* Rebuild each index on the relation (but not the toast table,
* which is all-new anyway). We do not need
* CommandCounterIncrement() because reindex_relation does it.
*/
reindex_relation(tab->relid, false);
/* Destroy the new heap, removing the old data along with it. */
cleanup_heap_swap(tab->relid, OIDNewHeap, false);
}
else
{
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.301 2010/02/03 01:14:17 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/cache/relcache.c,v 1.302 2010/02/04 00:09:14 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -1925,13 +1925,13 @@ RelationClearRelation(Relation relation, bool rebuild)
* new entry, and this shouldn't happen often enough for that to be
* a big problem.
*
* When rebuilding an open relcache entry, we must preserve ref count
* and rd_createSubid/rd_newRelfilenodeSubid state. Also attempt to
* preserve the pg_class entry (rd_rel), tupledesc, and rewrite-rule
* substructures in place, because various places assume that these
* structures won't move while they are working with an open relcache
* entry. (Note: the refcount mechanism for tupledescs might someday
* allow us to remove this hack for the tupledesc.)
* When rebuilding an open relcache entry, we must preserve ref count,
* rd_createSubid/rd_newRelfilenodeSubid, and rd_toastoid state. Also
* attempt to preserve the pg_class entry (rd_rel), tupledesc, and
* rewrite-rule substructures in place, because various places assume
* that these structures won't move while they are working with an
* open relcache entry. (Note: the refcount mechanism for tupledescs
* might someday allow us to remove this hack for the tupledesc.)
*
* Note that this process does not touch CurrentResourceOwner; which
* is good because whatever ref counts the entry may have do not
......@@ -2005,6 +2005,8 @@ RelationClearRelation(Relation relation, bool rebuild)
SWAPFIELD(RuleLock *, rd_rules);
SWAPFIELD(MemoryContext, rd_rulescxt);
}
/* toast OID override must be preserved */
SWAPFIELD(Oid, rd_toastoid);
/* pgstat_info must be preserved */
SWAPFIELD(struct PgStat_TableStatus *, pgstat_info);
......
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994-5, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/commands/cluster.h,v 1.38 2010/01/06 05:31:14 itagaki Exp $
* $PostgreSQL: pgsql/src/include/commands/cluster.h,v 1.39 2010/02/04 00:09:14 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -23,8 +23,11 @@ extern void cluster_rel(Oid tableOid, Oid indexOid, bool recheck,
extern void check_index_is_clusterable(Relation OldHeap, Oid indexOid,
bool recheck);
extern void mark_index_clustered(Relation rel, Oid indexOid);
extern Oid make_new_heap(Oid OIDOldHeap, const char *NewName,
Oid NewTableSpace);
extern void swap_relation_files(Oid r1, Oid r2, TransactionId frozenXid);
extern Oid make_new_heap(Oid OIDOldHeap, Oid NewTableSpace);
extern void swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content,
TransactionId frozenXid);
extern void cleanup_heap_swap(Oid OIDOldHeap, Oid OIDNewHeap,
bool swap_toast_by_content);
#endif /* CLUSTER_H */
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.120 2010/01/17 22:56:23 tgl Exp $
* $PostgreSQL: pgsql/src/include/utils/rel.h,v 1.121 2010/02/04 00:09:14 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -202,6 +202,16 @@ typedef struct RelationData
uint16 *rd_exclstrats; /* exclusion ops' strategy numbers, if any */
void *rd_amcache; /* available for use by index AM */
/*
* Hack for CLUSTER, rewriting ALTER TABLE, etc: when writing a new
* version of a table, we need to make any toast pointers inserted into it
* have the existing toast table's OID, not the OID of the transient toast
* table. If rd_toastoid isn't InvalidOid, it is the OID to place in
* toast pointers inserted into this rel. (Note it's set on the new
* version of the main heap, not the toast table itself.)
*/
Oid rd_toastoid; /* Real TOAST table's OID, or InvalidOid */
/*
* sizes of the free space and visibility map forks, or InvalidBlockNumber
* if not known yet
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment