Commit f5b2f60b authored by Tom Lane

Change WAL-logging scheme for multixacts to be more like regular
transaction IDs, rather than like subtrans; in particular, the information
now survives a database restart.  Per previous discussion, this is
essential for PITR log shipping and for 2PC.
parent 593badd3
<!--
$PostgreSQL: pgsql/doc/src/sgml/ref/pg_resetxlog.sgml,v 1.10 2005/04/28 21:47:10 tgl Exp $
$PostgreSQL: pgsql/doc/src/sgml/ref/pg_resetxlog.sgml,v 1.11 2005/06/08 15:50:21 tgl Exp $
PostgreSQL documentation
-->
......@@ -23,6 +23,7 @@ PostgreSQL documentation
<arg> -o <replaceable class="parameter">oid</replaceable> </arg>
<arg> -x <replaceable class="parameter">xid</replaceable> </arg>
<arg> -m <replaceable class="parameter">mxid</replaceable> </arg>
<arg> -O <replaceable class="parameter">mxoff</replaceable> </arg>
<arg> -l <replaceable class="parameter">timelineid</replaceable>,<replaceable class="parameter">fileid</replaceable>,<replaceable class="parameter">seg</replaceable> </arg>
<arg choice="plain"><replaceable>datadir</replaceable></arg>
</cmdsynopsis>
......@@ -32,8 +33,8 @@ PostgreSQL documentation
<title>Description</title>
<para>
<command>pg_resetxlog</command> clears the write-ahead log (WAL) and
optionally resets some other control information (stored in the
<filename>pg_control</> file). This function is sometimes needed
optionally resets some other control information stored in the
<filename>pg_control</> file. This function is sometimes needed
if these files have become corrupted. It should be used only as a
last resort, when the server will not start due to such corruption.
</para>
......@@ -60,8 +61,9 @@ PostgreSQL documentation
by specifying the <literal>-f</> (force) switch. In this case plausible
values will be substituted for the missing data. Most of the fields can be
expected to match, but manual assistance may be needed for the next OID,
next transaction ID, WAL starting address, and database locale fields.
The first three of these can be set using the switches discussed below.
next transaction ID, next multi-transaction ID and offset,
WAL starting address, and database locale fields.
The first five of these can be set using the switches discussed below.
<command>pg_resetxlog</command>'s own environment is the source for its
guess at the locale fields; take care that <envar>LANG</> and so forth
match the environment that <command>initdb</> was run in.
......@@ -74,9 +76,10 @@ PostgreSQL documentation
</para>
<para>
The <literal>-o</>, <literal>-x</>, <literal>-m</>, and <literal>-l</>
The <literal>-o</>, <literal>-x</>, <literal>-m</>, <literal>-O</>,
and <literal>-l</>
switches allow the next OID, next transaction ID, next multi-transaction
ID, and WAL starting address values to
ID, next multi-transaction offset, and WAL starting address values to
be set manually. These are only needed when
<command>pg_resetxlog</command> is unable to determine appropriate values
by reading <filename>pg_control</>. Safe values may be determined as
......@@ -108,6 +111,17 @@ PostgreSQL documentation
</para>
</listitem>
<listitem>
<para>
A safe value for the next multi-transaction offset (<literal>-O</>)
may be determined by looking for the numerically largest
file name in the directory <filename>pg_multixact/members</> under the
data directory, adding one, and then multiplying by 65536. As above,
the file names are in hexadecimal, so the easiest way to do this is to
specify the switch value in hexadecimal and add four zeroes.
</para>
</listitem>
<listitem>
<para>
The WAL starting address (<literal>-l</>) should be
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.193 2005/06/06 20:22:56 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.194 2005/06/08 15:50:21 tgl Exp $
*
*
* INTERFACE ROUTINES
......@@ -2219,6 +2219,8 @@ l3:
* Else the same IDs might be re-used after a crash, which would be
* disastrous if this page made it to disk before the crash. Essentially
* we have to enforce the WAL log-before-data rule even in this case.
* (Also, in a PITR log-shipping or 2PC environment, we have to have XLOG
* entries for everything anyway.)
*/
if (!relation->rd_istemp)
{
......@@ -2228,6 +2230,8 @@ l3:
xlrec.target.node = relation->rd_node;
xlrec.target.tid = tuple->t_self;
xlrec.locking_xid = xid;
xlrec.xid_is_mxact = ((new_infomask & HEAP_XMAX_IS_MULTI) != 0);
xlrec.shared_lock = (mode == LockTupleShared);
rdata[0].data = (char *) &xlrec;
rdata[0].len = SizeOfHeapLock;
......@@ -2900,17 +2904,18 @@ heap_xlog_lock(XLogRecPtr lsn, XLogRecord *record)
htup = (HeapTupleHeader) PageGetItem(page, lp);
/*
* Presently, we don't bother to restore the locked state, but
* just set the XMAX_INVALID bit.
*/
htup->t_infomask &= ~(HEAP_XMAX_COMMITTED |
HEAP_XMAX_INVALID |
HEAP_XMAX_IS_MULTI |
HEAP_IS_LOCKED |
HEAP_MOVED);
htup->t_infomask |= HEAP_XMAX_INVALID;
HeapTupleHeaderSetXmax(htup, record->xl_xid);
if (xlrec->xid_is_mxact)
htup->t_infomask |= HEAP_XMAX_IS_MULTI;
if (xlrec->shared_lock)
htup->t_infomask |= HEAP_XMAX_SHARED_LOCK;
else
htup->t_infomask |= HEAP_XMAX_EXCL_LOCK;
HeapTupleHeaderSetXmax(htup, xlrec->locking_xid);
HeapTupleHeaderSetCmax(htup, FirstCommandId);
/* Make sure there is no forward chain link in t_ctid */
htup->t_ctid = xlrec->target.tid;
......@@ -3010,6 +3015,11 @@ heap_desc(char *buf, uint8 xl_info, char *rec)
strcat(buf, "shared_lock: ");
else
strcat(buf, "exclusive_lock: ");
if (xlrec->xid_is_mxact)
strcat(buf, "mxid ");
else
strcat(buf, "xid ");
sprintf(buf + strlen(buf), "%u ", xlrec->locking_xid);
out_target(buf, &(xlrec->target));
}
else
......
......@@ -10,7 +10,7 @@
* tuple to be unlocked can sleep on the potentially-several TransactionIds
* that compose the MultiXactId.
*
* We use two SLRU areas, one for storing the offsets on which the data
* We use two SLRU areas, one for storing the offsets at which the data
* starts for each MultiXactId in the other one. This trick allows us to
* store variable length arrays of TransactionIds. (We could alternatively
* use one area containing counts and TransactionIds, with valid MultiXactId
......@@ -18,20 +18,31 @@
* since it would get completely confused if someone inquired about a bogus
* MultiXactId that pointed to an intermediate slot containing an XID.)
*
* This code is based on subtrans.c; see it for additional discussion.
* Like the subtransaction manager, we only need to remember multixact
* information for currently-open transactions. Thus, there is
* no need to preserve data over a crash and restart.
* XLOG interactions: this module generates an XLOG record whenever a new
* OFFSETs or MEMBERs page is initialized to zeroes, as well as an XLOG record
* whenever a new MultiXactId is defined. This allows us to completely
* rebuild the data entered since the last checkpoint during XLOG replay.
* Because this is possible, we need not follow the normal rule of
* "write WAL before data"; the only correctness guarantee needed is that
* we flush and sync all dirty OFFSETs and MEMBERs pages to disk before a
* checkpoint is considered complete. If a page does make it to disk ahead
* of corresponding WAL records, it will be forcibly zeroed before use anyway.
* Therefore, we don't need to mark our pages with LSN information; we have
* enough synchronization already.
*
* Like clog.c, and unlike subtrans.c, we have to preserve state across
* crashes and ensure that MXID and offset numbering increases monotonically
* across a crash. We do this in the same way as it's done for transaction
* IDs: the WAL stream is guaranteed to contain evidence of every MXID we
* could need to worry about, and we just make sure that at the end of
* replay, the next-MXID and next-offset counters are at least as large as
* anything we saw during replay.
*
* The only XLOG interaction we need to take care of is that generated
* MultiXactId values must continue to increase across a system crash.
* Thus we log groups of MultiXactIds acquisition in the same fashion we do
* for Oids (see XLogPutNextMultiXactId).
*
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.4 2005/05/19 21:35:45 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/multixact.c,v 1.5 2005/06/08 15:50:25 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -51,8 +62,8 @@
* Defines for MultiXactOffset page sizes. A page is the same BLCKSZ as is
* used everywhere else in Postgres.
*
* Note: because both uint32 and TransactionIds are 32 bits and wrap around at
* 0xFFFFFFFF, MultiXact page numbering also wraps around at
* Note: because both MultiXactOffsets and TransactionIds are 32 bits and
* wrap around at 0xFFFFFFFF, MultiXact page numbering also wraps around at
* 0xFFFFFFFF/MULTIXACT_*_PER_PAGE, and segment numbering at
* 0xFFFFFFFF/MULTIXACT_*_PER_PAGE/SLRU_PAGES_PER_SEGMENT. We need take no
* explicit notice of that fact in this module, except when comparing segment
......@@ -61,21 +72,19 @@
*/
/* We need four bytes per offset and also four bytes per member */
#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(uint32))
#define MULTIXACT_OFFSETS_PER_PAGE (BLCKSZ / sizeof(MultiXactOffset))
#define MULTIXACT_MEMBERS_PER_PAGE (BLCKSZ / sizeof(TransactionId))
#define MultiXactIdToOffsetPage(xid) \
((xid) / (uint32) MULTIXACT_OFFSETS_PER_PAGE)
((xid) / (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE)
#define MultiXactIdToOffsetEntry(xid) \
((xid) % (uint32) MULTIXACT_OFFSETS_PER_PAGE)
((xid) % (MultiXactOffset) MULTIXACT_OFFSETS_PER_PAGE)
#define MXOffsetToMemberPage(xid) \
((xid) / (TransactionId) MULTIXACT_MEMBERS_PER_PAGE)
#define MXOffsetToMemberEntry(xid) \
((xid) % (TransactionId) MULTIXACT_MEMBERS_PER_PAGE)
/* Arbitrary number of MultiXactIds to allocate at each XLog call */
#define MXACT_PREFETCH 8192
/*
* Links to shared-memory data structures for MultiXact control
......@@ -98,11 +107,8 @@ typedef struct MultiXactStateData
/* next-to-be-assigned MultiXactId */
MultiXactId nextMXact;
/* MultiXactIds we have left before logging more */
uint32 mXactCount;
/* next-to-be-assigned offset */
uint32 nextOffset;
MultiXactOffset nextOffset;
/* the Offset SLRU area was last truncated at this MultiXactId */
MultiXactId lastTruncationPoint;
......@@ -161,7 +167,8 @@ static MultiXactId *OldestVisibleMXactId;
* for this being that most entries will contain our own TransactionId and
* so they will be uninteresting by the time our next transaction starts.
* (XXX not clear that this is correct --- other members of the MultiXact
* could hang around longer than we did.)
* could hang around longer than we did. However, it's not clear what a
* better policy for flushing old cache entries would be.)
*
* We allocate the cache entries in a memory context that is deleted at
* transaction end, so we don't need to do retail freeing of entries.
......@@ -194,7 +201,9 @@ static MemoryContext MXactContext = NULL;
static void MultiXactIdSetOldestVisible(void);
static MultiXactId CreateMultiXactId(int nxids, TransactionId *xids);
static int GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids);
static MultiXactId GetNewMultiXactId(int nxids, uint32 *offset);
static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
int nxids, TransactionId *xids);
static MultiXactId GetNewMultiXactId(int nxids, MultiXactOffset *offset);
/* MultiXact cache management */
static MultiXactId mXactCacheGetBySet(int nxids, TransactionId *xids);
......@@ -206,15 +215,17 @@ static char *mxid_to_string(MultiXactId multi, int nxids, TransactionId *xids);
#endif
/* management of SLRU infrastructure */
static int ZeroMultiXactOffsetPage(int pageno);
static int ZeroMultiXactMemberPage(int pageno);
static int ZeroMultiXactOffsetPage(int pageno, bool writeXlog);
static int ZeroMultiXactMemberPage(int pageno, bool writeXlog);
static bool MultiXactOffsetPagePrecedes(int page1, int page2);
static bool MultiXactMemberPagePrecedes(int page1, int page2);
static bool MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2);
static bool MultiXactOffsetPrecedes(uint32 offset1, uint32 offset2);
static bool MultiXactOffsetPrecedes(MultiXactOffset offset1,
MultiXactOffset offset2);
static void ExtendMultiXactOffset(MultiXactId multi);
static void ExtendMultiXactMember(uint32 offset);
static void ExtendMultiXactMember(MultiXactOffset offset, int nmembers);
static void TruncateMultiXact(void);
static void WriteMZeroPageXlogRec(int pageno, uint8 info);
/*
......@@ -551,8 +562,8 @@ MultiXactIdWait(MultiXactId multi)
* CreateMultiXactId
* Make a new MultiXactId
*
* Make SLRU and cache entries for a new MultiXactId, recording the given
* TransactionIds as members. Returns the newly created MultiXactId.
* Make XLOG, SLRU and cache entries for a new MultiXactId, recording the
* given TransactionIds as members. Returns the newly created MultiXactId.
*
* NB: the passed xids[] array will be sorted in-place.
*/
......@@ -560,13 +571,9 @@ static MultiXactId
CreateMultiXactId(int nxids, TransactionId *xids)
{
MultiXactId multi;
int pageno;
int prev_pageno;
int entryno;
int slotno;
uint32 *offptr;
uint32 offset;
int i;
MultiXactOffset offset;
XLogRecData rdata[2];
xl_multixact_create xlrec;
debug_elog3(DEBUG2, "Create: %s",
mxid_to_string(InvalidMultiXactId, nxids, xids));
......@@ -588,11 +595,70 @@ CreateMultiXactId(int nxids, TransactionId *xids)
return multi;
}
/*
* OK, assign the MXID and offsets range to use
*/
multi = GetNewMultiXactId(nxids, &offset);
LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
debug_elog4(DEBUG2, "Create: assigned id %u offset %u", multi, offset);
/*
* Make an XLOG entry describing the new MXID.
*
* Note: we need not flush this XLOG entry to disk before proceeding.
* The only way for the MXID to be referenced from any data page is
* for heap_lock_tuple() to have put it there, and heap_lock_tuple()
* generates an XLOG record that must follow ours. The normal LSN
* interlock between the data page and that XLOG record will ensure
* that our XLOG record reaches disk first. If the SLRU members/offsets
* data reaches disk sooner than the XLOG record, we do not care because
* we'll overwrite it with zeroes unless the XLOG record is there too;
* see notes at top of this file.
*/
xlrec.mid = multi;
xlrec.moff = offset;
xlrec.nxids = nxids;
rdata[0].data = (char *) (&xlrec);
rdata[0].len = MinSizeOfMultiXactCreate;
rdata[0].buffer = InvalidBuffer;
rdata[0].next = &(rdata[1]);
rdata[1].data = (char *) xids;
rdata[1].len = nxids * sizeof(TransactionId);
rdata[1].buffer = InvalidBuffer;
rdata[1].next = NULL;
(void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID, rdata);
/* Now enter the information into the OFFSETs and MEMBERs logs */
RecordNewMultiXact(multi, offset, nxids, xids);
/* Store the new MultiXactId in the local cache, too */
mXactCachePut(multi, nxids, xids);
debug_elog2(DEBUG2, "Create: all done");
return multi;
}
/*
* RecordNewMultiXact
* Write info about a new multixact into the offsets and members files
*
* This is broken out of CreateMultiXactId so that xlog replay can use it.
*/
static void
RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
int nxids, TransactionId *xids)
{
int pageno;
int prev_pageno;
int entryno;
int slotno;
MultiXactOffset *offptr;
int i;
ExtendMultiXactOffset(multi);
LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
pageno = MultiXactIdToOffsetPage(multi);
entryno = MultiXactIdToOffsetEntry(multi);
......@@ -605,8 +671,9 @@ CreateMultiXactId(int nxids, TransactionId *xids)
* we'll take the trouble to generalize the slru.c error reporting code.
*/
slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, multi);
offptr = (uint32 *) MultiXactOffsetCtl->shared->page_buffer[slotno];
offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
offptr += entryno;
*offptr = offset;
MultiXactOffsetCtl->shared->page_status[slotno] = SLRU_PAGE_DIRTY;
......@@ -614,8 +681,6 @@ CreateMultiXactId(int nxids, TransactionId *xids)
/* Exchange our lock */
LWLockRelease(MultiXactOffsetControlLock);
debug_elog3(DEBUG2, "Create: got offset %u", offset);
LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
prev_pageno = -1;
......@@ -624,8 +689,6 @@ CreateMultiXactId(int nxids, TransactionId *xids)
{
TransactionId *memberptr;
ExtendMultiXactMember(offset);
pageno = MXOffsetToMemberPage(offset);
entryno = MXOffsetToMemberEntry(offset);
......@@ -640,29 +703,27 @@ CreateMultiXactId(int nxids, TransactionId *xids)
memberptr += entryno;
*memberptr = xids[i];
MultiXactMemberCtl->shared->page_status[slotno] = SLRU_PAGE_DIRTY;
}
LWLockRelease(MultiXactMemberControlLock);
/* Store the new MultiXactId in the local cache, too */
mXactCachePut(multi, nxids, xids);
debug_elog2(DEBUG2, "Create: all done");
return multi;
}
/*
* GetNewMultiXactId
* Get the next MultiXactId.
*
* Get the next MultiXactId, XLogging if needed. Also, reserve the needed
* amount of space in the "members" area. The starting offset of the
* reserved space is returned in *offset.
* Also, reserve the needed amount of space in the "members" area. The
* starting offset of the reserved space is returned in *offset.
*
* This may generate XLOG records for expansion of the offsets and/or members
* files. Unfortunately, we have to do that while holding MultiXactGenLock
* to avoid race conditions --- the XLOG record for zeroing a page must appear
* before any backend can possibly try to store data in that page!
*/
static MultiXactId
GetNewMultiXactId(int nxids, uint32 *offset)
GetNewMultiXactId(int nxids, MultiXactOffset *offset)
{
MultiXactId result;
......@@ -675,33 +736,33 @@ GetNewMultiXactId(int nxids, uint32 *offset)
/* Handle wraparound of the nextMXact counter */
if (MultiXactState->nextMXact < FirstMultiXactId)
{
MultiXactState->nextMXact = FirstMultiXactId;
MultiXactState->mXactCount = 0;
}
/* If we run out of logged for use multixacts then we must log more */
if (MultiXactState->mXactCount == 0)
{
XLogPutNextMultiXactId(MultiXactState->nextMXact + MXACT_PREFETCH);
MultiXactState->mXactCount = MXACT_PREFETCH;
}
/*
* Assign the MXID, and make sure there is room for it in the file.
*/
result = MultiXactState->nextMXact;
ExtendMultiXactOffset(result);
/*
* Advance counter. As in GetNewTransactionId(), this must not happen
* until after ExtendMultiXactOffset has succeeded!
*
* We don't care about MultiXactId wraparound here; it will be handled by
* the next iteration. But note that nextMXact may be InvalidMultiXactId
* after this routine exits, so anyone else looking at the variable must
* be prepared to deal with that.
*/
(MultiXactState->nextMXact)++;
(MultiXactState->mXactCount)--;
/*
* Reserve the members space.
* Reserve the members space. Same considerations as above.
*/
*offset = MultiXactState->nextOffset;
ExtendMultiXactMember(*offset, nxids);
MultiXactState->nextOffset += nxids;
LWLockRelease(MultiXactGenLock);
......@@ -725,13 +786,13 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
int prev_pageno;
int entryno;
int slotno;
uint32 *offptr;
uint32 offset;
MultiXactOffset *offptr;
MultiXactOffset offset;
int length;
int i;
MultiXactId nextMXact;
MultiXactId tmpMXact;
uint32 nextOffset;
MultiXactOffset nextOffset;
TransactionId *ptr;
debug_elog3(DEBUG2, "GetMembers: asked for %u", multi);
......@@ -799,7 +860,7 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
entryno = MultiXactIdToOffsetEntry(multi);
slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, multi);
offptr = (uint32 *) MultiXactOffsetCtl->shared->page_buffer[slotno];
offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
offptr += entryno;
offset = *offptr;
......@@ -829,7 +890,7 @@ GetMultiXactIdMembers(MultiXactId multi, TransactionId **xids)
if (pageno != prev_pageno)
slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, tmpMXact);
offptr = (uint32 *) MultiXactOffsetCtl->shared->page_buffer[slotno];
offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
offptr += entryno;
length = *offptr - offset;
}
......@@ -1086,10 +1147,6 @@ MultiXactShmemInit(void)
SimpleLruInit(MultiXactMemberCtl, "MultiXactMember Ctl",
MultiXactMemberControlLock, "pg_multixact/members");
/* Override default assumption that writes should be fsync'd */
MultiXactOffsetCtl->do_fsync = false;
MultiXactMemberCtl->do_fsync = false;
/* Initialize our shared state struct */
MultiXactState = ShmemInitStruct("Shared MultiXact State",
SHARED_MULTIXACT_STATE_SIZE,
......@@ -1116,10 +1173,6 @@ MultiXactShmemInit(void)
* This func must be called ONCE on system install. It creates the initial
* MultiXact segments. (The MultiXacts directories are assumed to have been
* created by initdb, and MultiXactShmemInit must have been called already.)
*
* Note: it's not really necessary to create the initial segments now,
* since slru.c would create 'em on first write anyway. But we may as well
* do it to be sure the directories are set up correctly.
*/
void
BootStrapMultiXact(void)
......@@ -1128,8 +1181,10 @@ BootStrapMultiXact(void)
LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
/* Offsets first page */
slotno = ZeroMultiXactOffsetPage(0);
/* Create and zero the first page of the offsets log */
slotno = ZeroMultiXactOffsetPage(0, false);
/* Make sure it's written out */
SimpleLruWritePage(MultiXactOffsetCtl, slotno, NULL);
Assert(MultiXactOffsetCtl->shared->page_status[slotno] == SLRU_PAGE_CLEAN);
......@@ -1137,8 +1192,10 @@ BootStrapMultiXact(void)
LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
/* Members first page */
slotno = ZeroMultiXactMemberPage(0);
/* Create and zero the first page of the members log */
slotno = ZeroMultiXactMemberPage(0, false);
/* Make sure it's written out */
SimpleLruWritePage(MultiXactMemberCtl, slotno, NULL);
Assert(MultiXactMemberCtl->shared->page_status[slotno] == SLRU_PAGE_CLEAN);
......@@ -1147,6 +1204,7 @@ BootStrapMultiXact(void)
/*
* Initialize (or reinitialize) a page of MultiXactOffset to zeroes.
* If writeXlog is TRUE, also emit an XLOG record saying we did this.
*
* The page is not actually written, just set up in shared memory.
* The slot number of the new page is returned.
......@@ -1154,25 +1212,40 @@ BootStrapMultiXact(void)
* Control lock must be held at entry, and will be held at exit.
*/
static int
ZeroMultiXactOffsetPage(int pageno)
ZeroMultiXactOffsetPage(int pageno, bool writeXlog)
{
return SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
int slotno;
slotno = SimpleLruZeroPage(MultiXactOffsetCtl, pageno);
if (writeXlog)
WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_OFF_PAGE);
return slotno;
}
/*
* Ditto, for MultiXactMember
*/
static int
ZeroMultiXactMemberPage(int pageno)
ZeroMultiXactMemberPage(int pageno, bool writeXlog)
{
return SimpleLruZeroPage(MultiXactMemberCtl, pageno);
int slotno;
slotno = SimpleLruZeroPage(MultiXactMemberCtl, pageno);
if (writeXlog)
WriteMZeroPageXlogRec(pageno, XLOG_MULTIXACT_ZERO_MEM_PAGE);
return slotno;
}
/*
* This must be called ONCE during postmaster or standalone-backend startup.
*
* StartupXLOG has already established nextMXact by calling
* MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact.
* StartupXLOG has already established nextMXact/nextOffset by calling
* MultiXactSetNextMXact and/or MultiXactAdvanceNextMXact. Note that we
* may already have replayed WAL data into the SLRU files.
*
* We don't need any locks here, really; the SLRU locks are taken
* only because slru.c expects to be called with locks held.
......@@ -1180,68 +1253,76 @@ ZeroMultiXactMemberPage(int pageno)
void
StartupMultiXact(void)
{
int startPage;
int cutoffPage;
uint32 offset;
MultiXactId multi = MultiXactState->nextMXact;
MultiXactOffset offset = MultiXactState->nextOffset;
int pageno;
int entryno;
/* Clean up offsets state */
LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
/*
* We start nextOffset at zero after every reboot; there is no need to
* avoid offset values that were used in the previous system lifecycle.
* Initialize our idea of the latest page number.
*/
MultiXactState->nextOffset = 0;
pageno = MultiXactIdToOffsetPage(multi);
MultiXactOffsetCtl->shared->latest_page_number = pageno;
/*
* Because of the above, a shutdown and restart is likely to leave
* high-numbered MultiXactMember page files that would not get recycled
* for a long time (about as long as the system had been up in the
* previous cycle of life). To clean out such page files, we issue an
* artificial truncation call that will zap any page files in the first
* half of the offset cycle. Should there be any page files in the last
* half, they will get cleaned out by the first checkpoint.
*
* XXX it might be a good idea to disable this when debugging, since it
* will tend to destroy evidence after a crash. To not be *too* ruthless,
* we arbitrarily spare the first 64 pages. (Note this will get
* rounded off to a multiple of SLRU_PAGES_PER_SEGMENT ...)
* Zero out the remainder of the current offsets page. See notes
* in StartupCLOG() for motivation.
*/
offset = ((~ (uint32) 0) >> 1) + 1;
entryno = MultiXactIdToOffsetEntry(multi);
if (entryno != 0)
{
int slotno;
MultiXactOffset *offptr;
cutoffPage = MXOffsetToMemberPage(offset) + 64;
slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, multi);
offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
offptr += entryno;
/*
* Defeat safety interlock in SimpleLruTruncate; this hack will be
* cleaned up by ZeroMultiXactMemberPage call below.
*/
MultiXactMemberCtl->shared->latest_page_number = cutoffPage;
MemSet(offptr, 0, BLCKSZ - (entryno * sizeof(MultiXactOffset)));
SimpleLruTruncate(MultiXactMemberCtl, cutoffPage);
MultiXactOffsetCtl->shared->page_status[slotno] = SLRU_PAGE_DIRTY;
}
LWLockRelease(MultiXactOffsetControlLock);
/* And the same for members */
LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
/*
* Initialize lastTruncationPoint to invalid, ensuring that the first
* checkpoint will try to do truncation.
* Initialize our idea of the latest page number.
*/
MultiXactState->lastTruncationPoint = InvalidMultiXactId;
pageno = MXOffsetToMemberPage(offset);
MultiXactMemberCtl->shared->latest_page_number = pageno;
/*
* Since we don't expect MultiXact to be valid across crashes, we
* initialize the currently-active pages to zeroes during startup.
* Whenever we advance into a new page, both ExtendMultiXact routines
* will likewise zero the new page without regard to whatever was
* previously on disk.
* Zero out the remainder of the current members page. See notes
* in StartupCLOG() for motivation.
*/
LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
startPage = MultiXactIdToOffsetPage(MultiXactState->nextMXact);
(void) ZeroMultiXactOffsetPage(startPage);
entryno = MXOffsetToMemberEntry(offset);
if (entryno != 0)
{
int slotno;
TransactionId *xidptr;
LWLockRelease(MultiXactOffsetControlLock);
slotno = SimpleLruReadPage(MultiXactMemberCtl, pageno, offset);
xidptr = (TransactionId *) MultiXactMemberCtl->shared->page_buffer[slotno];
xidptr += entryno;
LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
MemSet(xidptr, 0, BLCKSZ - (entryno * sizeof(TransactionId)));
startPage = MXOffsetToMemberPage(MultiXactState->nextOffset);
(void) ZeroMultiXactMemberPage(startPage);
MultiXactMemberCtl->shared->page_status[slotno] = SLRU_PAGE_DIRTY;
}
LWLockRelease(MultiXactMemberControlLock);
/*
* Initialize lastTruncationPoint to invalid, ensuring that the first
* checkpoint will try to do truncation.
*/
MultiXactState->lastTruncationPoint = InvalidMultiXactId;
}
/*
......@@ -1250,36 +1331,28 @@ StartupMultiXact(void)
void
ShutdownMultiXact(void)
{
/*
* Flush dirty MultiXact pages to disk
*
* This is not actually necessary from a correctness point of view. We do
* it merely as a debugging aid.
*/
/* Flush dirty MultiXact pages to disk */
SimpleLruFlush(MultiXactOffsetCtl, false);
SimpleLruFlush(MultiXactMemberCtl, false);
}
/*
* Get the next MultiXactId to save in a checkpoint record
* Get the next MultiXactId and offset to save in a checkpoint record
*/
MultiXactId
MultiXactGetCheckptMulti(bool is_shutdown)
void
MultiXactGetCheckptMulti(bool is_shutdown,
MultiXactId *nextMulti,
MultiXactOffset *nextMultiOffset)
{
MultiXactId retval;
LWLockAcquire(MultiXactGenLock, LW_SHARED);
retval = MultiXactState->nextMXact;
if (!is_shutdown)
retval += MultiXactState->mXactCount;
*nextMulti = MultiXactState->nextMXact;
*nextMultiOffset = MultiXactState->nextOffset;
LWLockRelease(MultiXactGenLock);
debug_elog3(DEBUG2, "MultiXact: MultiXact for checkpoint record is %u",
retval);
return retval;
debug_elog4(DEBUG2, "MultiXact: checkpoint is nextMulti %u, nextOffset %u",
*nextMulti, *nextMultiOffset);
}
/*
......@@ -1288,62 +1361,68 @@ MultiXactGetCheckptMulti(bool is_shutdown)
void
CheckPointMultiXact(void)
{
/*
* Flush dirty MultiXact pages to disk
*
* This is not actually necessary from a correctness point of view. We do
* it merely to improve the odds that writing of dirty pages is done
* by the checkpoint process and not by backends.
*/
/* Flush dirty MultiXact pages to disk */
SimpleLruFlush(MultiXactOffsetCtl, true);
SimpleLruFlush(MultiXactMemberCtl, true);
/*
* Truncate the SLRU files
* Truncate the SLRU files. This could be done at any time, but
* checkpoint seems a reasonable place for it.
*/
TruncateMultiXact();
}
/*
* Set the next-to-be-assigned MultiXactId
* Set the next-to-be-assigned MultiXactId and offset
*
* This is used when we can determine the correct next Id exactly
* from an XLog record. We need no locking since it is only called
* This is used when we can determine the correct next ID/offset exactly
* from a checkpoint record. We need no locking since it is only called
* during bootstrap and XLog replay.
*/
void
MultiXactSetNextMXact(MultiXactId nextMulti)
MultiXactSetNextMXact(MultiXactId nextMulti,
MultiXactOffset nextMultiOffset)
{
debug_elog3(DEBUG2, "MultiXact: setting next multi to %u", nextMulti);
debug_elog4(DEBUG2, "MultiXact: setting next multi to %u offset %u",
nextMulti, nextMultiOffset);
MultiXactState->nextMXact = nextMulti;
MultiXactState->mXactCount = 0;
MultiXactState->nextOffset = nextMultiOffset;
}
/*
* Ensure the next-to-be-assigned MultiXactId is at least minMulti
* Ensure the next-to-be-assigned MultiXactId is at least minMulti,
* and similarly nextOffset is at least minMultiOffset
*
* This is used when we can determine a minimum safe value
* from an XLog record. We need no locking since it is only called
* during XLog replay.
* This is used when we can determine minimum safe values from an XLog
* record (either an on-line checkpoint or an mxact creation log entry).
* We need no locking since it is only called during XLog replay.
*/
void
MultiXactAdvanceNextMXact(MultiXactId minMulti)
MultiXactAdvanceNextMXact(MultiXactId minMulti,
MultiXactOffset minMultiOffset)
{
if (MultiXactIdPrecedes(MultiXactState->nextMXact, minMulti))
{
debug_elog3(DEBUG2, "MultiXact: setting next multi to %u", minMulti);
MultiXactState->nextMXact = minMulti;
MultiXactState->mXactCount = 0;
}
if (MultiXactOffsetPrecedes(MultiXactState->nextOffset, minMultiOffset))
{
debug_elog3(DEBUG2, "MultiXact: setting next offset to %u",
minMultiOffset);
MultiXactState->nextOffset = minMultiOffset;
}
}
/*
* Make sure that MultiXactOffset has room for a newly-allocated MultiXactId.
*
* The MultiXactOffsetControlLock should be held at entry, and will
* be held at exit.
* NB: this is called while holding MultiXactGenLock. We want it to be very
* fast most of the time; even when it's not so fast, no actual I/O need
* happen unless we're forced to write out a dirty log or xlog page to make
* room in shared memory.
*/
void
static void
ExtendMultiXactOffset(MultiXactId multi)
{
int pageno;
......@@ -1358,32 +1437,56 @@ ExtendMultiXactOffset(MultiXactId multi)
pageno = MultiXactIdToOffsetPage(multi);
/* Zero the page */
ZeroMultiXactOffsetPage(pageno);
LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
/* Zero the page and make an XLOG entry about it */
ZeroMultiXactOffsetPage(pageno, true);
LWLockRelease(MultiXactOffsetControlLock);
}
/*
* Make sure that MultiXactMember has room for the members of a newly-
* allocated MultiXactId.
*
* The MultiXactMemberControlLock should be held at entry, and will be held
* at exit.
* Like the above routine, this is called while holding MultiXactGenLock;
* same comments apply.
*/
void
ExtendMultiXactMember(uint32 offset)
static void
ExtendMultiXactMember(MultiXactOffset offset, int nmembers)
{
int pageno;
/*
* It's possible that the members span more than one page of the
* members file, so we loop to ensure we consider each page. The
* coding is not optimal if the members span several pages, but
* that seems unusual enough to not worry much about.
*/
while (nmembers > 0)
{
int entryno;
/*
* No work except at first entry of a page.
* Only zero when at first entry of a page.
*/
if (MXOffsetToMemberEntry(offset) != 0)
return;
entryno = MXOffsetToMemberEntry(offset);
if (entryno == 0)
{
int pageno;
pageno = MXOffsetToMemberPage(offset);
/* Zero the page */
ZeroMultiXactMemberPage(pageno);
LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);
/* Zero the page and make an XLOG entry about it */
ZeroMultiXactMemberPage(pageno, true);
LWLockRelease(MultiXactMemberControlLock);
}
/* Advance to next page (OK if nmembers goes negative) */
offset += (MULTIXACT_MEMBERS_PER_PAGE - entryno);
nmembers -= (MULTIXACT_MEMBERS_PER_PAGE - entryno);
}
}
/*
......@@ -1392,14 +1495,16 @@ ExtendMultiXactMember(uint32 offset)
*
* This is called only during checkpoints. We assume no more than one
* backend does this at a time.
*
* XXX do we have any issues with needing to checkpoint here?
*/
static void
TruncateMultiXact(void)
{
MultiXactId nextMXact;
uint32 nextOffset;
MultiXactOffset nextOffset;
MultiXactId oldestMXact;
uint32 oldestOffset;
MultiXactOffset oldestOffset;
int cutoffPage;
int i;
......@@ -1460,7 +1565,7 @@ TruncateMultiXact(void)
int pageno;
int slotno;
int entryno;
uint32 *offptr;
MultiXactOffset *offptr;
LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);
......@@ -1468,7 +1573,7 @@ TruncateMultiXact(void)
entryno = MultiXactIdToOffsetEntry(oldestMXact);
slotno = SimpleLruReadPage(MultiXactOffsetCtl, pageno, oldestMXact);
offptr = (uint32 *) MultiXactOffsetCtl->shared->page_buffer[slotno];
offptr = (MultiXactOffset *) MultiXactOffsetCtl->shared->page_buffer[slotno];
offptr += entryno;
oldestOffset = *offptr;
......@@ -1529,11 +1634,11 @@ MultiXactOffsetPagePrecedes(int page1, int page2)
static bool
MultiXactMemberPagePrecedes(int page1, int page2)
{
uint32 offset1;
uint32 offset2;
MultiXactOffset offset1;
MultiXactOffset offset2;
offset1 = ((uint32) page1) * MULTIXACT_MEMBERS_PER_PAGE;
offset2 = ((uint32) page2) * MULTIXACT_MEMBERS_PER_PAGE;
offset1 = ((MultiXactOffset) page1) * MULTIXACT_MEMBERS_PER_PAGE;
offset2 = ((MultiXactOffset) page2) * MULTIXACT_MEMBERS_PER_PAGE;
return MultiXactOffsetPrecedes(offset1, offset2);
}
......@@ -1556,9 +1661,135 @@ MultiXactIdPrecedes(MultiXactId multi1, MultiXactId multi2)
* Decide which of two offsets is earlier.
*/
static bool
MultiXactOffsetPrecedes(uint32 offset1, uint32 offset2)
MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
{
int32 diff = (int32) (offset1 - offset2);
return (diff < 0);
}
/*
 * Emit an XLOG record noting that a multixact page has been zeroed.
 *
 * pageno is the page that was zeroed; info is the record's op code and
 * tells whether it was a MEMBERs or an OFFSETs page.
 *
 * The record is flagged XLOG_NO_TRAN (outside transaction control), because
 * it must be redone whether or not the invoking transaction commits.
 */
static void
WriteMZeroPageXlogRec(int pageno, uint8 info)
{
	XLogRecData zeroRec;

	/* The record body is nothing but the page number, in a one-entry chain */
	zeroRec.next = NULL;
	zeroRec.buffer = InvalidBuffer;
	zeroRec.len = sizeof(pageno);
	zeroRec.data = (char *) &pageno;

	(void) XLogInsert(RM_MULTIXACT_ID, info | XLOG_NO_TRAN, &zeroRec);
}
/*
* MULTIXACT resource manager's routines
*/
/*
 * Replay one XLOG record belonging to the MULTIXACT resource manager.
 *
 * The op code is the part of xl_info outside XLR_INFO_MASK:
 *
 *	ZERO_OFF_PAGE / ZERO_MEM_PAGE: zero the offsets (resp. members) page
 *		named in the record and write it out immediately; the page is
 *		expected to be clean afterwards.
 *	CREATE_ID: store the multixact's data again, then make sure the
 *		next-multixact, next-offset and next-XID counters are all beyond
 *		anything the record mentions.
 *
 * Any other op code is a PANIC.  The lsn argument is not used.
 */
void
multixact_redo(XLogRecPtr lsn, XLogRecord *record)
{
	uint8		opcode = record->xl_info & ~XLR_INFO_MASK;

	switch (opcode)
	{
		case XLOG_MULTIXACT_ZERO_OFF_PAGE:
			{
				int			zeroPage;
				int			slot;

				/* the record body is just the page number */
				memcpy(&zeroPage, XLogRecGetData(record), sizeof(int));

				LWLockAcquire(MultiXactOffsetControlLock, LW_EXCLUSIVE);

				/*
				 * NOTE(review): the "false" argument presumably suppresses
				 * emitting a fresh XLOG entry during replay -- confirm
				 * against ZeroMultiXactOffsetPage.
				 */
				slot = ZeroMultiXactOffsetPage(zeroPage, false);
				SimpleLruWritePage(MultiXactOffsetCtl, slot, NULL);
				Assert(MultiXactOffsetCtl->shared->page_status[slot] == SLRU_PAGE_CLEAN);

				LWLockRelease(MultiXactOffsetControlLock);
				break;
			}

		case XLOG_MULTIXACT_ZERO_MEM_PAGE:
			{
				int			zeroPage;
				int			slot;

				/* same as above, but for the members area */
				memcpy(&zeroPage, XLogRecGetData(record), sizeof(int));

				LWLockAcquire(MultiXactMemberControlLock, LW_EXCLUSIVE);

				slot = ZeroMultiXactMemberPage(zeroPage, false);
				SimpleLruWritePage(MultiXactMemberCtl, slot, NULL);
				Assert(MultiXactMemberCtl->shared->page_status[slot] == SLRU_PAGE_CLEAN);

				LWLockRelease(MultiXactMemberControlLock);
				break;
			}

		case XLOG_MULTIXACT_CREATE_ID:
			{
				xl_multixact_create *createRec =
					(xl_multixact_create *) XLogRecGetData(record);
				TransactionId newestXid;
				int			member;

				/* Put the multixact's data back into the SLRU files */
				RecordNewMultiXact(createRec->mid, createRec->moff,
								   createRec->nxids, createRec->xids);

				/* nextMXact/nextOffset must end up past what this record has */
				MultiXactAdvanceNextMXact(createRec->mid + 1,
										  createRec->moff + createRec->nxids);

				/*
				 * Also push nextXid past every XID the record mentions
				 * (its own xl_xid and each member).  This ought to be
				 * redundant, since any such XID should have other evidence
				 * in the XLOG, but it is done for safety.
				 */
				newestXid = record->xl_xid;
				for (member = 0; member < createRec->nxids; member++)
				{
					if (TransactionIdPrecedes(newestXid, createRec->xids[member]))
						newestXid = createRec->xids[member];
				}

				if (TransactionIdFollowsOrEquals(newestXid,
												 ShmemVariableCache->nextXid))
				{
					ShmemVariableCache->nextXid = newestXid;
					TransactionIdAdvance(ShmemVariableCache->nextXid);
				}
				break;
			}

		default:
			elog(PANIC, "multixact_redo: unknown op code %u", opcode);
			break;
	}
}
/*
 * Append a human-readable description of a MULTIXACT XLOG record to buf.
 *
 * xl_info supplies the op code (the bits outside XLR_INFO_MASK) and rec
 * points at the record body.  The text is added after whatever buf already
 * holds.  No buffer size is passed in, so the caller must supply a buffer
 * with enough room.  Unrecognized op codes are described as "UNKNOWN".
 */
void
multixact_desc(char *buf, uint8 xl_info, char *rec)
{
	uint8		opcode = xl_info & ~XLR_INFO_MASK;

	switch (opcode)
	{
		case XLOG_MULTIXACT_ZERO_OFF_PAGE:
			{
				int			zeroPage;

				/* the record body is just the page number */
				memcpy(&zeroPage, rec, sizeof(int));
				sprintf(buf + strlen(buf), "zero offsets page: %d", zeroPage);
				break;
			}

		case XLOG_MULTIXACT_ZERO_MEM_PAGE:
			{
				int			zeroPage;

				memcpy(&zeroPage, rec, sizeof(int));
				sprintf(buf + strlen(buf), "zero members page: %d", zeroPage);
				break;
			}

		case XLOG_MULTIXACT_CREATE_ID:
			{
				xl_multixact_create *createRec = (xl_multixact_create *) rec;
				int			member;

				sprintf(buf + strlen(buf), "create multixact %u offset %u:",
						createRec->mid, createRec->moff);

				/* then each member XID, space-separated */
				for (member = 0; member < createRec->nxids; member++)
					sprintf(buf + strlen(buf), " %u", createRec->xids[member]);
				break;
			}

		default:
			strcat(buf, "UNKNOWN");
			break;
	}
}
......@@ -3,7 +3,7 @@
*
* Resource managers definition
*
* $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.18 2005/06/06 17:01:22 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.19 2005/06/08 15:50:26 tgl Exp $
*/
#include "postgres.h"
......@@ -11,6 +11,7 @@
#include "access/gist_private.h"
#include "access/hash.h"
#include "access/heapam.h"
#include "access/multixact.h"
#include "access/nbtree.h"
#include "access/rtree.h"
#include "access/xact.h"
......@@ -28,7 +29,7 @@ const RmgrData RmgrTable[RM_MAX_ID + 1] = {
{"CLOG", clog_redo, clog_desc, NULL, NULL},
{"Database", dbase_redo, dbase_desc, NULL, NULL},
{"Tablespace", tblspc_redo, tblspc_desc, NULL, NULL},
{"Reserved 6", NULL, NULL, NULL, NULL},
{"MultiXact", multixact_redo, multixact_desc, NULL, NULL},
{"Reserved 7", NULL, NULL, NULL, NULL},
{"Reserved 8", NULL, NULL, NULL, NULL},
{"Reserved 9", NULL, NULL, NULL, NULL},
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.197 2005/06/06 20:22:57 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.198 2005/06/08 15:50:26 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -3688,12 +3688,13 @@ BootStrapXLOG(void)
checkPoint.nextXid = FirstNormalTransactionId;
checkPoint.nextOid = FirstBootstrapObjectId;
checkPoint.nextMulti = FirstMultiXactId;
checkPoint.nextMultiOffset = 0;
checkPoint.time = time(NULL);
ShmemVariableCache->nextXid = checkPoint.nextXid;
ShmemVariableCache->nextOid = checkPoint.nextOid;
ShmemVariableCache->oidCount = 0;
MultiXactSetNextMXact(checkPoint.nextMulti);
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
/* Set up the XLOG page header */
page->xlp_magic = XLOG_PAGE_MAGIC;
......@@ -4344,8 +4345,11 @@ StartupXLOG(void)
checkPoint.undo.xlogid, checkPoint.undo.xrecoff,
wasShutdown ? "TRUE" : "FALSE")));
ereport(LOG,
(errmsg("next transaction ID: %u; next OID: %u; next MultiXactId: %u",
checkPoint.nextXid, checkPoint.nextOid, checkPoint.nextMulti)));
(errmsg("next transaction ID: %u; next OID: %u",
checkPoint.nextXid, checkPoint.nextOid)));
ereport(LOG,
(errmsg("next MultiXactId: %u; next MultiXactOffset: %u",
checkPoint.nextMulti, checkPoint.nextMultiOffset)));
if (!TransactionIdIsNormal(checkPoint.nextXid))
ereport(PANIC,
(errmsg("invalid next transaction ID")));
......@@ -4353,7 +4357,7 @@ StartupXLOG(void)
ShmemVariableCache->nextXid = checkPoint.nextXid;
ShmemVariableCache->nextOid = checkPoint.nextOid;
ShmemVariableCache->oidCount = 0;
MultiXactSetNextMXact(checkPoint.nextMulti);
MultiXactSetNextMXact(checkPoint.nextMulti, checkPoint.nextMultiOffset);
/*
* We must replay WAL entries using the same TimeLineID they were
......@@ -5080,7 +5084,9 @@ CreateCheckPoint(bool shutdown, bool force)
checkPoint.nextOid += ShmemVariableCache->oidCount;
LWLockRelease(OidGenLock);
checkPoint.nextMulti = MultiXactGetCheckptMulti(shutdown);
MultiXactGetCheckptMulti(shutdown,
&checkPoint.nextMulti,
&checkPoint.nextMultiOffset);
/*
* Having constructed the checkpoint record, ensure all shmem disk
......@@ -5228,25 +5234,6 @@ XLogPutNextOid(Oid nextOid)
*/
}
/*
 * Insert an XLOG_NEXTMULTI record carrying the given next-MultiXactId value
 */
void
XLogPutNextMultiXactId(MultiXactId nextMulti)
{
	XLogRecData multiRec;

	/* The record body is just the MultiXactId itself */
	multiRec.next = NULL;
	multiRec.buffer = InvalidBuffer;
	multiRec.len = sizeof(nextMulti);
	multiRec.data = (char *) &nextMulti;

	(void) XLogInsert(RM_XLOG_ID, XLOG_NEXTMULTI, &multiRec);

	/*
	 * No XLOG flush is done here; that relies on heap_lock_tuple() always
	 * generating a WAL record of its own.  See the notes in that function.
	 */
}
/*
* XLOG resource manager's routines
*/
......@@ -5266,14 +5253,6 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
ShmemVariableCache->oidCount = 0;
}
}
else if (info == XLOG_NEXTMULTI)
{
MultiXactId nextMulti;
memcpy(&nextMulti, XLogRecGetData(record), sizeof(MultiXactId));
MultiXactAdvanceNextMXact(nextMulti);
}
else if (info == XLOG_CHECKPOINT_SHUTDOWN)
{
CheckPoint checkPoint;
......@@ -5283,7 +5262,8 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
ShmemVariableCache->nextXid = checkPoint.nextXid;
ShmemVariableCache->nextOid = checkPoint.nextOid;
ShmemVariableCache->oidCount = 0;
MultiXactSetNextMXact(checkPoint.nextMulti);
MultiXactSetNextMXact(checkPoint.nextMulti,
checkPoint.nextMultiOffset);
/*
* TLI may change in a shutdown checkpoint, but it shouldn't
......@@ -5315,7 +5295,8 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
ShmemVariableCache->nextOid = checkPoint.nextOid;
ShmemVariableCache->oidCount = 0;
}
MultiXactAdvanceNextMXact(checkPoint.nextMulti);
MultiXactAdvanceNextMXact(checkPoint.nextMulti,
checkPoint.nextMultiOffset);
/* TLI should not change in an on-line checkpoint */
if (checkPoint.ThisTimeLineID != ThisTimeLineID)
ereport(PANIC,
......@@ -5335,12 +5316,13 @@ xlog_desc(char *buf, uint8 xl_info, char *rec)
CheckPoint *checkpoint = (CheckPoint *) rec;
sprintf(buf + strlen(buf), "checkpoint: redo %X/%X; undo %X/%X; "
"tli %u; xid %u; oid %u; multi %u; %s",
"tli %u; xid %u; oid %u; multi %u; offset %u; %s",
checkpoint->redo.xlogid, checkpoint->redo.xrecoff,
checkpoint->undo.xlogid, checkpoint->undo.xrecoff,
checkpoint->ThisTimeLineID, checkpoint->nextXid,
checkpoint->nextOid,
checkpoint->nextMulti,
checkpoint->nextMultiOffset,
(info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online");
}
else if (info == XLOG_NEXTOID)
......@@ -5350,13 +5332,6 @@ xlog_desc(char *buf, uint8 xl_info, char *rec)
memcpy(&nextOid, rec, sizeof(Oid));
sprintf(buf + strlen(buf), "nextOid: %u", nextOid);
}
else if (info == XLOG_NEXTMULTI)
{
MultiXactId multi;
memcpy(&multi, rec, sizeof(MultiXactId));
sprintf(buf + strlen(buf), "nextMultiXact: %u", multi);
}
else
strcat(buf, "UNKNOWN");
}
......
......@@ -6,7 +6,7 @@
* copyright (c) Oliver Elphick <olly@lfix.co.uk>, 2001;
* licence: BSD
*
* $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.24 2005/06/02 05:55:29 tgl Exp $
* $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.25 2005/06/08 15:50:27 tgl Exp $
*/
#include "postgres.h"
......@@ -166,6 +166,7 @@ main(int argc, char *argv[])
printf(_("Latest checkpoint's NextXID: %u\n"), ControlFile.checkPointCopy.nextXid);
printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid);
printf(_("Latest checkpoint's NextMultiXactId: %u\n"), ControlFile.checkPointCopy.nextMulti);
printf(_("Latest checkpoint's NextMultiOffset: %u\n"), ControlFile.checkPointCopy.nextMultiOffset);
printf(_("Time of latest checkpoint: %s\n"), ckpttime_str);
printf(_("Database block size: %u\n"), ControlFile.blcksz);
printf(_("Blocks per segment of large relation: %u\n"), ControlFile.relseg_size);
......
......@@ -23,7 +23,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.33 2005/06/02 05:55:29 tgl Exp $
* $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.34 2005/06/08 15:50:27 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -77,6 +77,7 @@ main(int argc, char *argv[])
TransactionId set_xid = 0;
Oid set_oid = 0;
MultiXactId set_mxid = 0;
MultiXactOffset set_mxoff = -1;
uint32 minXlogTli = 0,
minXlogId = 0,
minXlogSeg = 0;
......@@ -106,7 +107,7 @@ main(int argc, char *argv[])
}
while ((c = getopt(argc, argv, "fl:m:no:x:")) != -1)
while ((c = getopt(argc, argv, "fl:m:no:O:x:")) != -1)
{
switch (c)
{
......@@ -163,6 +164,21 @@ main(int argc, char *argv[])
}
break;
case 'O':
set_mxoff = strtoul(optarg, &endptr, 0);
if (endptr == optarg || *endptr != '\0')
{
fprintf(stderr, _("%s: invalid argument for option -O\n"), progname);
fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
exit(1);
}
if (set_mxoff == -1)
{
fprintf(stderr, _("%s: multi transaction offset (-O) must not be -1\n"), progname);
exit(1);
}
break;
case 'l':
minXlogTli = strtoul(optarg, &endptr, 0);
if (endptr == optarg || *endptr != ',')
......@@ -265,6 +281,9 @@ main(int argc, char *argv[])
if (set_mxid != 0)
ControlFile.checkPointCopy.nextMulti = set_mxid;
if (set_mxoff != -1)
ControlFile.checkPointCopy.nextMultiOffset = set_mxoff;
if (minXlogTli > ControlFile.checkPointCopy.ThisTimeLineID)
ControlFile.checkPointCopy.ThisTimeLineID = minXlogTli;
......@@ -426,6 +445,7 @@ GuessControlValues(void)
ControlFile.checkPointCopy.nextXid = (TransactionId) 514; /* XXX */
ControlFile.checkPointCopy.nextOid = FirstBootstrapObjectId;
ControlFile.checkPointCopy.nextMulti = FirstMultiXactId;
ControlFile.checkPointCopy.nextMultiOffset = 0;
ControlFile.checkPointCopy.time = time(NULL);
ControlFile.state = DB_SHUTDOWNED;
......@@ -463,7 +483,7 @@ GuessControlValues(void)
/*
* XXX eventually, should try to grovel through old XLOG to develop
* more accurate values for TimeLineID, nextXID, and nextOID.
* more accurate values for TimeLineID, nextXID, etc.
*/
}
......@@ -500,6 +520,7 @@ PrintControlValues(bool guessed)
printf(_("Latest checkpoint's NextXID: %u\n"), ControlFile.checkPointCopy.nextXid);
printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid);
printf(_("Latest checkpoint's NextMultiXactId: %u\n"), ControlFile.checkPointCopy.nextMulti);
printf(_("Latest checkpoint's NextMultiOffset: %u\n"), ControlFile.checkPointCopy.nextMultiOffset);
printf(_("Database block size: %u\n"), ControlFile.blcksz);
printf(_("Blocks per segment of large relation: %u\n"), ControlFile.relseg_size);
printf(_("Maximum length of identifiers: %u\n"), ControlFile.nameDataLen);
......@@ -777,6 +798,7 @@ usage(void)
printf(_(" -o OID set next OID\n"));
printf(_(" -x XID set next transaction ID\n"));
printf(_(" -m multiXID set next multi transaction ID\n"));
printf(_(" -O multiOffset set next multi transaction offset\n"));
printf(_(" --help show this help, then exit\n"));
printf(_(" --version output version information, then exit\n"));
printf(_("\nReport bugs to <pgsql-bugs@postgresql.org>.\n"));
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/htup.h,v 1.74 2005/04/28 21:47:17 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/htup.h,v 1.75 2005/06/08 15:50:27 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -505,6 +505,8 @@ typedef struct xl_heap_newpage
/*
 * Describes a row lock taken on a heap tuple.
 * NOTE(review): presumably the body of the heap-lock XLOG record, going by
 * the xl_ naming -- confirm against the code that writes and replays it.
 */
typedef struct xl_heap_lock
{
xl_heaptid target; /* id of the tuple that was locked */
TransactionId locking_xid; /* locker; may be a MultiXactId rather than a plain xid */
bool xid_is_mxact; /* true if locking_xid is a MultiXactId */
bool shared_lock; /* true for a shared row lock, false for exclusive */
} xl_heap_lock;
......
......@@ -6,16 +6,38 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/multixact.h,v 1.2 2005/05/03 19:42:41 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/multixact.h,v 1.3 2005/06/08 15:50:28 tgl Exp $
*/
#ifndef MULTIXACT_H
#define MULTIXACT_H
#include "access/xlog.h"
#define InvalidMultiXactId ((MultiXactId) 0)
#define FirstMultiXactId ((MultiXactId) 1)
#define MultiXactIdIsValid(multi) ((multi) != InvalidMultiXactId)
/* ----------------
 * multixact-related XLOG entries
 *
 * These are the op codes of the MULTIXACT resource manager's XLOG records,
 * as dispatched on by multixact_redo() and multixact_desc().
 * ----------------
 */
/* an offsets page was zeroed; record body is the page number (int) */
#define XLOG_MULTIXACT_ZERO_OFF_PAGE 0x00
/* a members page was zeroed; record body is the page number (int) */
#define XLOG_MULTIXACT_ZERO_MEM_PAGE 0x10
/* a new MultiXactId was created; record body is an xl_multixact_create */
#define XLOG_MULTIXACT_CREATE_ID 0x20
/*
 * Body of an XLOG_MULTIXACT_CREATE_ID record: the new MultiXactId, where its
 * members start in the members file, and the member XIDs themselves.
 *
 * xids[] is declared with one element but actually holds nxids entries, so
 * sizeof(xl_multixact_create) is not the size of a record.
 */
typedef struct xl_multixact_create
{
MultiXactId mid; /* new MultiXact's ID */
MultiXactOffset moff; /* its starting offset in members file */
int32 nxids; /* number of member XIDs */
TransactionId xids[1]; /* VARIABLE LENGTH ARRAY */
} xl_multixact_create;
/* size of the fixed part of the record, i.e. everything before xids[] */
#define MinSizeOfMultiXactCreate offsetof(xl_multixact_create, xids)
extern MultiXactId MultiXactIdCreate(TransactionId xid1, TransactionId xid2);
extern MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid);
extern bool MultiXactIdIsRunning(MultiXactId multi);
......@@ -29,9 +51,16 @@ extern void MultiXactShmemInit(void);
extern void BootStrapMultiXact(void);
extern void StartupMultiXact(void);
extern void ShutdownMultiXact(void);
extern MultiXactId MultiXactGetCheckptMulti(bool is_shutdown);
extern void MultiXactGetCheckptMulti(bool is_shutdown,
MultiXactId *nextMulti,
MultiXactOffset *nextMultiOffset);
extern void CheckPointMultiXact(void);
extern void MultiXactSetNextMXact(MultiXactId nextMulti);
extern void MultiXactAdvanceNextMXact(MultiXactId minMulti);
extern void MultiXactSetNextMXact(MultiXactId nextMulti,
MultiXactOffset nextMultiOffset);
extern void MultiXactAdvanceNextMXact(MultiXactId minMulti,
MultiXactOffset minMultiOffset);
extern void multixact_redo(XLogRecPtr lsn, XLogRecord *record);
extern void multixact_desc(char *buf, uint8 xl_info, char *rec);
#endif /* MULTIXACT_H */
......@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.64 2005/06/06 20:22:58 tgl Exp $
* $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.65 2005/06/08 15:50:28 tgl Exp $
*/
#ifndef XLOG_H
#define XLOG_H
......@@ -165,7 +165,6 @@ extern void ShutdownXLOG(int code, Datum arg);
extern void InitXLOGAccess(void);
extern void CreateCheckPoint(bool shutdown, bool force);
extern void XLogPutNextOid(Oid nextOid);
extern void XLogPutNextMultiXactId(MultiXactId multi);
extern XLogRecPtr GetRedoRecPtr(void);
#endif /* XLOG_H */
......@@ -12,7 +12,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/c.h,v 1.184 2005/05/25 21:40:41 momjian Exp $
* $PostgreSQL: pgsql/src/include/c.h,v 1.185 2005/06/08 15:50:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -388,6 +388,8 @@ typedef uint32 SubTransactionId;
/* MultiXactId must be equivalent to TransactionId, to fit in t_xmax */
typedef TransactionId MultiXactId;
typedef uint32 MultiXactOffset;
typedef uint32 CommandId;
#define FirstCommandId ((CommandId) 0)
......
......@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.22 2005/06/02 05:55:29 tgl Exp $
* $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.23 2005/06/08 15:50:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -22,7 +22,7 @@
/* Version identifier for this pg_control format */
#define PG_CONTROL_VERSION 810
#define PG_CONTROL_VERSION 811
/*
* Body of CheckPoint XLOG records. This is declared here because we keep
......@@ -40,13 +40,13 @@ typedef struct CheckPoint
TransactionId nextXid; /* next free XID */
Oid nextOid; /* next free OID */
MultiXactId nextMulti; /* next free MultiXactId */
MultiXactOffset nextMultiOffset; /* next free MultiXact offset */
time_t time; /* time stamp of checkpoint */
} CheckPoint;
/* XLOG info values for XLOG rmgr */
#define XLOG_CHECKPOINT_SHUTDOWN 0x00
#define XLOG_CHECKPOINT_ONLINE 0x10
#define XLOG_NEXTMULTI 0x20
#define XLOG_NEXTOID 0x30
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment