Commit 2042b342 authored by Tom Lane

Invent WAL timelines, as per recent discussion, to make point-in-time
recovery more manageable.  Also, undo recent change to add FILE_HEADER
and WASTED_SPACE records to XLOG; instead make the XLOG page header
variable-size with extra fields in the first page of an XLOG file.
This should fix the boundary-case bugs observed by Mark Kirkwood.
initdb forced due to change of XLOG representation.
parent 8d3517dc
<!-- <!--
$PostgreSQL: pgsql/doc/src/sgml/page.sgml,v 1.17 2003/12/14 00:10:32 neilc Exp $ $PostgreSQL: pgsql/doc/src/sgml/page.sgml,v 1.18 2004/07/21 22:31:18 tgl Exp $
--> -->
<chapter id="page"> <chapter id="page">
...@@ -114,37 +114,38 @@ data. Empty in ordinary tables.</entry> ...@@ -114,37 +114,38 @@ data. Empty in ordinary tables.</entry>
<entry>pd_lsn</entry> <entry>pd_lsn</entry>
<entry>XLogRecPtr</entry> <entry>XLogRecPtr</entry>
<entry>8 bytes</entry> <entry>8 bytes</entry>
<entry>LSN: next byte after last byte of xlog</entry> <entry>LSN: next byte after last byte of xlog record for last change
to this page</entry>
</row> </row>
<row> <row>
<entry>pd_sui</entry> <entry>pd_tli</entry>
<entry>StartUpID</entry> <entry>TimeLineID</entry>
<entry>4 bytes</entry> <entry>4 bytes</entry>
<entry>SUI of last changes (currently it's used by heap AM only)</entry> <entry>TLI of last change</entry>
</row> </row>
<row> <row>
<entry>pd_lower</entry> <entry>pd_lower</entry>
<entry>LocationIndex</entry> <entry>LocationIndex</entry>
<entry>2 bytes</entry> <entry>2 bytes</entry>
<entry>Offset to start of free space.</entry> <entry>Offset to start of free space</entry>
</row> </row>
<row> <row>
<entry>pd_upper</entry> <entry>pd_upper</entry>
<entry>LocationIndex</entry> <entry>LocationIndex</entry>
<entry>2 bytes</entry> <entry>2 bytes</entry>
<entry>Offset to end of free space.</entry> <entry>Offset to end of free space</entry>
</row> </row>
<row> <row>
<entry>pd_special</entry> <entry>pd_special</entry>
<entry>LocationIndex</entry> <entry>LocationIndex</entry>
<entry>2 bytes</entry> <entry>2 bytes</entry>
<entry>Offset to start of special space.</entry> <entry>Offset to start of special space</entry>
</row> </row>
<row> <row>
<entry>pd_pagesize_version</entry> <entry>pd_pagesize_version</entry>
<entry>uint16</entry> <entry>uint16</entry>
<entry>2 bytes</entry> <entry>2 bytes</entry>
<entry>Page size and layout version number information.</entry> <entry>Page size and layout version number information</entry>
</row> </row>
</tbody> </tbody>
</tgroup> </tgroup>
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.170 2004/07/11 18:01:44 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.171 2004/07/21 22:31:19 tgl Exp $
* *
* *
* INTERFACE ROUTINES * INTERFACE ROUTINES
...@@ -1214,7 +1214,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid) ...@@ -1214,7 +1214,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid)
recptr = XLogInsert(RM_HEAP_ID, info, rdata); recptr = XLogInsert(RM_HEAP_ID, info, rdata);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
} }
else else
{ {
...@@ -1390,7 +1390,7 @@ l1: ...@@ -1390,7 +1390,7 @@ l1:
recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE, rdata); recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_DELETE, rdata);
PageSetLSN(dp, recptr); PageSetLSN(dp, recptr);
PageSetSUI(dp, ThisStartUpID); PageSetTLI(dp, ThisTimeLineID);
} }
else else
{ {
...@@ -1748,10 +1748,10 @@ l2: ...@@ -1748,10 +1748,10 @@ l2:
if (newbuf != buffer) if (newbuf != buffer)
{ {
PageSetLSN(BufferGetPage(newbuf), recptr); PageSetLSN(BufferGetPage(newbuf), recptr);
PageSetSUI(BufferGetPage(newbuf), ThisStartUpID); PageSetTLI(BufferGetPage(newbuf), ThisTimeLineID);
} }
PageSetLSN(BufferGetPage(buffer), recptr); PageSetLSN(BufferGetPage(buffer), recptr);
PageSetSUI(BufferGetPage(buffer), ThisStartUpID); PageSetTLI(BufferGetPage(buffer), ThisTimeLineID);
} }
else else
{ {
...@@ -1902,7 +1902,7 @@ l3: ...@@ -1902,7 +1902,7 @@ l3:
* XLOG stuff: no logging is required as long as we have no * XLOG stuff: no logging is required as long as we have no
* savepoints. For savepoints private log could be used... * savepoints. For savepoints private log could be used...
*/ */
PageSetSUI(BufferGetPage(*buffer), ThisStartUpID); PageSetTLI(BufferGetPage(*buffer), ThisTimeLineID);
/* store transaction information of xact marking the tuple */ /* store transaction information of xact marking the tuple */
tuple->t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED | tuple->t_data->t_infomask &= ~(HEAP_XMAX_COMMITTED |
...@@ -2184,7 +2184,7 @@ heap_xlog_clean(bool redo, XLogRecPtr lsn, XLogRecord *record) ...@@ -2184,7 +2184,7 @@ heap_xlog_clean(bool redo, XLogRecPtr lsn, XLogRecord *record)
PageRepairFragmentation(page, NULL); PageRepairFragmentation(page, NULL);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID); /* prev sui */ PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
} }
...@@ -2217,7 +2217,7 @@ heap_xlog_newpage(bool redo, XLogRecPtr lsn, XLogRecord *record) ...@@ -2217,7 +2217,7 @@ heap_xlog_newpage(bool redo, XLogRecPtr lsn, XLogRecord *record)
memcpy(page, (char *) xlrec + SizeOfHeapNewpage, BLCKSZ); memcpy(page, (char *) xlrec + SizeOfHeapNewpage, BLCKSZ);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
} }
...@@ -2283,7 +2283,7 @@ heap_xlog_delete(bool redo, XLogRecPtr lsn, XLogRecord *record) ...@@ -2283,7 +2283,7 @@ heap_xlog_delete(bool redo, XLogRecPtr lsn, XLogRecord *record)
/* Make sure there is no forward chain link in t_ctid */ /* Make sure there is no forward chain link in t_ctid */
htup->t_ctid = xlrec->target.tid; htup->t_ctid = xlrec->target.tid;
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
return; return;
...@@ -2368,7 +2368,7 @@ heap_xlog_insert(bool redo, XLogRecPtr lsn, XLogRecord *record) ...@@ -2368,7 +2368,7 @@ heap_xlog_insert(bool redo, XLogRecPtr lsn, XLogRecord *record)
if (offnum == InvalidOffsetNumber) if (offnum == InvalidOffsetNumber)
elog(PANIC, "heap_insert_redo: failed to add tuple"); elog(PANIC, "heap_insert_redo: failed to add tuple");
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID); /* prev sui */ PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
return; return;
...@@ -2466,7 +2466,7 @@ heap_xlog_update(bool redo, XLogRecPtr lsn, XLogRecord *record, bool move) ...@@ -2466,7 +2466,7 @@ heap_xlog_update(bool redo, XLogRecPtr lsn, XLogRecord *record, bool move)
if (samepage) if (samepage)
goto newsame; goto newsame;
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
goto newt; goto newt;
...@@ -2564,7 +2564,7 @@ newsame:; ...@@ -2564,7 +2564,7 @@ newsame:;
if (offnum == InvalidOffsetNumber) if (offnum == InvalidOffsetNumber)
elog(PANIC, "heap_update_redo: failed to add tuple"); elog(PANIC, "heap_update_redo: failed to add tuple");
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID); /* prev sui */ PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
return; return;
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.112 2004/04/21 18:24:25 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.113 2004/07/21 22:31:19 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -621,11 +621,11 @@ _bt_insertonpg(Relation rel, ...@@ -621,11 +621,11 @@ _bt_insertonpg(Relation rel,
if (BufferIsValid(metabuf)) if (BufferIsValid(metabuf))
{ {
PageSetLSN(metapg, recptr); PageSetLSN(metapg, recptr);
PageSetSUI(metapg, ThisStartUpID); PageSetTLI(metapg, ThisTimeLineID);
} }
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
} }
END_CRIT_SECTION(); END_CRIT_SECTION();
...@@ -903,13 +903,13 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright, ...@@ -903,13 +903,13 @@ _bt_split(Relation rel, Buffer buf, OffsetNumber firstright,
recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata); recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
PageSetLSN(leftpage, recptr); PageSetLSN(leftpage, recptr);
PageSetSUI(leftpage, ThisStartUpID); PageSetTLI(leftpage, ThisTimeLineID);
PageSetLSN(rightpage, recptr); PageSetLSN(rightpage, recptr);
PageSetSUI(rightpage, ThisStartUpID); PageSetTLI(rightpage, ThisTimeLineID);
if (!P_RIGHTMOST(ropaque)) if (!P_RIGHTMOST(ropaque))
{ {
PageSetLSN(spage, recptr); PageSetLSN(spage, recptr);
PageSetSUI(spage, ThisStartUpID); PageSetTLI(spage, ThisTimeLineID);
} }
} }
...@@ -1494,13 +1494,13 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf) ...@@ -1494,13 +1494,13 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, rdata); recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, rdata);
PageSetLSN(rootpage, recptr); PageSetLSN(rootpage, recptr);
PageSetSUI(rootpage, ThisStartUpID); PageSetTLI(rootpage, ThisTimeLineID);
PageSetLSN(metapg, recptr); PageSetLSN(metapg, recptr);
PageSetSUI(metapg, ThisStartUpID); PageSetTLI(metapg, ThisTimeLineID);
PageSetLSN(lpage, recptr); PageSetLSN(lpage, recptr);
PageSetSUI(lpage, ThisStartUpID); PageSetTLI(lpage, ThisTimeLineID);
PageSetLSN(rpage, recptr); PageSetLSN(rpage, recptr);
PageSetSUI(rpage, ThisStartUpID); PageSetTLI(rpage, ThisTimeLineID);
} }
END_CRIT_SECTION(); END_CRIT_SECTION();
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.76 2004/06/02 17:28:17 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtpage.c,v 1.77 2004/07/21 22:31:20 tgl Exp $
* *
* NOTES * NOTES
* Postgres btree pages look like ordinary relation pages. The opaque * Postgres btree pages look like ordinary relation pages. The opaque
...@@ -84,7 +84,7 @@ _bt_metapinit(Relation rel) ...@@ -84,7 +84,7 @@ _bt_metapinit(Relation rel)
rdata); rdata);
PageSetLSN(pg, recptr); PageSetLSN(pg, recptr);
PageSetSUI(pg, ThisStartUpID); PageSetTLI(pg, ThisTimeLineID);
} }
END_CRIT_SECTION(); END_CRIT_SECTION();
...@@ -249,9 +249,9 @@ _bt_getroot(Relation rel, int access) ...@@ -249,9 +249,9 @@ _bt_getroot(Relation rel, int access)
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, &rdata); recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, &rdata);
PageSetLSN(rootpage, recptr); PageSetLSN(rootpage, recptr);
PageSetSUI(rootpage, ThisStartUpID); PageSetTLI(rootpage, ThisTimeLineID);
PageSetLSN(metapg, recptr); PageSetLSN(metapg, recptr);
PageSetSUI(metapg, ThisStartUpID); PageSetTLI(metapg, ThisTimeLineID);
} }
END_CRIT_SECTION(); END_CRIT_SECTION();
...@@ -686,7 +686,7 @@ _bt_delitems(Relation rel, Buffer buf, ...@@ -686,7 +686,7 @@ _bt_delitems(Relation rel, Buffer buf,
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata); recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
} }
END_CRIT_SECTION(); END_CRIT_SECTION();
...@@ -1080,22 +1080,22 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full) ...@@ -1080,22 +1080,22 @@ _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full)
if (BufferIsValid(metabuf)) if (BufferIsValid(metabuf))
{ {
PageSetLSN(metapg, recptr); PageSetLSN(metapg, recptr);
PageSetSUI(metapg, ThisStartUpID); PageSetTLI(metapg, ThisTimeLineID);
} }
page = BufferGetPage(pbuf); page = BufferGetPage(pbuf);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
page = BufferGetPage(rbuf); page = BufferGetPage(rbuf);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
page = BufferGetPage(buf); page = BufferGetPage(buf);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
if (BufferIsValid(lbuf)) if (BufferIsValid(lbuf))
{ {
page = BufferGetPage(lbuf); page = BufferGetPage(lbuf);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
} }
} }
......
...@@ -56,7 +56,7 @@ ...@@ -56,7 +56,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.84 2004/07/19 02:47:03 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.85 2004/07/21 22:31:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -299,14 +299,14 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno) ...@@ -299,14 +299,14 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata); recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
END_CRIT_SECTION(); END_CRIT_SECTION();
} }
else else
{ {
/* Leave the page LSN zero if not WAL-logged, but set SUI anyway */ /* Leave the page LSN zero if not WAL-logged, but set TLI anyway */
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
} }
/* /*
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.15 2004/07/11 18:01:45 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/nbtree/nbtxlog.c,v 1.16 2004/07/21 22:31:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -136,7 +136,7 @@ _bt_restore_meta(Relation reln, XLogRecPtr lsn, ...@@ -136,7 +136,7 @@ _bt_restore_meta(Relation reln, XLogRecPtr lsn,
pageop->btpo_flags = BTP_META; pageop->btpo_flags = BTP_META;
PageSetLSN(metapg, lsn); PageSetLSN(metapg, lsn);
PageSetSUI(metapg, ThisStartUpID); PageSetTLI(metapg, ThisTimeLineID);
LockBuffer(metabuf, BUFFER_LOCK_UNLOCK); LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
WriteBuffer(metabuf); WriteBuffer(metabuf);
} }
...@@ -197,7 +197,7 @@ btree_xlog_insert(bool redo, bool isleaf, bool ismeta, ...@@ -197,7 +197,7 @@ btree_xlog_insert(bool redo, bool isleaf, bool ismeta,
elog(PANIC, "btree_insert_redo: failed to add item"); elog(PANIC, "btree_insert_redo: failed to add item");
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
} }
...@@ -281,7 +281,7 @@ btree_xlog_split(bool redo, bool onleft, bool isroot, ...@@ -281,7 +281,7 @@ btree_xlog_split(bool redo, bool onleft, bool isroot,
xlrec->leftlen); xlrec->leftlen);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
} }
...@@ -317,7 +317,7 @@ btree_xlog_split(bool redo, bool onleft, bool isroot, ...@@ -317,7 +317,7 @@ btree_xlog_split(bool redo, bool onleft, bool isroot,
record->xl_len - SizeOfBtreeSplit - xlrec->leftlen); record->xl_len - SizeOfBtreeSplit - xlrec->leftlen);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
} }
...@@ -353,7 +353,7 @@ btree_xlog_split(bool redo, bool onleft, bool isroot, ...@@ -353,7 +353,7 @@ btree_xlog_split(bool redo, bool onleft, bool isroot,
pageop->btpo_prev = rightsib; pageop->btpo_prev = rightsib;
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
} }
...@@ -420,7 +420,7 @@ btree_xlog_delete(bool redo, XLogRecPtr lsn, XLogRecord *record) ...@@ -420,7 +420,7 @@ btree_xlog_delete(bool redo, XLogRecPtr lsn, XLogRecord *record)
} }
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
} }
...@@ -489,7 +489,7 @@ btree_xlog_delete_page(bool redo, bool ismeta, ...@@ -489,7 +489,7 @@ btree_xlog_delete_page(bool redo, bool ismeta,
} }
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
} }
...@@ -515,7 +515,7 @@ btree_xlog_delete_page(bool redo, bool ismeta, ...@@ -515,7 +515,7 @@ btree_xlog_delete_page(bool redo, bool ismeta,
pageop->btpo_prev = leftsib; pageop->btpo_prev = leftsib;
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
} }
...@@ -543,7 +543,7 @@ btree_xlog_delete_page(bool redo, bool ismeta, ...@@ -543,7 +543,7 @@ btree_xlog_delete_page(bool redo, bool ismeta,
pageop->btpo_next = rightsib; pageop->btpo_next = rightsib;
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
} }
...@@ -569,7 +569,7 @@ btree_xlog_delete_page(bool redo, bool ismeta, ...@@ -569,7 +569,7 @@ btree_xlog_delete_page(bool redo, bool ismeta,
pageop->btpo_flags = BTP_DELETED; pageop->btpo_flags = BTP_DELETED;
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
} }
...@@ -632,7 +632,7 @@ btree_xlog_newroot(bool redo, XLogRecPtr lsn, XLogRecord *record) ...@@ -632,7 +632,7 @@ btree_xlog_newroot(bool redo, XLogRecPtr lsn, XLogRecord *record)
record->xl_len - SizeOfBtreeNewroot); record->xl_len - SizeOfBtreeNewroot);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
......
...@@ -63,4 +63,12 @@ ...@@ -63,4 +63,12 @@
# #
#recovery_target_inclusive = 'true' # 'true' or 'false' #recovery_target_inclusive = 'true' # 'true' or 'false'
# #
#
# If you want to recover into a timeline other than the "main line" shown in
# pg_control, specify the timeline number here, or write 'latest' to get
# the latest branch for which there's a history file.
#
#recovery_target_timeline = '33' # number or 'latest'
#
#
#--------------------------------------------------------------------------- #---------------------------------------------------------------------------
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
* *
* Resource managers definition * Resource managers definition
* *
* $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.13 2004/07/01 00:49:42 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.14 2004/07/21 22:31:20 tgl Exp $
*/ */
#include "postgres.h" #include "postgres.h"
...@@ -14,12 +14,12 @@ ...@@ -14,12 +14,12 @@
#include "access/rtree.h" #include "access/rtree.h"
#include "access/slru.h" #include "access/slru.h"
#include "access/xact.h" #include "access/xact.h"
#include "access/xlog.h" #include "access/xlog_internal.h"
#include "storage/smgr.h" #include "storage/smgr.h"
#include "commands/sequence.h" #include "commands/sequence.h"
RmgrData RmgrTable[RM_MAX_ID + 1] = { const RmgrData RmgrTable[RM_MAX_ID + 1] = {
{"XLOG", xlog_redo, xlog_undo, xlog_desc, NULL, NULL}, {"XLOG", xlog_redo, xlog_undo, xlog_desc, NULL, NULL},
{"Transaction", xact_redo, xact_undo, xact_desc, NULL, NULL}, {"Transaction", xact_redo, xact_undo, xact_desc, NULL, NULL},
{"Storage", smgr_redo, smgr_undo, smgr_desc, NULL, NULL}, {"Storage", smgr_redo, smgr_undo, smgr_desc, NULL, NULL},
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.17 2004/07/01 00:49:42 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/slru.c,v 1.18 2004/07/21 22:31:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include "postmaster/bgwriter.h" #include "postmaster/bgwriter.h"
#include "storage/fd.h" #include "storage/fd.h"
#include "storage/lwlock.h" #include "storage/lwlock.h"
#include "storage/shmem.h"
#include "miscadmin.h" #include "miscadmin.h"
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.149 2004/07/19 14:34:39 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.150 2004/07/21 22:31:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -24,12 +24,13 @@ ...@@ -24,12 +24,13 @@
#include "access/clog.h" #include "access/clog.h"
#include "access/subtrans.h" #include "access/subtrans.h"
#include "access/transam.h"
#include "access/xact.h" #include "access/xact.h"
#include "access/xlog.h" #include "access/xlog.h"
#include "access/xlog_internal.h"
#include "access/xlogutils.h" #include "access/xlogutils.h"
#include "catalog/catversion.h" #include "catalog/catversion.h"
#include "catalog/pg_control.h" #include "catalog/pg_control.h"
#include "miscadmin.h"
#include "postmaster/bgwriter.h" #include "postmaster/bgwriter.h"
#include "storage/bufpage.h" #include "storage/bufpage.h"
#include "storage/fd.h" #include "storage/fd.h"
...@@ -41,7 +42,6 @@ ...@@ -41,7 +42,6 @@
#include "utils/builtins.h" #include "utils/builtins.h"
#include "utils/guc.h" #include "utils/guc.h"
#include "utils/relcache.h" #include "utils/relcache.h"
#include "miscadmin.h"
/* /*
...@@ -121,25 +121,57 @@ static int open_sync_bit = DEFAULT_SYNC_FLAGBIT; ...@@ -121,25 +121,57 @@ static int open_sync_bit = DEFAULT_SYNC_FLAGBIT;
/* /*
* ThisStartUpID will be same in all backends --- it identifies current * ThisTimeLineID will be same in all backends --- it identifies current
* instance of the database system. * WAL timeline for the database system.
*/ */
StartUpID ThisStartUpID = 0; TimeLineID ThisTimeLineID = 0;
/* Are we doing recovery from XLOG? */ /* Are we doing recovery from XLOG? */
bool InRecovery = false; bool InRecovery = false;
/* Are we recovering using offline XLOG archives? */ /* Are we recovering using offline XLOG archives? */
static bool InArchiveRecovery = false; static bool InArchiveRecovery = false;
/* Was the last file restored from archive, or local? */ /* Was the last xlog file restored from archive, or local? */
static bool restoredFromArchive = false; static bool restoredFromArchive = false;
static char recoveryRestoreCommand[MAXPGPATH]; /* options taken from recovery.conf */
static char *recoveryRestoreCommand = NULL;
static bool recoveryTarget = false; static bool recoveryTarget = false;
static bool recoveryTargetExact = false; static bool recoveryTargetExact = false;
static bool recoveryTargetInclusive = true; static bool recoveryTargetInclusive = true;
static TransactionId recoveryTargetXid; static TransactionId recoveryTargetXid;
static time_t recoveryTargetTime; static time_t recoveryTargetTime;
/* if recoveryStopsHere returns true, it saves actual stop xid/time here */
static TransactionId recoveryStopXid;
static time_t recoveryStopTime;
static bool recoveryStopAfter;
/*
* During normal operation, the only timeline we care about is ThisTimeLineID.
* During recovery, however, things are more complicated. To simplify life
* for rmgr code, we keep ThisTimeLineID set to the "current" timeline as we
* scan through the WAL history (that is, it is the line that was active when
* the currently-scanned WAL record was generated). We also need these
* timeline values:
*
* recoveryTargetTLI: the desired timeline that we want to end in.
*
* expectedTLIs: an integer list of recoveryTargetTLI and the TLIs of
* its known parents, newest first (so recoveryTargetTLI is always the
* first list member). Only these TLIs are expected to be seen in the WAL
* segments we read, and indeed only these TLIs will be considered as
* candidate WAL files to open at all.
*
* curFileTLI: the TLI appearing in the name of the current input WAL file.
* (This is not necessarily the same as ThisTimeLineID, because we could
* be scanning data that was copied from an ancestor timeline when the current
* file was created.) During a sequential scan we do not allow this value
* to decrease.
*/
static TimeLineID recoveryTargetTLI;
static List *expectedTLIs;
static TimeLineID curFileTLI;
/* /*
* MyLastRecPtr points to the start of the last XLOG record inserted by the * MyLastRecPtr points to the start of the last XLOG record inserted by the
* current transaction. If MyLastRecPtr.xrecoff == 0, then the current * current transaction. If MyLastRecPtr.xrecoff == 0, then the current
...@@ -242,12 +274,19 @@ static XLogRecPtr RedoRecPtr; ...@@ -242,12 +274,19 @@ static XLogRecPtr RedoRecPtr;
* *
*---------- *----------
*/ */
typedef struct XLogwrtRqst typedef struct XLogwrtRqst
{ {
XLogRecPtr Write; /* last byte + 1 to write out */ XLogRecPtr Write; /* last byte + 1 to write out */
XLogRecPtr Flush; /* last byte + 1 to flush */ XLogRecPtr Flush; /* last byte + 1 to flush */
} XLogwrtRqst; } XLogwrtRqst;
typedef struct XLogwrtResult
{
XLogRecPtr Write; /* last byte + 1 written out */
XLogRecPtr Flush; /* last byte + 1 flushed */
} XLogwrtResult;
/* /*
* Shared state data for XLogInsert. * Shared state data for XLogInsert.
*/ */
...@@ -293,7 +332,7 @@ typedef struct XLogCtlData ...@@ -293,7 +332,7 @@ typedef struct XLogCtlData
XLogRecPtr *xlblocks; /* 1st byte ptr-s + BLCKSZ */ XLogRecPtr *xlblocks; /* 1st byte ptr-s + BLCKSZ */
uint32 XLogCacheByte; /* # bytes in xlog buffers */ uint32 XLogCacheByte; /* # bytes in xlog buffers */
uint32 XLogCacheBlck; /* highest allocated xlog buffer index */ uint32 XLogCacheBlck; /* highest allocated xlog buffer index */
StartUpID ThisStartUpID; TimeLineID ThisTimeLineID;
slock_t info_lck; /* locks shared LogwrtRqst/LogwrtResult */ slock_t info_lck; /* locks shared LogwrtRqst/LogwrtResult */
} XLogCtlData; } XLogCtlData;
...@@ -323,99 +362,15 @@ static ControlFileData *ControlFile = NULL; ...@@ -323,99 +362,15 @@ static ControlFileData *ControlFile = NULL;
XLogCtl->xlblocks[curridx].xrecoff - INSERT_FREESPACE(Insert) \ XLogCtl->xlblocks[curridx].xrecoff - INSERT_FREESPACE(Insert) \
) )
/* Increment an xlogid/segment pair */
#define NextLogSeg(logId, logSeg) \
do { \
if ((logSeg) >= XLogSegsPerFile-1) \
{ \
(logId)++; \
(logSeg) = 0; \
} \
else \
(logSeg)++; \
} while (0)
/* Decrement an xlogid/segment pair (assume it's not 0,0) */
#define PrevLogSeg(logId, logSeg) \
do { \
if (logSeg) \
(logSeg)--; \
else \
{ \
(logId)--; \
(logSeg) = XLogSegsPerFile-1; \
} \
} while (0)
/*
* Compute ID and segment from an XLogRecPtr.
*
* For XLByteToSeg, do the computation at face value. For XLByteToPrevSeg,
* a boundary byte is taken to be in the previous segment. This is suitable
* for deciding which segment to write given a pointer to a record end,
* for example. (We can assume xrecoff is not zero, since no valid recptr
* can have that.)
*/
#define XLByteToSeg(xlrp, logId, logSeg) \
( logId = (xlrp).xlogid, \
logSeg = (xlrp).xrecoff / XLogSegSize \
)
#define XLByteToPrevSeg(xlrp, logId, logSeg) \
( logId = (xlrp).xlogid, \
logSeg = ((xlrp).xrecoff - 1) / XLogSegSize \
)
/*
* Is an XLogRecPtr within a particular XLOG segment?
*
* For XLByteInSeg, do the computation at face value. For XLByteInPrevSeg,
* a boundary byte is taken to be in the previous segment.
*/
#define XLByteInSeg(xlrp, logId, logSeg) \
((xlrp).xlogid == (logId) && \
(xlrp).xrecoff / XLogSegSize == (logSeg))
#define XLByteInPrevSeg(xlrp, logId, logSeg) \
((xlrp).xlogid == (logId) && \
((xlrp).xrecoff - 1) / XLogSegSize == (logSeg))
#define PrevBufIdx(idx) \ #define PrevBufIdx(idx) \
(((idx) == 0) ? XLogCtl->XLogCacheBlck : ((idx) - 1)) (((idx) == 0) ? XLogCtl->XLogCacheBlck : ((idx) - 1))
#define NextBufIdx(idx) \ #define NextBufIdx(idx) \
(((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1)) (((idx) == XLogCtl->XLogCacheBlck) ? 0 : ((idx) + 1))
#define XRecOffIsValid(xrecoff) \
((xrecoff) % BLCKSZ >= SizeOfXLogPHD && \
(BLCKSZ - (xrecoff) % BLCKSZ) >= SizeOfXLogRecord)
/*
* These macros encapsulate knowledge about the exact layout of XLog file
* names as well as archive-status file names.
*/
#define MAXFNAMELEN 32
#define XLogFileName(fname, log, seg) \
snprintf(fname, MAXFNAMELEN, "%08X%08X", log, seg)
#define XLogFilePath(path, log, seg) \
snprintf(path, MAXPGPATH, "%s/%08X%08X", XLogDir, log, seg)
#define StatusFilePath(path, xlog, suffix) \
snprintf(path, MAXPGPATH, "%s/archive_status/%s%s", XLogDir, xlog, suffix)
/*
* _INTL_MAXLOGRECSZ: max space needed for a record including header and
* any backup-block data.
*/
#define _INTL_MAXLOGRECSZ (SizeOfXLogRecord + MAXLOGRECSZ + \
XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
/* File path names */ /* File path names */
static char XLogDir[MAXPGPATH]; char XLogDir[MAXPGPATH];
static char ControlFilePath[MAXPGPATH]; static char ControlFilePath[MAXPGPATH];
/* /*
...@@ -453,36 +408,44 @@ static char *readBuf = NULL; ...@@ -453,36 +408,44 @@ static char *readBuf = NULL;
static XLogRecPtr ReadRecPtr; static XLogRecPtr ReadRecPtr;
static XLogRecPtr EndRecPtr; static XLogRecPtr EndRecPtr;
static XLogRecord *nextRecord = NULL; static XLogRecord *nextRecord = NULL;
static StartUpID lastReadSUI; static TimeLineID lastPageTLI = 0;
static bool InRedo = false; static bool InRedo = false;
static void XLogArchiveNotify(const char *xlog); static void XLogArchiveNotify(const char *xlog);
static void XLogArchiveNotifySeg(uint32 log, uint32 seg); static void XLogArchiveNotifySeg(uint32 log, uint32 seg);
static bool XLogArchiveIsDone(const char *xlog); static bool XLogArchiveIsDone(const char *xlog);
static void XLogArchiveCleanup(const char *xlog); static void XLogArchiveCleanup(const char *xlog);
static void readRecoveryCommandFile(void); static void readRecoveryCommandFile(void);
static void exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, static void exitArchiveRecovery(TimeLineID endTLI,
uint32 xrecoff); uint32 endLogId, uint32 endLogSeg);
static bool recoveryStopsHere(XLogRecord *record, bool *includeThis); static bool recoveryStopsHere(XLogRecord *record, bool *includeThis);
static bool AdvanceXLInsertBuffer(void); static bool AdvanceXLInsertBuffer(void);
static bool WasteXLInsertBuffer(void);
static void XLogWrite(XLogwrtRqst WriteRqst); static void XLogWrite(XLogwrtRqst WriteRqst);
static int XLogFileInit(uint32 log, uint32 seg, static int XLogFileInit(uint32 log, uint32 seg,
bool *use_existent, bool use_lock); bool *use_existent, bool use_lock);
static bool InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath, static bool InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath,
bool find_free, int max_advance, bool find_free, int max_advance,
bool use_lock); bool use_lock);
static int XLogFileOpen(uint32 log, uint32 seg, bool econt); static int XLogFileOpen(uint32 log, uint32 seg);
static void RestoreArchivedXLog(char *path, uint32 log, uint32 seg); static int XLogFileRead(uint32 log, uint32 seg, int emode);
static bool RestoreArchivedFile(char *path, const char *xlogfname,
const char *recovername);
static void PreallocXlogFiles(XLogRecPtr endptr); static void PreallocXlogFiles(XLogRecPtr endptr);
static void MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr); static void MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr);
static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer); static XLogRecord *ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer);
static bool ValidXLOGHeader(XLogPageHeader hdr, int emode, bool checkSUI); static bool ValidXLOGHeader(XLogPageHeader hdr, int emode);
static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr, static XLogRecord *ReadCheckpointRecord(XLogRecPtr RecPtr,
int whichChkpt, int whichChkpt,
char *buffer); char *buffer);
static List *readTimeLineHistory(TimeLineID targetTLI);
static bool existsTimeLineHistory(TimeLineID probeTLI);
static TimeLineID findNewestTimeLine(TimeLineID startTLI);
static void writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
TimeLineID endTLI,
uint32 endLogId, uint32 endLogSeg);
static void WriteControlFile(void); static void WriteControlFile(void);
static void ReadControlFile(void); static void ReadControlFile(void);
static char *str_time(time_t tnow); static char *str_time(time_t tnow);
...@@ -546,7 +509,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata) ...@@ -546,7 +509,7 @@ XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata)
if (IsBootstrapProcessingMode() && rmid != RM_XLOG_ID) if (IsBootstrapProcessingMode() && rmid != RM_XLOG_ID)
{ {
RecPtr.xlogid = 0; RecPtr.xlogid = 0;
RecPtr.xrecoff = SizeOfXLogPHD; /* start of 1st checkpoint record */ RecPtr.xrecoff = SizeOfXLogLongPHD; /* start of 1st chkpt record */
return (RecPtr); return (RecPtr);
} }
...@@ -755,16 +718,9 @@ begin:; ...@@ -755,16 +718,9 @@ begin:;
} }
/* /*
* Determine exactly where we will place the new XLOG record. If there * If there isn't enough space on the current XLOG page for a record
* isn't enough space on the current XLOG page for a record header, * header, advance to the next page (leaving the unused space as zeroes).
* advance to the next page (leaving the unused space as zeroes).
* If there isn't enough space in the current XLOG segment for the whole
* record, advance to the next segment (inserting wasted-space records).
* This avoids needing a continuation record at the start of a segment
* file, which would conflict with placing a FILE_HEADER record there.
* We assume that no XLOG record can be larger than a segment file...
*/ */
updrqst = false; updrqst = false;
freespace = INSERT_FREESPACE(Insert); freespace = INSERT_FREESPACE(Insert);
if (freespace < SizeOfXLogRecord) if (freespace < SizeOfXLogRecord)
...@@ -773,27 +729,6 @@ begin:; ...@@ -773,27 +729,6 @@ begin:;
freespace = INSERT_FREESPACE(Insert); freespace = INSERT_FREESPACE(Insert);
} }
if (freespace < (uint32) (SizeOfXLogRecord + write_len))
{
/* Doesn't fit on this page, so check for overrunning the file */
uint32 avail;
/* First figure the space available in remaining pages of file */
avail = XLogSegSize - BLCKSZ -
(Insert->currpage->xlp_pageaddr.xrecoff % XLogSegSize);
avail /= BLCKSZ; /* convert to pages, then usable bytes */
avail *= (BLCKSZ - SizeOfXLogPHD - SizeOfXLogContRecord);
avail += freespace; /* add in the current page too */
if (avail < (uint32) (SizeOfXLogRecord + write_len))
{
/* It overruns the file, so waste the rest of the file... */
do {
updrqst = WasteXLInsertBuffer();
} while ((Insert->currpage->xlp_pageaddr.xrecoff % XLogSegSize) != 0);
freespace = INSERT_FREESPACE(Insert);
}
}
curridx = Insert->curridx; curridx = Insert->curridx;
record = (XLogRecord *) Insert->currpos; record = (XLogRecord *) Insert->currpos;
...@@ -891,14 +826,12 @@ begin:; ...@@ -891,14 +826,12 @@ begin:;
/* Use next buffer */ /* Use next buffer */
updrqst = AdvanceXLInsertBuffer(); updrqst = AdvanceXLInsertBuffer();
curridx = Insert->curridx; curridx = Insert->curridx;
/* This assert checks we did not insert a file header record */
Assert(INSERT_FREESPACE(Insert) == BLCKSZ - SizeOfXLogPHD);
/* Insert cont-record header */ /* Insert cont-record header */
Insert->currpage->xlp_info |= XLP_FIRST_IS_CONTRECORD; Insert->currpage->xlp_info |= XLP_FIRST_IS_CONTRECORD;
contrecord = (XLogContRecord *) Insert->currpos; contrecord = (XLogContRecord *) Insert->currpos;
contrecord->xl_rem_len = write_len; contrecord->xl_rem_len = write_len;
Insert->currpos += SizeOfXLogContRecord; Insert->currpos += SizeOfXLogContRecord;
freespace = BLCKSZ - SizeOfXLogPHD - SizeOfXLogContRecord; freespace = INSERT_FREESPACE(Insert);
} }
/* Ensure next record will be properly aligned */ /* Ensure next record will be properly aligned */
...@@ -949,9 +882,9 @@ begin:; ...@@ -949,9 +882,9 @@ begin:;
* Create an archive notification file * Create an archive notification file
* *
* The name of the notification file is the message that will be picked up * The name of the notification file is the message that will be picked up
* by the archiver, e.g. we write 00000001000000C6.ready * by the archiver, e.g. we write 0000000100000001000000C6.ready
* and the archiver then knows to archive XLogDir/00000001000000C6, * and the archiver then knows to archive XLogDir/0000000100000001000000C6,
* then when complete, rename it to 00000001000000C6.done * then when complete, rename it to 0000000100000001000000C6.done
*/ */
static void static void
XLogArchiveNotify(const char *xlog) XLogArchiveNotify(const char *xlog)
...@@ -990,7 +923,7 @@ XLogArchiveNotifySeg(uint32 log, uint32 seg) ...@@ -990,7 +923,7 @@ XLogArchiveNotifySeg(uint32 log, uint32 seg)
{ {
char xlog[MAXFNAMELEN]; char xlog[MAXFNAMELEN];
XLogFileName(xlog, log, seg); XLogFileName(xlog, ThisTimeLineID, log, seg);
XLogArchiveNotify(xlog); XLogArchiveNotify(xlog);
} }
...@@ -1035,16 +968,22 @@ XLogArchiveIsDone(const char *xlog) ...@@ -1035,16 +968,22 @@ XLogArchiveIsDone(const char *xlog)
/* /*
* XLogArchiveCleanup * XLogArchiveCleanup
* *
* Cleanup an archive notification file for a particular xlog segment * Cleanup archive notification file(s) for a particular xlog segment
*/ */
static void static void
XLogArchiveCleanup(const char *xlog) XLogArchiveCleanup(const char *xlog)
{ {
char archiveStatusPath[MAXPGPATH]; char archiveStatusPath[MAXPGPATH];
/* Remove the .done file */
StatusFilePath(archiveStatusPath, xlog, ".done"); StatusFilePath(archiveStatusPath, xlog, ".done");
unlink(archiveStatusPath); unlink(archiveStatusPath);
/* should we complain about failure? */ /* should we complain about failure? */
/* Remove the .ready file if present --- normally it shouldn't be */
StatusFilePath(archiveStatusPath, xlog, ".ready");
unlink(archiveStatusPath);
/* should we complain about failure? */
} }
/* /*
...@@ -1151,7 +1090,7 @@ AdvanceXLInsertBuffer(void) ...@@ -1151,7 +1090,7 @@ AdvanceXLInsertBuffer(void)
NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * BLCKSZ); NewPage = (XLogPageHeader) (XLogCtl->pages + nextidx * BLCKSZ);
Insert->curridx = nextidx; Insert->curridx = nextidx;
Insert->currpage = NewPage; Insert->currpage = NewPage;
Insert->currpos = ((char *) NewPage) + SizeOfXLogPHD; Insert->currpos = ((char *) NewPage) + SizeOfXLogShortPHD;
/* /*
* Be sure to re-zero the buffer so that bytes beyond what we've * Be sure to re-zero the buffer so that bytes beyond what we've
...@@ -1164,103 +1103,26 @@ AdvanceXLInsertBuffer(void) ...@@ -1164,103 +1103,26 @@ AdvanceXLInsertBuffer(void)
*/ */
NewPage->xlp_magic = XLOG_PAGE_MAGIC; NewPage->xlp_magic = XLOG_PAGE_MAGIC;
/* NewPage->xlp_info = 0; */ /* done by memset */ /* NewPage->xlp_info = 0; */ /* done by memset */
NewPage->xlp_sui = ThisStartUpID; NewPage->xlp_tli = ThisTimeLineID;
NewPage->xlp_pageaddr.xlogid = NewPageEndPtr.xlogid; NewPage->xlp_pageaddr.xlogid = NewPageEndPtr.xlogid;
NewPage->xlp_pageaddr.xrecoff = NewPageEndPtr.xrecoff - BLCKSZ; NewPage->xlp_pageaddr.xrecoff = NewPageEndPtr.xrecoff - BLCKSZ;
/* /*
* If first page of an XLOG segment file, add a FILE_HEADER record. * If first page of an XLOG segment file, make it a long header.
*/ */
if ((NewPage->xlp_pageaddr.xrecoff % XLogSegSize) == 0) if ((NewPage->xlp_pageaddr.xrecoff % XLogSegSize) == 0)
{ {
XLogRecPtr RecPtr; XLogLongPageHeader NewLongPage = (XLogLongPageHeader) NewPage;
XLogRecord *record;
XLogFileHeaderData *fhdr;
crc64 crc;
record = (XLogRecord *) Insert->currpos; NewLongPage->xlp_sysid = ControlFile->system_identifier;
record->xl_prev = Insert->PrevRecord; NewLongPage->xlp_seg_size = XLogSegSize;
record->xl_xact_prev.xlogid = 0; NewPage->xlp_info |= XLP_LONG_HEADER;
record->xl_xact_prev.xrecoff = 0; Insert->currpos = ((char *) NewPage) + SizeOfXLogLongPHD;
record->xl_xid = InvalidTransactionId;
record->xl_len = SizeOfXLogFHD;
record->xl_info = XLOG_FILE_HEADER;
record->xl_rmid = RM_XLOG_ID;
fhdr = (XLogFileHeaderData *) XLogRecGetData(record);
fhdr->xlfhd_sysid = ControlFile->system_identifier;
fhdr->xlfhd_xlogid = NewPage->xlp_pageaddr.xlogid;
fhdr->xlfhd_segno = NewPage->xlp_pageaddr.xrecoff / XLogSegSize;
fhdr->xlfhd_seg_size = XLogSegSize;
INIT_CRC64(crc);
COMP_CRC64(crc, fhdr, SizeOfXLogFHD);
COMP_CRC64(crc, (char *) record + sizeof(crc64),
SizeOfXLogRecord - sizeof(crc64));
FIN_CRC64(crc);
record->xl_crc = crc;
/* Compute record's XLOG location */
INSERT_RECPTR(RecPtr, Insert, nextidx);
/* Record begin of record in appropriate places */
Insert->PrevRecord = RecPtr;
Insert->currpos += SizeOfXLogRecord + SizeOfXLogFHD;
} }
return update_needed; return update_needed;
} }
/*
* Fill the remainder of the current XLOG page with an XLOG_WASTED_SPACE
* record, and advance to the next page. This has the same calling and
* result conditions as AdvanceXLInsertBuffer, except that
* AdvanceXLInsertBuffer expects the current page to be already filled.
*/
static bool
WasteXLInsertBuffer(void)
{
XLogCtlInsert *Insert = &XLogCtl->Insert;
XLogRecord *record;
XLogRecPtr RecPtr;
uint32 freespace;
uint16 curridx;
crc64 rdata_crc;
freespace = INSERT_FREESPACE(Insert);
Assert(freespace >= SizeOfXLogRecord);
freespace -= SizeOfXLogRecord;
curridx = Insert->curridx;
record = (XLogRecord *) Insert->currpos;
record->xl_prev = Insert->PrevRecord;
record->xl_xact_prev.xlogid = 0;
record->xl_xact_prev.xrecoff = 0;
record->xl_xid = InvalidTransactionId;
record->xl_len = freespace;
record->xl_info = XLOG_WASTED_SPACE;
record->xl_rmid = RM_XLOG_ID;
INIT_CRC64(rdata_crc);
COMP_CRC64(rdata_crc, XLogRecGetData(record), freespace);
COMP_CRC64(rdata_crc, (char *) record + sizeof(crc64),
SizeOfXLogRecord - sizeof(crc64));
FIN_CRC64(rdata_crc);
record->xl_crc = rdata_crc;
/* Compute record's XLOG location */
INSERT_RECPTR(RecPtr, Insert, curridx);
/* Record begin of record in appropriate places */
Insert->PrevRecord = RecPtr;
/* We needn't bother to advance Insert->currpos */
return AdvanceXLInsertBuffer();
}
/* /*
* Write and/or fsync the log at least as far as WriteRqst indicates. * Write and/or fsync the log at least as far as WriteRqst indicates.
* *
...@@ -1355,7 +1217,7 @@ XLogWrite(XLogwrtRqst WriteRqst) ...@@ -1355,7 +1217,7 @@ XLogWrite(XLogwrtRqst WriteRqst)
if (openLogFile < 0) if (openLogFile < 0)
{ {
XLByteToPrevSeg(LogwrtResult.Write, openLogId, openLogSeg); XLByteToPrevSeg(LogwrtResult.Write, openLogId, openLogSeg);
openLogFile = XLogFileOpen(openLogId, openLogSeg, false); openLogFile = XLogFileOpen(openLogId, openLogSeg);
openLogOff = 0; openLogOff = 0;
} }
...@@ -1439,7 +1301,7 @@ XLogWrite(XLogwrtRqst WriteRqst) ...@@ -1439,7 +1301,7 @@ XLogWrite(XLogwrtRqst WriteRqst)
if (openLogFile < 0) if (openLogFile < 0)
{ {
XLByteToPrevSeg(LogwrtResult.Write, openLogId, openLogSeg); XLByteToPrevSeg(LogwrtResult.Write, openLogId, openLogSeg);
openLogFile = XLogFileOpen(openLogId, openLogSeg, false); openLogFile = XLogFileOpen(openLogId, openLogSeg);
openLogOff = 0; openLogOff = 0;
} }
issue_xlog_fsync(); issue_xlog_fsync();
...@@ -1617,7 +1479,7 @@ XLogFileInit(uint32 log, uint32 seg, ...@@ -1617,7 +1479,7 @@ XLogFileInit(uint32 log, uint32 seg,
int fd; int fd;
int nbytes; int nbytes;
XLogFilePath(path, log, seg); XLogFilePath(path, ThisTimeLineID, log, seg);
/* /*
* Try to use existent file (checkpoint maker may have created it * Try to use existent file (checkpoint maker may have created it
...@@ -1730,6 +1592,109 @@ XLogFileInit(uint32 log, uint32 seg, ...@@ -1730,6 +1592,109 @@ XLogFileInit(uint32 log, uint32 seg,
return (fd); return (fd);
} }
/*
 * Build XLOG segment (log, seg) as a copy of an already-existing segment
 * file.
 *
 * log, seg: the segment to create.
 *
 * srcTLI, srclog, srcseg: the segment to copy from, which may belong to a
 * different timeline.
 *
 * At present this is called only during recovery, so no locking is done.
 * We are nonetheless as careful as XLogFileInit about not installing a
 * bogus file: the data goes into a temp file, which is fsync'd and closed
 * before it is moved into place.  Every failure is reported as PANIC.
 */
static void
XLogFileCopy(uint32 log, uint32 seg,
			 TimeLineID srcTLI, uint32 srclog, uint32 srcseg)
{
	char		srcpath[MAXPGPATH];
	char		tmppath[MAXPGPATH];
	char		block[BLCKSZ];
	int			srcfd;
	int			dstfd;
	int			copied;

	/*
	 * The source segment has to be there and be readable.
	 */
	XLogFilePath(srcpath, srcTLI, srclog, srcseg);
	srcfd = BasicOpenFile(srcpath, O_RDONLY | PG_BINARY, 0);
	if (srcfd < 0)
		ereport(PANIC,
				(errcode_for_file_access(),
				 errmsg("could not open file \"%s\": %m", srcpath)));

	/*
	 * The copy is made under a per-process temp name; get rid of any
	 * leftover file of that name first, since we create with O_EXCL.
	 */
	snprintf(tmppath, MAXPGPATH, "%s/xlogtemp.%d",
			 XLogDir, (int) getpid());

	unlink(tmppath);

	/* do not use XLOG_SYNC_BIT here --- want to fsync only at end of fill */
	dstfd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL | PG_BINARY,
						  S_IRUSR | S_IWUSR);
	if (dstfd < 0)
		ereport(PANIC,
				(errcode_for_file_access(),
				 errmsg("could not create file \"%s\": %m", tmppath)));

	/*
	 * Transfer one whole segment, a block at a time.  A short read or a
	 * short write is treated as failure.
	 */
	copied = 0;
	while (copied < XLogSegSize)
	{
		int			got;
		int			put;

		errno = 0;
		got = (int) read(srcfd, block, sizeof(block));
		if (got != (int) sizeof(block))
		{
			/* errno still zero means the source file was simply too short */
			if (errno == 0)
				ereport(PANIC,
						(errmsg("insufficient data in file \"%s\"", srcpath)));
			else
				ereport(PANIC,
						(errcode_for_file_access(),
						 errmsg("could not read file \"%s\": %m", srcpath)));
		}

		errno = 0;
		put = (int) write(dstfd, block, sizeof(block));
		if (put != (int) sizeof(block))
		{
			int			save_errno = errno;

			/* Remove the partial file so that it stops using disk space */
			unlink(tmppath);

			/* if write didn't set errno, assume problem is no disk space */
			if (save_errno == 0)
				save_errno = ENOSPC;
			errno = save_errno;

			ereport(PANIC,
					(errcode_for_file_access(),
					 errmsg("could not write to file \"%s\": %m", tmppath)));
		}

		copied += sizeof(block);
	}

	/* Force the new file to disk before giving it its real name */
	if (pg_fsync(dstfd) != 0)
		ereport(PANIC,
				(errcode_for_file_access(),
				 errmsg("could not fsync file \"%s\": %m", tmppath)));

	if (close(dstfd))
		ereport(PANIC,
				(errcode_for_file_access(),
				 errmsg("could not close file \"%s\": %m", tmppath)));

	close(srcfd);

	/*
	 * Finally, install the finished copy under the segment's proper name.
	 */
	if (!InstallXLogFileSegment(log, seg, tmppath, false, 0, false))
		elog(PANIC, "InstallXLogFileSegment should not have failed");
}
/* /*
* Install a new XLOG segment file as a current or future log segment. * Install a new XLOG segment file as a current or future log segment.
* *
...@@ -1763,7 +1728,7 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath, ...@@ -1763,7 +1728,7 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath,
char path[MAXPGPATH]; char path[MAXPGPATH];
struct stat stat_buf; struct stat stat_buf;
XLogFilePath(path, log, seg); XLogFilePath(path, ThisTimeLineID, log, seg);
/* /*
* We want to be sure that only one process does this at a time. * We want to be sure that only one process does this at a time.
...@@ -1789,7 +1754,7 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath, ...@@ -1789,7 +1754,7 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath,
return false; return false;
} }
NextLogSeg(log, seg); NextLogSeg(log, seg);
XLogFilePath(path, log, seg); XLogFilePath(path, ThisTimeLineID, log, seg);
} }
} }
...@@ -1820,73 +1785,102 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath, ...@@ -1820,73 +1785,102 @@ InstallXLogFileSegment(uint32 log, uint32 seg, char *tmppath,
} }
/* /*
* Open a pre-existing logfile segment. * Open a pre-existing logfile segment for writing.
*/ */
static int static int
XLogFileOpen(uint32 log, uint32 seg, bool econt) XLogFileOpen(uint32 log, uint32 seg)
{ {
char path[MAXPGPATH]; char path[MAXPGPATH];
int fd; int fd;
if (InArchiveRecovery) XLogFilePath(path, ThisTimeLineID, log, seg);
RestoreArchivedXLog(path, log, seg);
else
XLogFilePath(path, log, seg);
fd = BasicOpenFile(path, O_RDWR | PG_BINARY | XLOG_SYNC_BIT, fd = BasicOpenFile(path, O_RDWR | PG_BINARY | XLOG_SYNC_BIT,
S_IRUSR | S_IWUSR); S_IRUSR | S_IWUSR);
if (fd < 0) if (fd < 0)
{
if (econt && errno == ENOENT)
{
ereport(LOG,
(errcode_for_file_access(),
errmsg("could not open file \"%s\" (log file %u, segment %u): %m",
path, log, seg)));
return (fd);
}
ereport(PANIC, ereport(PANIC,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not open file \"%s\" (log file %u, segment %u): %m", errmsg("could not open file \"%s\" (log file %u, segment %u): %m",
path, log, seg))); path, log, seg)));
}
return fd;
}
/*
* Open a logfile segment for reading (during recovery).
*/
static int
XLogFileRead(uint32 log, uint32 seg, int emode)
{
char path[MAXPGPATH];
char xlogfname[MAXFNAMELEN];
ListCell *cell;
int fd;
/* /*
* XXX this is a pretty horrid hack. Remove after implementing timelines. * Loop looking for a suitable timeline ID: we might need to
* * read any of the timelines listed in expectedTLIs.
* if we switched back to local xlogs after having been
* restoring from archive, we need to make sure that the
* local files don't get removed by end-of-recovery checkpoint
* in case we need to re-run the recovery
* *
* we want to copy these away as soon as possible, so set * We expect curFileTLI on entry to be the TLI of the preceding file
* the archive status flag to .ready for them * in sequence, or 0 if there was no predecessor. We do not allow
* in case admin isn't cautious enough to have done this anyway * curFileTLI to go backwards; this prevents us from picking up the
* * wrong file when a parent timeline extends to higher segment numbers
* XXX this is completely broken, because there is no guarantee this file * than the child we want to read.
* is actually complete and ready to be archived. Also, what if there's
* a .done file for them?
*/ */
if (InArchiveRecovery && !restoredFromArchive) foreach(cell, expectedTLIs)
XLogArchiveNotifySeg(log, seg); {
TimeLineID tli = (TimeLineID) lfirst_int(cell);
return (fd); if (tli < curFileTLI)
break; /* don't bother looking at too-old TLIs */
if (InArchiveRecovery)
{
XLogFileName(xlogfname, tli, log, seg);
restoredFromArchive = RestoreArchivedFile(path, xlogfname,
"RECOVERYXLOG");
}
else
XLogFilePath(path, tli, log, seg);
fd = BasicOpenFile(path, O_RDONLY | PG_BINARY, 0);
if (fd >= 0)
{
/* Success! */
curFileTLI = tli;
return fd;
}
if (errno != ENOENT) /* unexpected failure? */
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not open file \"%s\" (log file %u, segment %u): %m",
path, log, seg)));
}
/* Couldn't find it. For simplicity, complain about front timeline */
XLogFilePath(path, recoveryTargetTLI, log, seg);
errno = ENOENT;
ereport(emode,
(errcode_for_file_access(),
errmsg("could not open file \"%s\" (log file %u, segment %u): %m",
path, log, seg)));
return -1;
} }
/* /*
* Get next logfile segment when using off-line archive for recovery * Attempt to retrieve the specified file from off-line archival storage.
*
* Attempt to retrieve the specified segment from off-line archival storage.
* If successful, fill "path" with its complete path (note that this will be * If successful, fill "path" with its complete path (note that this will be
* a temp file name that doesn't follow the normal naming convention). * a temp file name that doesn't follow the normal naming convention), and
* return TRUE.
* *
* If not successful, fill "path" with the name of the normal on-line segment * If not successful, fill "path" with the name of the normal on-line file
* file (which may or may not actually exist, but we'll try to use it). * (which may or may not actually exist, but we'll try to use it), and return
* FALSE.
*/ */
static void static bool
RestoreArchivedXLog(char *path, uint32 log, uint32 seg) RestoreArchivedFile(char *path, const char *xlogfname,
const char *recovername)
{ {
char xlogfname[MAXFNAMELEN];
char xlogpath[MAXPGPATH]; char xlogpath[MAXPGPATH];
char xlogRestoreCmd[MAXPGPATH]; char xlogRestoreCmd[MAXPGPATH];
char *dp; char *dp;
...@@ -1919,11 +1913,10 @@ RestoreArchivedXLog(char *path, uint32 log, uint32 seg) ...@@ -1919,11 +1913,10 @@ RestoreArchivedXLog(char *path, uint32 log, uint32 seg)
* The copy-from-archive filename is always the same, ensuring that we * The copy-from-archive filename is always the same, ensuring that we
* don't run out of disk space on long recoveries. * don't run out of disk space on long recoveries.
*/ */
XLogFileName(xlogfname, log, seg); snprintf(xlogpath, MAXPGPATH, "%s/%s", XLogDir, recovername);
snprintf(xlogpath, MAXPGPATH, "%s/RECOVERYXLOG", XLogDir);
/* /*
* Make sure there is no existing RECOVERYXLOG file. * Make sure there is no existing file named recovername.
*/ */
if (stat(xlogpath, &stat_buf) != 0) if (stat(xlogpath, &stat_buf) != 0)
{ {
...@@ -2004,8 +1997,7 @@ RestoreArchivedXLog(char *path, uint32 log, uint32 seg) ...@@ -2004,8 +1997,7 @@ RestoreArchivedXLog(char *path, uint32 log, uint32 seg)
(errmsg("restored log file \"%s\" from archive", (errmsg("restored log file \"%s\" from archive",
xlogfname))); xlogfname)));
strcpy(path, xlogpath); strcpy(path, xlogpath);
restoredFromArchive = true; return true;
return;
} }
if (errno != ENOENT) if (errno != ENOENT)
ereport(FATAL, ereport(FATAL,
...@@ -2033,8 +2025,8 @@ RestoreArchivedXLog(char *path, uint32 log, uint32 seg) ...@@ -2033,8 +2025,8 @@ RestoreArchivedXLog(char *path, uint32 log, uint32 seg)
* In many recovery scenarios we expect this to fail also, but * In many recovery scenarios we expect this to fail also, but
* if so that just means we've reached the end of WAL. * if so that just means we've reached the end of WAL.
*/ */
XLogFilePath(path, log, seg); snprintf(path, MAXPGPATH, "%s/%s", XLogDir, xlogfname);
restoredFromArchive = false; return false;
} }
/* /*
...@@ -2085,18 +2077,25 @@ MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr) ...@@ -2085,18 +2077,25 @@ MoveOfflineLogs(uint32 log, uint32 seg, XLogRecPtr endptr)
errmsg("could not open transaction log directory \"%s\": %m", errmsg("could not open transaction log directory \"%s\": %m",
XLogDir))); XLogDir)));
XLogFileName(lastoff, log, seg); XLogFileName(lastoff, ThisTimeLineID, log, seg);
errno = 0; errno = 0;
while ((xlde = readdir(xldir)) != NULL) while ((xlde = readdir(xldir)) != NULL)
{ {
/* /*
* use the alphanumeric sorting property of the filenames to decide * We ignore the timeline part of the XLOG segment identifiers in
* which ones are earlier than the lastoff segment * deciding whether a segment is still needed. This ensures that
* we won't prematurely remove a segment from a parent timeline.
* We could probably be a little more proactive about removing
* segments of non-parent timelines, but that would be a whole lot
* more complicated.
*
* We use the alphanumeric sorting property of the filenames to decide
* which ones are earlier than the lastoff segment.
*/ */
if (strlen(xlde->d_name) == 16 && if (strlen(xlde->d_name) == 24 &&
strspn(xlde->d_name, "0123456789ABCDEF") == 16 && strspn(xlde->d_name, "0123456789ABCDEF") == 24 &&
strcmp(xlde->d_name, lastoff) <= 0) strcmp(xlde->d_name + 8, lastoff + 8) <= 0)
{ {
bool recycle; bool recycle;
...@@ -2185,7 +2184,7 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn) ...@@ -2185,7 +2184,7 @@ RestoreBkpBlocks(XLogRecord *record, XLogRecPtr lsn)
page = (Page) BufferGetPage(buffer); page = (Page) BufferGetPage(buffer);
memcpy((char *) page, blk, BLCKSZ); memcpy((char *) page, blk, BLCKSZ);
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
} }
...@@ -2272,11 +2271,13 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer) ...@@ -2272,11 +2271,13 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer)
{ {
XLogRecord *record; XLogRecord *record;
XLogRecPtr tmpRecPtr = EndRecPtr; XLogRecPtr tmpRecPtr = EndRecPtr;
bool randAccess = false;
uint32 len, uint32 len,
total_len; total_len;
uint32 targetPageOff; uint32 targetPageOff;
uint32 targetRecOff;
uint32 pageHeaderSize;
unsigned i; unsigned i;
bool nextmode = false;
if (readBuf == NULL) if (readBuf == NULL)
{ {
...@@ -2295,7 +2296,6 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer) ...@@ -2295,7 +2296,6 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer)
if (RecPtr == NULL) if (RecPtr == NULL)
{ {
RecPtr = &tmpRecPtr; RecPtr = &tmpRecPtr;
nextmode = true;
/* fast case if next record is on same page */ /* fast case if next record is on same page */
if (nextRecord != NULL) if (nextRecord != NULL)
{ {
...@@ -2310,12 +2310,24 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer) ...@@ -2310,12 +2310,24 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer)
(tmpRecPtr.xlogid)++; (tmpRecPtr.xlogid)++;
tmpRecPtr.xrecoff = 0; tmpRecPtr.xrecoff = 0;
} }
tmpRecPtr.xrecoff += SizeOfXLogPHD; /* We will account for page header size below */
}
else
{
if (!XRecOffIsValid(RecPtr->xrecoff))
ereport(PANIC,
(errmsg("invalid record offset at %X/%X",
RecPtr->xlogid, RecPtr->xrecoff)));
/*
* Since we are going to a random position in WAL, forget any
* prior state about what timeline we were in, and allow it
* to be any timeline in expectedTLIs. We also set a flag to
* allow curFileTLI to go backwards (but we can't reset that
* variable right here, since we might not change files at all).
*/
lastPageTLI = 0; /* see comment in ValidXLOGHeader */
randAccess = true; /* allow curFileTLI to go backwards too */
} }
else if (!XRecOffIsValid(RecPtr->xrecoff))
ereport(PANIC,
(errmsg("invalid record offset at %X/%X",
RecPtr->xlogid, RecPtr->xrecoff)));
if (readFile >= 0 && !XLByteInSeg(*RecPtr, readId, readSeg)) if (readFile >= 0 && !XLByteInSeg(*RecPtr, readId, readSeg))
{ {
...@@ -2325,7 +2337,11 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer) ...@@ -2325,7 +2337,11 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer)
XLByteToSeg(*RecPtr, readId, readSeg); XLByteToSeg(*RecPtr, readId, readSeg);
if (readFile < 0) if (readFile < 0)
{ {
readFile = XLogFileOpen(readId, readSeg, (emode == LOG)); /* Now it's okay to reset curFileTLI if random fetch */
if (randAccess)
curFileTLI = 0;
readFile = XLogFileRead(readId, readSeg, emode);
if (readFile < 0) if (readFile < 0)
goto next_record_is_invalid; goto next_record_is_invalid;
readOff = (uint32) (-1); /* force read to occur below */ readOff = (uint32) (-1); /* force read to occur below */
...@@ -2351,11 +2367,30 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer) ...@@ -2351,11 +2367,30 @@ ReadRecord(XLogRecPtr *RecPtr, int emode, char *buffer)
readId, readSeg, readOff))); readId, readSeg, readOff)));
goto next_record_is_invalid; goto next_record_is_invalid;
} }
if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode, nextmode)) if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode))
goto next_record_is_invalid; goto next_record_is_invalid;
} }
pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
targetRecOff = RecPtr->xrecoff % BLCKSZ;
if (targetRecOff == 0)
{
/*
* Can only get here in the continuing-from-prev-page case, because
* XRecOffIsValid eliminated the zero-page-offset case otherwise.
* Need to skip over the new page's header.
*/
tmpRecPtr.xrecoff += pageHeaderSize;
targetRecOff = pageHeaderSize;
}
else if (targetRecOff < pageHeaderSize)
{
ereport(emode,
(errmsg("invalid record offset at %X/%X",
RecPtr->xlogid, RecPtr->xrecoff)));
goto next_record_is_invalid;
}
if ((((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) && if ((((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD) &&
RecPtr->xrecoff % BLCKSZ == SizeOfXLogPHD) targetRecOff == pageHeaderSize)
{ {
ereport(emode, ereport(emode,
(errmsg("contrecord is requested by %X/%X", (errmsg("contrecord is requested by %X/%X",
...@@ -2428,7 +2463,7 @@ got_record:; ...@@ -2428,7 +2463,7 @@ got_record:;
close(readFile); close(readFile);
readFile = -1; readFile = -1;
NextLogSeg(readId, readSeg); NextLogSeg(readId, readSeg);
readFile = XLogFileOpen(readId, readSeg, (emode == LOG)); readFile = XLogFileRead(readId, readSeg, emode);
if (readFile < 0) if (readFile < 0)
goto next_record_is_invalid; goto next_record_is_invalid;
readOff = 0; readOff = 0;
...@@ -2441,7 +2476,7 @@ got_record:; ...@@ -2441,7 +2476,7 @@ got_record:;
readId, readSeg, readOff))); readId, readSeg, readOff)));
goto next_record_is_invalid; goto next_record_is_invalid;
} }
if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode, true)) if (!ValidXLOGHeader((XLogPageHeader) readBuf, emode))
goto next_record_is_invalid; goto next_record_is_invalid;
if (!(((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD)) if (!(((XLogPageHeader) readBuf)->xlp_info & XLP_FIRST_IS_CONTRECORD))
{ {
...@@ -2450,7 +2485,8 @@ got_record:; ...@@ -2450,7 +2485,8 @@ got_record:;
readId, readSeg, readOff))); readId, readSeg, readOff)));
goto next_record_is_invalid; goto next_record_is_invalid;
} }
contrecord = (XLogContRecord *) ((char *) readBuf + SizeOfXLogPHD); pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
contrecord = (XLogContRecord *) ((char *) readBuf + pageHeaderSize);
if (contrecord->xl_rem_len == 0 || if (contrecord->xl_rem_len == 0 ||
total_len != (contrecord->xl_rem_len + gotlen)) total_len != (contrecord->xl_rem_len + gotlen))
{ {
...@@ -2460,7 +2496,7 @@ got_record:; ...@@ -2460,7 +2496,7 @@ got_record:;
readId, readSeg, readOff))); readId, readSeg, readOff)));
goto next_record_is_invalid; goto next_record_is_invalid;
} }
len = BLCKSZ - SizeOfXLogPHD - SizeOfXLogContRecord; len = BLCKSZ - pageHeaderSize - SizeOfXLogContRecord;
if (contrecord->xl_rem_len > len) if (contrecord->xl_rem_len > len)
{ {
memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord, len); memcpy(buffer, (char *) contrecord + SizeOfXLogContRecord, len);
...@@ -2474,7 +2510,8 @@ got_record:; ...@@ -2474,7 +2510,8 @@ got_record:;
} }
if (!RecordIsValid(record, *RecPtr, emode)) if (!RecordIsValid(record, *RecPtr, emode))
goto next_record_is_invalid; goto next_record_is_invalid;
if (BLCKSZ - SizeOfXLogRecord >= SizeOfXLogPHD + pageHeaderSize = XLogPageHeaderSize((XLogPageHeader) readBuf);
if (BLCKSZ - SizeOfXLogRecord >= pageHeaderSize +
SizeOfXLogContRecord + MAXALIGN(contrecord->xl_rem_len)) SizeOfXLogContRecord + MAXALIGN(contrecord->xl_rem_len))
{ {
nextRecord = (XLogRecord *) ((char *) contrecord + nextRecord = (XLogRecord *) ((char *) contrecord +
...@@ -2482,7 +2519,7 @@ got_record:; ...@@ -2482,7 +2519,7 @@ got_record:;
} }
EndRecPtr.xlogid = readId; EndRecPtr.xlogid = readId;
EndRecPtr.xrecoff = readSeg * XLogSegSize + readOff + EndRecPtr.xrecoff = readSeg * XLogSegSize + readOff +
SizeOfXLogPHD + SizeOfXLogContRecord + pageHeaderSize + SizeOfXLogContRecord +
MAXALIGN(contrecord->xl_rem_len); MAXALIGN(contrecord->xl_rem_len);
ReadRecPtr = *RecPtr; ReadRecPtr = *RecPtr;
return record; return record;
...@@ -2514,7 +2551,7 @@ next_record_is_invalid:; ...@@ -2514,7 +2551,7 @@ next_record_is_invalid:;
* ReadRecord. It's not intended for use from anywhere else. * ReadRecord. It's not intended for use from anywhere else.
*/ */
static bool static bool
ValidXLOGHeader(XLogPageHeader hdr, int emode, bool checkSUI) ValidXLOGHeader(XLogPageHeader hdr, int emode)
{ {
XLogRecPtr recaddr; XLogRecPtr recaddr;
...@@ -2532,46 +2569,416 @@ ValidXLOGHeader(XLogPageHeader hdr, int emode, bool checkSUI) ...@@ -2532,46 +2569,416 @@ ValidXLOGHeader(XLogPageHeader hdr, int emode, bool checkSUI)
hdr->xlp_info, readId, readSeg, readOff))); hdr->xlp_info, readId, readSeg, readOff)));
return false; return false;
} }
recaddr.xlogid = readId; if (hdr->xlp_info & XLP_LONG_HEADER)
recaddr.xrecoff = readSeg * XLogSegSize + readOff;
if (!XLByteEQ(hdr->xlp_pageaddr, recaddr))
{ {
ereport(emode, XLogLongPageHeader longhdr = (XLogLongPageHeader) hdr;
(errmsg("unexpected pageaddr %X/%X in log file %u, segment %u, offset %u",
hdr->xlp_pageaddr.xlogid, hdr->xlp_pageaddr.xrecoff,
readId, readSeg, readOff)));
return false;
}
/* if (longhdr->xlp_sysid != ControlFile->system_identifier)
* We disbelieve a SUI less than the previous page's SUI, or more than
* a few counts greater. In theory as many as 512 shutdown checkpoint
* records could appear on a 32K-sized xlog page, so that's the most
* differential there could legitimately be.
*
* Note this check can only be applied when we are reading the next page
* in sequence, so ReadRecord passes a flag indicating whether to
* check.
*/
if (checkSUI)
{
if (hdr->xlp_sui < lastReadSUI ||
hdr->xlp_sui > lastReadSUI + 512)
{ {
ereport(emode, char fhdrident_str[32];
/* translator: SUI = startup id */ char sysident_str[32];
(errmsg("out-of-sequence SUI %u (after %u) in log file %u, segment %u, offset %u",
hdr->xlp_sui, lastReadSUI,
readId, readSeg, readOff)));
return false;
}
}
lastReadSUI = hdr->xlp_sui;
return true;
}
/* /*
* I/O routines for pg_control * Format sysids separately to keep platform-dependent format
* code out of the translatable message string.
*/
snprintf(fhdrident_str, sizeof(fhdrident_str), UINT64_FORMAT,
longhdr->xlp_sysid);
snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT,
ControlFile->system_identifier);
ereport(emode,
(errmsg("WAL file is from different system"),
errdetail("WAL file SYSID is %s, pg_control SYSID is %s",
fhdrident_str, sysident_str)));
return false;
}
if (longhdr->xlp_seg_size != XLogSegSize)
{
ereport(emode,
(errmsg("WAL file is from different system"),
errdetail("Incorrect XLOG_SEG_SIZE in page header.")));
return false;
}
}
recaddr.xlogid = readId;
recaddr.xrecoff = readSeg * XLogSegSize + readOff;
if (!XLByteEQ(hdr->xlp_pageaddr, recaddr))
{
ereport(emode,
(errmsg("unexpected pageaddr %X/%X in log file %u, segment %u, offset %u",
hdr->xlp_pageaddr.xlogid, hdr->xlp_pageaddr.xrecoff,
readId, readSeg, readOff)));
return false;
}
/*
* Check page TLI is one of the expected values.
*/
if (!list_member_int(expectedTLIs, (int) hdr->xlp_tli))
{
ereport(emode,
(errmsg("unexpected timeline ID %u in log file %u, segment %u, offset %u",
hdr->xlp_tli,
readId, readSeg, readOff)));
return false;
}
/*
* Since child timelines are always assigned a TLI greater than their
* immediate parent's TLI, we should never see TLI go backwards across
* successive pages of a consistent WAL sequence.
*
* Of course this check should only be applied when advancing sequentially
* across pages; therefore ReadRecord resets lastPageTLI to zero when
* going to a random page.
*/
if (hdr->xlp_tli < lastPageTLI)
{
ereport(emode,
(errmsg("out-of-sequence timeline ID %u (after %u) in log file %u, segment %u, offset %u",
hdr->xlp_tli, lastPageTLI,
readId, readSeg, readOff)));
return false;
}
lastPageTLI = hdr->xlp_tli;
return true;
}
/*
 * Load the ancestry of a timeline from its history file.
 *
 * The result is an integer List headed by targetTLI itself, followed by the
 * timeline IDs read from the history file in reverse order of appearance
 * (the last one listed in the file comes right after targetTLI).
 *
 * If the history file cannot be opened because it does not exist (ENOENT),
 * the timeline is taken to have no parents and a one-element list holding
 * only targetTLI is returned.
 *
 * During archive recovery the file is requested via RestoreArchivedFile
 * under the name "RECOVERYHISTORY"; that call is relied upon to fill in the
 * path we then open.  Otherwise the path comes from TLHistoryFilePath.
 *
 * Reported at FATAL: an open failure other than ENOENT, a data line whose
 * first field is not a number, and timeline IDs that are not strictly
 * increasing (within the file, or relative to targetTLI).
 */
static List *
readTimeLineHistory(TimeLineID targetTLI)
{
	List	   *ancestry = NIL;
	char		histPath[MAXPGPATH];
	char		histName[MAXFNAMELEN];
	char		lineBuf[MAXPGPATH];
	FILE	   *histFile;

	/* Work out where the history file lives, fetching it if necessary */
	if (InArchiveRecovery)
	{
		TLHistoryFileName(histName, targetTLI);
		RestoreArchivedFile(histPath, histName, "RECOVERYHISTORY");
	}
	else
	{
		TLHistoryFilePath(histPath, targetTLI);
	}

	histFile = AllocateFile(histPath, "r");
	if (histFile == NULL)
	{
		if (errno != ENOENT)
			ereport(FATAL,
					(errcode_for_file_access(),
					 errmsg("could not open \"%s\": %m", histPath)));

		/* No history file: the timeline stands alone */
		return list_make1_int((int) targetTLI);
	}

	/*
	 * Walk the file one line at a time.  Only the leading numeric field of
	 * each data line matters; whatever follows it is ignored.
	 */
	while (fgets(lineBuf, MAXPGPATH, histFile) != NULL)
	{
		char	   *field = lineBuf;
		char	   *fieldEnd;
		TimeLineID	lineTLI;

		/* step over leading whitespace */
		while (*field != '\0' && isspace((unsigned char) *field))
			field++;

		/* blank lines and # comments carry no data */
		if (*field == '\0' || *field == '#')
			continue;

		/* base 0 lets strtoul accept decimal, octal and hex notation */
		lineTLI = (TimeLineID) strtoul(field, &fieldEnd, 0);
		if (fieldEnd == field)
			ereport(FATAL,
					(errmsg("syntax error in history file: %s", lineBuf),
					 errhint("Expected a numeric timeline ID.")));

		/* each entry must exceed the one most recently pushed */
		if (ancestry != NIL &&
			lineTLI <= (TimeLineID) linitial_int(ancestry))
			ereport(FATAL,
					(errmsg("invalid data in history file: %s", lineBuf),
					 errhint("Timeline IDs must be in increasing sequence.")));

		/* push at the front so the newest entry ends up first */
		ancestry = lcons_int((int) lineTLI, ancestry);
	}

	FreeFile(histFile);

	/* The requested timeline must be newer than everything in its file */
	if (ancestry != NIL &&
		targetTLI <= (TimeLineID) linitial_int(ancestry))
		ereport(FATAL,
				(errmsg("invalid data in history file \"%s\"", histPath),
				 errhint("Timeline IDs must be less than child timeline's ID.")));

	ancestry = lcons_int((int) targetTLI, ancestry);

	ereport(DEBUG3,
			(errmsg_internal("history of timeline %u is %s",
							 targetTLI, nodeToString(ancestry))));

	return ancestry;
}
/*
 * Report whether a history file can be found for timeline probeTLI.
 *
 * The file is located the same way as when reading it: through
 * RestoreArchivedFile (as "RECOVERYHISTORY") during archive recovery,
 * through TLHistoryFilePath otherwise.  Returns true if the file opens for
 * reading, false if the open fails with ENOENT; any other open failure is
 * reported at FATAL.
 */
static bool
existsTimeLineHistory(TimeLineID probeTLI)
{
	char		histPath[MAXPGPATH];
	char		histName[MAXFNAMELEN];
	FILE	   *probe;

	if (InArchiveRecovery)
	{
		TLHistoryFileName(histName, probeTLI);
		RestoreArchivedFile(histPath, histName, "RECOVERYHISTORY");
	}
	else
	{
		TLHistoryFilePath(histPath, probeTLI);
	}

	probe = AllocateFile(histPath, "r");
	if (probe == NULL)
	{
		/* only "no such file" counts as a clean negative answer */
		if (errno != ENOENT)
			ereport(FATAL,
					(errcode_for_file_access(),
					 errmsg("could not open \"%s\": %m", histPath)));
		return false;
	}

	/* It opened, which is all we wanted to know */
	FreeFile(probe);
	return true;
}
/*
 * Starting from startTLI (which is assumed to exist), return the newest
 * timeline ID reachable by consecutive probing.
 *
 * The search simply asks existsTimeLineHistory() about startTLI + 1,
 * startTLI + 2, ... and stops at the first ID that has no history file.
 * It is heuristic in that it does not look past a gap, but it does
 * guarantee that (result + 1) was probed and found not to exist, so that
 * ID should be safe to hand out to a new timeline.
 *
 * XXX is it useful to allow gaps in the sequence?
 */
static TimeLineID
findNewestTimeLine(TimeLineID startTLI)
{
	TimeLineID	newest = startTLI;

	/* advance for as long as the immediate successor has a history file */
	while (existsTimeLineHistory(newest + 1))
		newest++;

	return newest;
}
/*
 * Create a new timeline history file.
 *
 * newTLI: ID of the new timeline
 * parentTLI: ID of its immediate parent
 * endTLI et al: ID of the last used WAL file, for annotation purposes
 *
 * The file is assembled under a temporary name (XLogDir/xlogtemp.<pid>):
 * the parent's history file, if one can be opened, is copied verbatim, and
 * then one line describing this timeline split is appended.  That line
 * holds parentTLI, the WAL file name built from endTLI/endLogId/endLogSeg,
 * and a note formatted from the file-level variables recoveryStopAfter,
 * recoveryStopXid and recoveryStopTime.  After fsync and close, the temp
 * file is moved to the name given by TLHistoryFilePath(newTLI) and
 * XLogArchiveNotify is called for it.
 *
 * All failures are reported at PANIC, except failure to open the parent's
 * history file for a reason other than ENOENT, which is reported at FATAL.
 *
 * Currently this is only used during recovery, and so there are no locking
 * considerations. But we should be just as tense as XLogFileInit to avoid
 * emplacing a bogus file.
 */
static void
writeTimeLineHistory(TimeLineID newTLI, TimeLineID parentTLI,
TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg)
{
char path[MAXPGPATH];
char tmppath[MAXPGPATH];
char histfname[MAXFNAMELEN];
char xlogfname[MAXFNAMELEN];
char buffer[BLCKSZ];
int srcfd;
int fd;
int nbytes;
Assert(newTLI > parentTLI); /* else bad selection of newTLI */
/*
 * Write into a temp file name.  Any leftover file of the same name is
 * unlinked first, since the O_EXCL open below would otherwise fail.
 */
snprintf(tmppath, MAXPGPATH, "%s/xlogtemp.%d",
XLogDir, (int) getpid());
unlink(tmppath);
/* do not use XLOG_SYNC_BIT here --- want to fsync only at end of fill */
fd = BasicOpenFile(tmppath, O_RDWR | O_CREAT | O_EXCL,
S_IRUSR | S_IWUSR);
if (fd < 0)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not create file \"%s\": %m", tmppath)));
/*
 * If a history file exists for the parent, copy it verbatim.
 *
 * During archive recovery the parent's file is requested through
 * RestoreArchivedFile, which is relied upon to fill in "path";
 * otherwise the path is built by TLHistoryFilePath.
 */
if (InArchiveRecovery)
{
TLHistoryFileName(histfname, parentTLI);
RestoreArchivedFile(path, histfname, "RECOVERYHISTORY");
}
else
TLHistoryFilePath(path, parentTLI);
srcfd = BasicOpenFile(path, O_RDONLY, 0);
if (srcfd < 0)
{
if (errno != ENOENT)
ereport(FATAL,
(errcode_for_file_access(),
errmsg("could not open \"%s\": %m", path)));
/* Not there, so assume parent has no parents */
}
else
{
/* Copy the parent file one buffer-load at a time until EOF */
for (;;)
{
/* clear errno first so a stale value is not mistaken for a read error */
errno = 0;
nbytes = (int) read(srcfd, buffer, sizeof(buffer));
if (nbytes < 0 || errno != 0)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not read file \"%s\": %m", path)));
/* zero bytes read means end of the parent file */
if (nbytes == 0)
break;
errno = 0;
if ((int) write(fd, buffer, nbytes) != nbytes)
{
/* capture errno before unlink() gets a chance to change it */
int save_errno = errno;
/*
 * If we fail to make the file, delete it to release disk
 * space
 */
unlink(tmppath);
/* if write didn't set errno, assume problem is no disk space */
errno = save_errno ? save_errno : ENOSPC;
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", tmppath)));
}
}
close(srcfd);
}
/*
 * Append one line with the details of this timeline split.
 *
 * If we did have a parent file, insert an extra newline just in case
 * the parent file failed to end with one.
 *
 * Note that srcfd is consulted below only as a flag for "a parent file
 * was opened"; the descriptor itself has already been closed.
 */
XLogFileName(xlogfname, endTLI, endLogId, endLogSeg);
snprintf(buffer, sizeof(buffer),
"%s%u\t%s\t%s transaction %u at %s\n",
(srcfd < 0) ? "" : "\n",
parentTLI,
xlogfname,
recoveryStopAfter ? "after" : "before",
recoveryStopXid,
str_time(recoveryStopTime));
nbytes = strlen(buffer);
errno = 0;
if ((int) write(fd, buffer, nbytes) != nbytes)
{
/* capture errno before unlink() gets a chance to change it */
int save_errno = errno;
/*
 * If we fail to make the file, delete it to release disk
 * space
 */
unlink(tmppath);
/* if write didn't set errno, assume problem is no disk space */
errno = save_errno ? save_errno : ENOSPC;
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", tmppath)));
}
/* Flush the finished contents to disk before giving the file its real name */
if (pg_fsync(fd) != 0)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", tmppath)));
if (close(fd))
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not close file \"%s\": %m", tmppath)));
/*
 * Now move the completed history file into place with its final name.
 */
TLHistoryFilePath(path, newTLI);
/*
 * Prefer link() to rename() here just to be really sure that we don't
 * overwrite an existing history file. However, there shouldn't be one, so
 * rename() is an acceptable substitute except for the truly paranoid.
 */
#if HAVE_WORKING_LINK
if (link(tmppath, path) < 0)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not link file \"%s\" to \"%s\": %m",
tmppath, path)));
/* the new name now refers to the file, so the temp name can go */
unlink(tmppath);
#else
if (rename(tmppath, path) < 0)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not rename file \"%s\" to \"%s\": %m",
tmppath, path)));
#endif
/* The history file can be archived immediately. */
TLHistoryFileName(histfname, newTLI);
XLogArchiveNotify(histfname);
}
/*
* I/O routines for pg_control
* *
* *ControlFile is a buffer in shared memory that holds an image of the * *ControlFile is a buffer in shared memory that holds an image of the
* contents of pg_control. WriteControlFile() initializes pg_control * contents of pg_control. WriteControlFile() initializes pg_control
...@@ -2956,8 +3363,8 @@ BootStrapXLOG(void) ...@@ -2956,8 +3363,8 @@ BootStrapXLOG(void)
CheckPoint checkPoint; CheckPoint checkPoint;
char *buffer; char *buffer;
XLogPageHeader page; XLogPageHeader page;
XLogLongPageHeader longpage;
XLogRecord *record; XLogRecord *record;
XLogFileHeaderData *fhdr;
bool use_existent; bool use_existent;
uint64 sysidentifier; uint64 sysidentifier;
struct timeval tv; struct timeval tv;
...@@ -2979,6 +3386,9 @@ BootStrapXLOG(void) ...@@ -2979,6 +3386,9 @@ BootStrapXLOG(void)
sysidentifier = ((uint64) tv.tv_sec) << 32; sysidentifier = ((uint64) tv.tv_sec) << 32;
sysidentifier |= (uint32) (tv.tv_sec | tv.tv_usec); sysidentifier |= (uint32) (tv.tv_sec | tv.tv_usec);
/* First timeline ID is always 1 */
ThisTimeLineID = 1;
/* Use malloc() to ensure buffer is MAXALIGNED */ /* Use malloc() to ensure buffer is MAXALIGNED */
buffer = (char *) malloc(BLCKSZ); buffer = (char *) malloc(BLCKSZ);
page = (XLogPageHeader) buffer; page = (XLogPageHeader) buffer;
...@@ -2986,9 +3396,9 @@ BootStrapXLOG(void) ...@@ -2986,9 +3396,9 @@ BootStrapXLOG(void)
/* Set up information for the initial checkpoint record */ /* Set up information for the initial checkpoint record */
checkPoint.redo.xlogid = 0; checkPoint.redo.xlogid = 0;
checkPoint.redo.xrecoff = SizeOfXLogPHD + SizeOfXLogRecord + SizeOfXLogFHD; checkPoint.redo.xrecoff = SizeOfXLogLongPHD;
checkPoint.undo = checkPoint.redo; checkPoint.undo = checkPoint.redo;
checkPoint.ThisStartUpID = 0; checkPoint.ThisTimeLineID = ThisTimeLineID;
checkPoint.nextXid = FirstNormalTransactionId; checkPoint.nextXid = FirstNormalTransactionId;
checkPoint.nextOid = BootstrapObjectIdData; checkPoint.nextOid = BootstrapObjectIdData;
checkPoint.time = time(NULL); checkPoint.time = time(NULL);
...@@ -2999,38 +3409,18 @@ BootStrapXLOG(void) ...@@ -2999,38 +3409,18 @@ BootStrapXLOG(void)
/* Set up the XLOG page header */ /* Set up the XLOG page header */
page->xlp_magic = XLOG_PAGE_MAGIC; page->xlp_magic = XLOG_PAGE_MAGIC;
page->xlp_info = 0; page->xlp_info = XLP_LONG_HEADER;
page->xlp_sui = checkPoint.ThisStartUpID; page->xlp_tli = ThisTimeLineID;
page->xlp_pageaddr.xlogid = 0; page->xlp_pageaddr.xlogid = 0;
page->xlp_pageaddr.xrecoff = 0; page->xlp_pageaddr.xrecoff = 0;
longpage = (XLogLongPageHeader) page;
/* Insert the file header record */ longpage->xlp_sysid = sysidentifier;
record = (XLogRecord *) ((char *) page + SizeOfXLogPHD); longpage->xlp_seg_size = XLogSegSize;
record->xl_prev.xlogid = 0;
record->xl_prev.xrecoff = 0;
record->xl_xact_prev.xlogid = 0;
record->xl_xact_prev.xrecoff = 0;
record->xl_xid = InvalidTransactionId;
record->xl_len = SizeOfXLogFHD;
record->xl_info = XLOG_FILE_HEADER;
record->xl_rmid = RM_XLOG_ID;
fhdr = (XLogFileHeaderData *) XLogRecGetData(record);
fhdr->xlfhd_sysid = sysidentifier;
fhdr->xlfhd_xlogid = 0;
fhdr->xlfhd_segno = 0;
fhdr->xlfhd_seg_size = XLogSegSize;
INIT_CRC64(crc);
COMP_CRC64(crc, fhdr, SizeOfXLogFHD);
COMP_CRC64(crc, (char *) record + sizeof(crc64),
SizeOfXLogRecord - sizeof(crc64));
FIN_CRC64(crc);
record->xl_crc = crc;
/* Insert the initial checkpoint record */ /* Insert the initial checkpoint record */
record = (XLogRecord *) ((char *) page + SizeOfXLogPHD + SizeOfXLogRecord + SizeOfXLogFHD); record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD);
record->xl_prev.xlogid = 0; record->xl_prev.xlogid = 0;
record->xl_prev.xrecoff = SizeOfXLogPHD; record->xl_prev.xrecoff = 0;
record->xl_xact_prev.xlogid = 0; record->xl_xact_prev.xlogid = 0;
record->xl_xact_prev.xrecoff = 0; record->xl_xact_prev.xrecoff = 0;
record->xl_xid = InvalidTransactionId; record->xl_xid = InvalidTransactionId;
...@@ -3050,7 +3440,7 @@ BootStrapXLOG(void) ...@@ -3050,7 +3440,7 @@ BootStrapXLOG(void)
use_existent = false; use_existent = false;
openLogFile = XLogFileInit(0, 0, &use_existent, false); openLogFile = XLogFileInit(0, 0, &use_existent, false);
/* Write the first page with the initial records */ /* Write the first page with the initial record */
errno = 0; errno = 0;
if (write(openLogFile, buffer, BLCKSZ) != BLCKSZ) if (write(openLogFile, buffer, BLCKSZ) != BLCKSZ)
{ {
...@@ -3120,6 +3510,8 @@ readRecoveryCommandFile(void) ...@@ -3120,6 +3510,8 @@ readRecoveryCommandFile(void)
char recoveryCommandFile[MAXPGPATH]; char recoveryCommandFile[MAXPGPATH];
FILE *fd; FILE *fd;
char cmdline[MAXPGPATH]; char cmdline[MAXPGPATH];
TimeLineID rtli = 0;
bool rtliGiven = false;
bool syntaxError = false; bool syntaxError = false;
snprintf(recoveryCommandFile, MAXPGPATH, "%s/recovery.conf", DataDir); snprintf(recoveryCommandFile, MAXPGPATH, "%s/recovery.conf", DataDir);
...@@ -3177,11 +3569,31 @@ readRecoveryCommandFile(void) ...@@ -3177,11 +3569,31 @@ readRecoveryCommandFile(void)
} }
if (strcmp(tok1,"restore_command") == 0) { if (strcmp(tok1,"restore_command") == 0) {
StrNCpy(recoveryRestoreCommand, tok2, MAXPGPATH); recoveryRestoreCommand = pstrdup(tok2);
ereport(LOG, ereport(LOG,
(errmsg("restore_command = \"%s\"", (errmsg("restore_command = \"%s\"",
recoveryRestoreCommand))); recoveryRestoreCommand)));
} }
else if (strcmp(tok1,"recovery_target_timeline") == 0) {
rtliGiven = true;
if (strcmp(tok2, "latest") == 0)
rtli = 0;
else
{
errno = 0;
rtli = (TimeLineID) strtoul(tok2, NULL, 0);
if (errno == EINVAL || errno == ERANGE)
ereport(FATAL,
(errmsg("recovery_target_timeline is not a valid number: \"%s\"",
tok2)));
}
if (rtli)
ereport(LOG,
(errmsg("recovery_target_timeline = %u", rtli)));
else
ereport(LOG,
(errmsg("recovery_target_timeline = latest")));
}
else if (strcmp(tok1,"recovery_target_xid") == 0) { else if (strcmp(tok1,"recovery_target_xid") == 0) {
errno = 0; errno = 0;
recoveryTargetXid = (TransactionId) strtoul(tok2, NULL, 0); recoveryTargetXid = (TransactionId) strtoul(tok2, NULL, 0);
...@@ -3246,22 +3658,44 @@ readRecoveryCommandFile(void) ...@@ -3246,22 +3658,44 @@ readRecoveryCommandFile(void)
errhint("Lines should have the format parameter = 'value'."))); errhint("Lines should have the format parameter = 'value'.")));
/* Check that required parameters were supplied */ /* Check that required parameters were supplied */
if (recoveryRestoreCommand[0] == '\0') if (recoveryRestoreCommand == NULL)
ereport(FATAL, ereport(FATAL,
(errmsg("recovery command file \"%s\" did not specify restore_command", (errmsg("recovery command file \"%s\" did not specify restore_command",
recoveryCommandFile))); recoveryCommandFile)));
/* Enable fetching from archive recovery area */
InArchiveRecovery = true;
/* /*
* clearly indicate our state * If user specified recovery_target_timeline, validate it or compute the
* "latest" value. We can't do this until after we've gotten the restore
* command and set InArchiveRecovery, because we need to fetch timeline
* history files from the archive.
*/ */
InArchiveRecovery = true; if (rtliGiven)
{
if (rtli)
{
/* Timeline 1 does not have a history file, all else should */
if (rtli != 1 && !existsTimeLineHistory(rtli))
ereport(FATAL,
(errmsg("recovery_target_timeline %u does not exist",
rtli)));
recoveryTargetTLI = rtli;
}
else
{
/* We start the "latest" search from pg_control's timeline */
recoveryTargetTLI = findNewestTimeLine(recoveryTargetTLI);
}
}
} }
/* /*
* Exit archive-recovery state * Exit archive-recovery state
*/ */
static void static void
exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff) exitArchiveRecovery(TimeLineID endTLI, uint32 endLogId, uint32 endLogSeg)
{ {
char recoveryPath[MAXPGPATH]; char recoveryPath[MAXPGPATH];
char xlogpath[MAXPGPATH]; char xlogpath[MAXPGPATH];
...@@ -3269,7 +3703,7 @@ exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff) ...@@ -3269,7 +3703,7 @@ exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff)
char recoveryCommandDone[MAXPGPATH]; char recoveryCommandDone[MAXPGPATH];
/* /*
* Disable fetches from archive, so we can use XLogFileOpen below. * We are no longer in archive recovery state.
*/ */
InArchiveRecovery = false; InArchiveRecovery = false;
...@@ -3294,10 +3728,12 @@ exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff) ...@@ -3294,10 +3728,12 @@ exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff)
* more descriptive of what our current database state is, because that * more descriptive of what our current database state is, because that
* is what we replayed from. * is what we replayed from.
* *
* XXX there ought to be a timeline increment somewhere around here. * Note that if we are establishing a new timeline, ThisTimeLineID is
* already set to the new value, and so we will create a new file instead
* of overwriting any existing file.
*/ */
snprintf(recoveryPath, MAXPGPATH, "%s/RECOVERYXLOG", XLogDir); snprintf(recoveryPath, MAXPGPATH, "%s/RECOVERYXLOG", XLogDir);
XLogFilePath(xlogpath, endLogId, endLogSeg); XLogFilePath(xlogpath, ThisTimeLineID, endLogId, endLogSeg);
if (restoredFromArchive) if (restoredFromArchive)
{ {
...@@ -3319,61 +3755,26 @@ exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff) ...@@ -3319,61 +3755,26 @@ exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff)
* RECOVERYXLOG laying about, get rid of it. * RECOVERYXLOG laying about, get rid of it.
*/ */
unlink(recoveryPath); /* ignore any error */ unlink(recoveryPath); /* ignore any error */
/*
* If we are establishing a new timeline, we have to copy data
* from the last WAL segment of the old timeline to create a
* starting WAL segment for the new timeline.
*/
if (endTLI != ThisTimeLineID)
XLogFileCopy(endLogId, endLogSeg,
endTLI, endLogId, endLogSeg);
} }
/* /*
* If we restored to a point-in-time, then the current WAL segment * Let's just make real sure there are not .ready or .done flags posted
* probably contains records beyond the stop point. These represent an * for the new segment.
* extreme hazard: if we crash in the near future, the replay apparatus
* will know no reason why it shouldn't replay them. Therefore,
* explicitly zero out all the remaining pages of the segment. (We need
* not worry about the partial page in which the last record ends, since
* StartUpXlog will handle zeroing that. Also, there's nothing to do
* if we are right at a segment boundary.)
*
* XXX segment files beyond thhe current one also represent a hazard
* for the same reason. Need to invent timelines to fix this.
*/ */
XLogFileName(xlogpath, ThisTimeLineID, endLogId, endLogSeg);
XLogArchiveCleanup(xlogpath);
/* align xrecoff to next page, then drop segment part */ /* Get rid of any remaining recovered timeline-history file, too */
if (xrecoff % BLCKSZ != 0) snprintf(recoveryPath, MAXPGPATH, "%s/RECOVERYHISTORY", XLogDir);
xrecoff += (BLCKSZ - xrecoff % BLCKSZ); unlink(recoveryPath); /* ignore any error */
xrecoff %= XLogSegSize;
if (recoveryTarget && xrecoff != 0)
{
int fd;
char zbuffer[BLCKSZ];
fd = XLogFileOpen(endLogId, endLogSeg, false);
MemSet(zbuffer, 0, sizeof(zbuffer));
if (lseek(fd, (off_t) xrecoff, SEEK_SET) < 0)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not seek in file \"%s\": %m",
xlogpath)));
for (; xrecoff < XLogSegSize; xrecoff += sizeof(zbuffer))
{
errno = 0;
if ((int) write(fd, zbuffer, sizeof(zbuffer)) != (int) sizeof(zbuffer))
{
/* if write didn't set errno, assume problem is no disk space */
if (errno == 0)
errno = ENOSPC;
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not write to file \"%s\": %m", xlogpath)));
}
}
if (pg_fsync(fd) != 0)
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not fsync file \"%s\": %m", xlogpath)));
if (close(fd))
ereport(PANIC,
(errcode_for_file_access(),
errmsg("could not close file \"%s\": %m", xlogpath)));
}
/* /*
* Rename the config file out of the way, so that we don't accidentally * Rename the config file out of the way, so that we don't accidentally
...@@ -3398,6 +3799,8 @@ exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff) ...@@ -3398,6 +3799,8 @@ exitArchiveRecovery(uint32 endLogId, uint32 endLogSeg, uint32 xrecoff)
* *
* Returns TRUE if we are stopping, FALSE otherwise. On TRUE return, * Returns TRUE if we are stopping, FALSE otherwise. On TRUE return,
* *includeThis is set TRUE if we should apply this record before stopping. * *includeThis is set TRUE if we should apply this record before stopping.
* Also, some information is saved in recoveryStopXid et al for use in
* annotating the new timeline's history file.
*/ */
static bool static bool
recoveryStopsHere(XLogRecord *record, bool *includeThis) recoveryStopsHere(XLogRecord *record, bool *includeThis)
...@@ -3466,27 +3869,31 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis) ...@@ -3466,27 +3869,31 @@ recoveryStopsHere(XLogRecord *record, bool *includeThis)
if (stopsHere) if (stopsHere)
{ {
recoveryStopXid = record->xl_xid;
recoveryStopTime = recordXtime;
recoveryStopAfter = *includeThis;
if (record_info == XLOG_XACT_COMMIT) if (record_info == XLOG_XACT_COMMIT)
{ {
if (*includeThis) if (recoveryStopAfter)
ereport(LOG, ereport(LOG,
(errmsg("recovery stopping after commit of transaction %u, time %s", (errmsg("recovery stopping after commit of transaction %u, time %s",
record->xl_xid, str_time(recordXtime)))); recoveryStopXid, str_time(recoveryStopTime))));
else else
ereport(LOG, ereport(LOG,
(errmsg("recovery stopping before commit of transaction %u, time %s", (errmsg("recovery stopping before commit of transaction %u, time %s",
record->xl_xid, str_time(recordXtime)))); recoveryStopXid, str_time(recoveryStopTime))));
} }
else else
{ {
if (*includeThis) if (recoveryStopAfter)
ereport(LOG, ereport(LOG,
(errmsg("recovery stopping after abort of transaction %u, time %s", (errmsg("recovery stopping after abort of transaction %u, time %s",
record->xl_xid, str_time(recordXtime)))); recoveryStopXid, str_time(recoveryStopTime))));
else else
ereport(LOG, ereport(LOG,
(errmsg("recovery stopping before abort of transaction %u, time %s", (errmsg("recovery stopping before abort of transaction %u, time %s",
record->xl_xid, str_time(recordXtime)))); recoveryStopXid, str_time(recoveryStopTime))));
} }
} }
...@@ -3502,6 +3909,7 @@ StartupXLOG(void) ...@@ -3502,6 +3909,7 @@ StartupXLOG(void)
XLogCtlInsert *Insert; XLogCtlInsert *Insert;
CheckPoint checkPoint; CheckPoint checkPoint;
bool wasShutdown; bool wasShutdown;
bool needNewTimeLine = false;
XLogRecPtr RecPtr, XLogRecPtr RecPtr,
LastRec, LastRec,
checkPointLoc, checkPointLoc,
...@@ -3557,12 +3965,21 @@ StartupXLOG(void) ...@@ -3557,12 +3965,21 @@ StartupXLOG(void)
pg_usleep(60000000L); pg_usleep(60000000L);
#endif #endif
/*
* Initialize on the assumption we want to recover to the same timeline
* that's active according to pg_control.
*/
recoveryTargetTLI = ControlFile->checkPointCopy.ThisTimeLineID;
/* /*
* Check for recovery control file, and if so set up state for * Check for recovery control file, and if so set up state for
* offline recovery * offline recovery
*/ */
readRecoveryCommandFile(); readRecoveryCommandFile();
/* Now we can determine the list of expected TLIs */
expectedTLIs = readTimeLineHistory(recoveryTargetTLI);
/* /*
* Get the last valid checkpoint record. If the latest one according * Get the last valid checkpoint record. If the latest one according
* to pg_control is broken, try the next-to-last one. * to pg_control is broken, try the next-to-last one.
...@@ -3611,17 +4028,11 @@ StartupXLOG(void) ...@@ -3611,17 +4028,11 @@ StartupXLOG(void)
ShmemVariableCache->oidCount = 0; ShmemVariableCache->oidCount = 0;
/* /*
* If it was a shutdown checkpoint, then any following WAL entries * We must replay WAL entries using the same TimeLineID they were created
* were created under the next StartUpID; if it was a regular * under, so temporarily adopt the TLI indicated by the checkpoint (see
* checkpoint then any following WAL entries were created under the * also xlog_redo()).
* same StartUpID. We must replay WAL entries using the same StartUpID
* they were created under, so temporarily adopt that SUI (see also
* xlog_redo()).
*/ */
if (wasShutdown) ThisTimeLineID = checkPoint.ThisTimeLineID;
ThisStartUpID = checkPoint.ThisStartUpID + 1;
else
ThisStartUpID = checkPoint.ThisStartUpID;
RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo; RedoRecPtr = XLogCtl->Insert.RedoRecPtr = checkPoint.redo;
...@@ -3663,12 +4074,18 @@ StartupXLOG(void) ...@@ -3663,12 +4074,18 @@ StartupXLOG(void)
RmgrTable[rmid].rm_startup(); RmgrTable[rmid].rm_startup();
} }
/* Is REDO required ? */ /*
* Find the first record that logically follows the checkpoint ---
* it might physically precede it, though.
*/
if (XLByteLT(checkPoint.redo, RecPtr)) if (XLByteLT(checkPoint.redo, RecPtr))
{
/* back up to find the record */
record = ReadRecord(&(checkPoint.redo), PANIC, buffer); record = ReadRecord(&(checkPoint.redo), PANIC, buffer);
}
else else
{ {
/* read past CheckPoint record */ /* just have to read next record after CheckPoint */
record = ReadRecord(NULL, LOG, buffer); record = ReadRecord(NULL, LOG, buffer);
} }
...@@ -3708,6 +4125,7 @@ StartupXLOG(void) ...@@ -3708,6 +4125,7 @@ StartupXLOG(void)
*/ */
if (recoveryStopsHere(record, &recoveryApply)) if (recoveryStopsHere(record, &recoveryApply))
{ {
needNewTimeLine = true; /* see below */
recoveryContinue = false; recoveryContinue = false;
if (!recoveryApply) if (!recoveryApply)
break; break;
...@@ -3752,6 +4170,26 @@ StartupXLOG(void) ...@@ -3752,6 +4170,26 @@ StartupXLOG(void)
EndOfLog = EndRecPtr; EndOfLog = EndRecPtr;
XLByteToPrevSeg(EndOfLog, endLogId, endLogSeg); XLByteToPrevSeg(EndOfLog, endLogId, endLogSeg);
/*
* Consider whether we need to assign a new timeline ID.
*
* If we stopped short of the end of WAL during recovery, then we
* are generating a new timeline and must assign it a unique new ID.
* Otherwise, we can just extend the timeline we were in when we
* ran out of WAL.
*/
if (needNewTimeLine)
{
ThisTimeLineID = findNewestTimeLine(recoveryTargetTLI) + 1;
ereport(LOG,
(errmsg("selected new timeline ID: %u", ThisTimeLineID)));
writeTimeLineHistory(ThisTimeLineID, recoveryTargetTLI,
curFileTLI, endLogId, endLogSeg);
}
/* Save the selected TimeLineID in shared memory, too */
XLogCtl->ThisTimeLineID = ThisTimeLineID;
/* /*
* We are now done reading the old WAL. Turn off archive fetching * We are now done reading the old WAL. Turn off archive fetching
* if it was active, and make a writable copy of the last WAL segment. * if it was active, and make a writable copy of the last WAL segment.
...@@ -3759,7 +4197,7 @@ StartupXLOG(void) ...@@ -3759,7 +4197,7 @@ StartupXLOG(void)
* readBuf; we will use that below.) * readBuf; we will use that below.)
*/ */
if (InArchiveRecovery) if (InArchiveRecovery)
exitArchiveRecovery(endLogId, endLogSeg, EndOfLog.xrecoff); exitArchiveRecovery(curFileTLI, endLogId, endLogSeg);
/* /*
* Prepare to write WAL starting at EndOfLog position, and init xlog * Prepare to write WAL starting at EndOfLog position, and init xlog
...@@ -3768,7 +4206,7 @@ StartupXLOG(void) ...@@ -3768,7 +4206,7 @@ StartupXLOG(void)
*/ */
openLogId = endLogId; openLogId = endLogId;
openLogSeg = endLogSeg; openLogSeg = endLogSeg;
openLogFile = XLogFileOpen(openLogId, openLogSeg, false); openLogFile = XLogFileOpen(openLogId, openLogSeg);
openLogOff = 0; openLogOff = 0;
ControlFile->logId = openLogId; ControlFile->logId = openLogId;
ControlFile->logSeg = openLogSeg + 1; ControlFile->logSeg = openLogSeg + 1;
...@@ -3812,9 +4250,8 @@ StartupXLOG(void) ...@@ -3812,9 +4250,8 @@ StartupXLOG(void)
* XLogWrite()). * XLogWrite()).
* *
* Note: it might seem we should do AdvanceXLInsertBuffer() here, but * Note: it might seem we should do AdvanceXLInsertBuffer() here, but
* we can't since we haven't yet determined the correct StartUpID * this is sufficient. The first actual attempt to insert a log
* to put into the new page's header. The first actual attempt to * record will advance the insert state.
* insert a log record will advance the insert state.
*/ */
XLogCtl->Write.curridx = NextBufIdx(0); XLogCtl->Write.curridx = NextBufIdx(0);
} }
...@@ -3860,22 +4297,15 @@ StartupXLOG(void) ...@@ -3860,22 +4297,15 @@ StartupXLOG(void)
RmgrTable[rmid].rm_cleanup(); RmgrTable[rmid].rm_cleanup();
} }
/*
* At this point, ThisStartUpID is the largest SUI that we could
* find evidence for in the WAL entries. But check it against
* pg_control's latest checkpoint, to make sure that we can't
* accidentally re-use an already-used SUI.
*/
if (ThisStartUpID < ControlFile->checkPointCopy.ThisStartUpID)
ThisStartUpID = ControlFile->checkPointCopy.ThisStartUpID;
/* /*
* Perform a new checkpoint to update our recovery activity to * Perform a new checkpoint to update our recovery activity to
* disk. * disk.
* *
* Note that we write a shutdown checkpoint. This is correct since * Note that we write a shutdown checkpoint rather than an on-line
* the records following it will use SUI one more than what is * one. This is not particularly critical, but since we may be
* shown in the checkpoint's ThisStartUpID. * assigning a new TLI, using a shutdown checkpoint allows us to
* have the rule that TLI only changes in shutdown checkpoints,
* which allows some extra error checking in xlog_redo.
* *
* In case we had to use the secondary checkpoint, make sure that it * In case we had to use the secondary checkpoint, make sure that it
* will still be shown as the secondary checkpoint after this * will still be shown as the secondary checkpoint after this
...@@ -3890,31 +4320,12 @@ StartupXLOG(void) ...@@ -3890,31 +4320,12 @@ StartupXLOG(void)
*/ */
XLogCloseRelationCache(); XLogCloseRelationCache();
} }
else
{
/*
* If we are not doing recovery, then we saw a checkpoint with
* nothing after it, and we can safely use StartUpID equal to one
* more than the checkpoint's SUI. But just for paranoia's sake,
* check against pg_control too.
*/
ThisStartUpID = checkPoint.ThisStartUpID;
if (ThisStartUpID < ControlFile->checkPointCopy.ThisStartUpID)
ThisStartUpID = ControlFile->checkPointCopy.ThisStartUpID;
}
/* /*
* Preallocate additional log files, if wanted. * Preallocate additional log files, if wanted.
*/ */
PreallocXlogFiles(EndOfLog); PreallocXlogFiles(EndOfLog);
/*
* Advance StartUpID to one more than the highest value used
* previously.
*/
ThisStartUpID++;
XLogCtl->ThisStartUpID = ThisStartUpID;
/* /*
* Okay, we're officially UP. * Okay, we're officially UP.
*/ */
...@@ -4018,18 +4429,18 @@ ReadCheckpointRecord(XLogRecPtr RecPtr, ...@@ -4018,18 +4429,18 @@ ReadCheckpointRecord(XLogRecPtr RecPtr,
/* /*
* This must be called during startup of a backend process, except that * This must be called during startup of a backend process, except that
* it need not be called in a standalone backend (which does StartupXLOG * it need not be called in a standalone backend (which does StartupXLOG
* instead). We need to initialize the local copies of ThisStartUpID and * instead). We need to initialize the local copies of ThisTimeLineID and
* RedoRecPtr. * RedoRecPtr.
* *
* Note: before Postgres 7.5, we went to some effort to keep the postmaster * Note: before Postgres 7.5, we went to some effort to keep the postmaster
* process's copies of ThisStartUpID and RedoRecPtr valid too. This was * process's copies of ThisTimeLineID and RedoRecPtr valid too. This was
* unnecessary however, since the postmaster itself never touches XLOG anyway. * unnecessary however, since the postmaster itself never touches XLOG anyway.
*/ */
void void
InitXLOGAccess(void) InitXLOGAccess(void)
{ {
/* ThisStartUpID doesn't change so we need no lock to copy it */ /* ThisTimeLineID doesn't change so we need no lock to copy it */
ThisStartUpID = XLogCtl->ThisStartUpID; ThisTimeLineID = XLogCtl->ThisTimeLineID;
/* Use GetRedoRecPtr to copy the RedoRecPtr safely */ /* Use GetRedoRecPtr to copy the RedoRecPtr safely */
(void) GetRedoRecPtr(); (void) GetRedoRecPtr();
} }
...@@ -4110,7 +4521,7 @@ CreateCheckPoint(bool shutdown, bool force) ...@@ -4110,7 +4521,7 @@ CreateCheckPoint(bool shutdown, bool force)
} }
MemSet(&checkPoint, 0, sizeof(checkPoint)); MemSet(&checkPoint, 0, sizeof(checkPoint));
checkPoint.ThisStartUpID = ThisStartUpID; checkPoint.ThisTimeLineID = ThisTimeLineID;
checkPoint.time = time(NULL); checkPoint.time = time(NULL);
LWLockAcquire(WALInsertLock, LW_EXCLUSIVE); LWLockAcquire(WALInsertLock, LW_EXCLUSIVE);
...@@ -4372,8 +4783,20 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) ...@@ -4372,8 +4783,20 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
ShmemVariableCache->nextXid = checkPoint.nextXid; ShmemVariableCache->nextXid = checkPoint.nextXid;
ShmemVariableCache->nextOid = checkPoint.nextOid; ShmemVariableCache->nextOid = checkPoint.nextOid;
ShmemVariableCache->oidCount = 0; ShmemVariableCache->oidCount = 0;
/* Any later WAL records should be run with shutdown SUI plus 1 */ /*
ThisStartUpID = checkPoint.ThisStartUpID + 1; * TLI may change in a shutdown checkpoint, but it shouldn't decrease
*/
if (checkPoint.ThisTimeLineID != ThisTimeLineID)
{
if (checkPoint.ThisTimeLineID < ThisTimeLineID ||
!list_member_int(expectedTLIs,
(int) checkPoint.ThisTimeLineID))
ereport(PANIC,
(errmsg("unexpected timeline ID %u (after %u) in checkpoint record",
checkPoint.ThisTimeLineID, ThisTimeLineID)));
/* Following WAL records should be run with new TLI */
ThisTimeLineID = checkPoint.ThisTimeLineID;
}
} }
else if (info == XLOG_CHECKPOINT_ONLINE) else if (info == XLOG_CHECKPOINT_ONLINE)
{ {
...@@ -4389,40 +4812,11 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record) ...@@ -4389,40 +4812,11 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
ShmemVariableCache->nextOid = checkPoint.nextOid; ShmemVariableCache->nextOid = checkPoint.nextOid;
ShmemVariableCache->oidCount = 0; ShmemVariableCache->oidCount = 0;
} }
/* Any later WAL records should be run with the then-active SUI */ /* TLI should not change in an on-line checkpoint */
ThisStartUpID = checkPoint.ThisStartUpID; if (checkPoint.ThisTimeLineID != ThisTimeLineID)
}
else if (info == XLOG_FILE_HEADER)
{
XLogFileHeaderData fhdr;
memcpy(&fhdr, XLogRecGetData(record), sizeof(XLogFileHeaderData));
if (fhdr.xlfhd_sysid != ControlFile->system_identifier)
{
char fhdrident_str[32];
char sysident_str[32];
/*
* Format sysids separately to keep platform-dependent format
* code out of the translatable message string.
*/
snprintf(fhdrident_str, sizeof(fhdrident_str), UINT64_FORMAT,
fhdr.xlfhd_sysid);
snprintf(sysident_str, sizeof(sysident_str), UINT64_FORMAT,
ControlFile->system_identifier);
ereport(PANIC,
(errmsg("WAL file is from different system"),
errdetail("WAL file SYSID is %s, pg_control SYSID is %s",
fhdrident_str, sysident_str)));
}
if (fhdr.xlfhd_seg_size != XLogSegSize)
ereport(PANIC, ereport(PANIC,
(errmsg("WAL file is from different system"), (errmsg("unexpected timeline ID %u (should be %u) in checkpoint record",
errdetail("Incorrect XLOG_SEG_SIZE in file header."))); checkPoint.ThisTimeLineID, ThisTimeLineID)));
}
else if (info == XLOG_WASTED_SPACE)
{
/* ignore */
} }
} }
...@@ -4442,10 +4836,10 @@ xlog_desc(char *buf, uint8 xl_info, char *rec) ...@@ -4442,10 +4836,10 @@ xlog_desc(char *buf, uint8 xl_info, char *rec)
CheckPoint *checkpoint = (CheckPoint *) rec; CheckPoint *checkpoint = (CheckPoint *) rec;
sprintf(buf + strlen(buf), "checkpoint: redo %X/%X; undo %X/%X; " sprintf(buf + strlen(buf), "checkpoint: redo %X/%X; undo %X/%X; "
"sui %u; xid %u; oid %u; %s", "tli %u; xid %u; oid %u; %s",
checkpoint->redo.xlogid, checkpoint->redo.xrecoff, checkpoint->redo.xlogid, checkpoint->redo.xrecoff,
checkpoint->undo.xlogid, checkpoint->undo.xrecoff, checkpoint->undo.xlogid, checkpoint->undo.xrecoff,
checkpoint->ThisStartUpID, checkpoint->nextXid, checkpoint->ThisTimeLineID, checkpoint->nextXid,
checkpoint->nextOid, checkpoint->nextOid,
(info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online"); (info == XLOG_CHECKPOINT_SHUTDOWN) ? "shutdown" : "online");
} }
...@@ -4456,22 +4850,6 @@ xlog_desc(char *buf, uint8 xl_info, char *rec) ...@@ -4456,22 +4850,6 @@ xlog_desc(char *buf, uint8 xl_info, char *rec)
memcpy(&nextOid, rec, sizeof(Oid)); memcpy(&nextOid, rec, sizeof(Oid));
sprintf(buf + strlen(buf), "nextOid: %u", nextOid); sprintf(buf + strlen(buf), "nextOid: %u", nextOid);
} }
else if (info == XLOG_FILE_HEADER)
{
XLogFileHeaderData *fhdr = (XLogFileHeaderData *) rec;
sprintf(buf + strlen(buf),
"file header: sysid " UINT64_FORMAT "; "
"xlogid %X segno %X; seg_size %X",
fhdr->xlfhd_sysid,
fhdr->xlfhd_xlogid,
fhdr->xlfhd_segno,
fhdr->xlfhd_seg_size);
}
else if (info == XLOG_WASTED_SPACE)
{
strcat(buf, "wasted space");
}
else else
strcat(buf, "UNKNOWN"); strcat(buf, "UNKNOWN");
} }
......
...@@ -2,169 +2,31 @@ ...@@ -2,169 +2,31 @@
* *
* xlogutils.c * xlogutils.c
* *
* PostgreSQL transaction log manager utility routines
*
* This file contains support routines that are used by XLOG replay functions.
* None of this code is used during normal system operation.
*
* *
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.31 2004/06/18 06:13:15 tgl Exp $ * $PostgreSQL: pgsql/src/backend/access/transam/xlogutils.c,v 1.32 2004/07/21 22:31:20 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#include "postgres.h" #include "postgres.h"
#include "access/htup.h"
#include "access/xlogutils.h" #include "access/xlogutils.h"
#include "catalog/pg_database.h" #include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "storage/smgr.h" #include "storage/smgr.h"
#include "utils/hsearch.h" #include "utils/hsearch.h"
#include "utils/relcache.h"
/*
* ---------------------------------------------------------------
*
* Index support functions
*
*----------------------------------------------------------------
*/
/*
 * Report whether the heap tuple addressed by iptr in relation hnode was
 * inserted by transaction xid under command cid.
 *
 * Result:
 *	 0	no tuple could be examined (relation or buffer not valid, page is
 *		new, offset is beyond the page's max offset, or the line pointer
 *		is unused or deleted)
 *	-1	a tuple is present but its xmin/cmin do not both match xid/cid
 *	 1	the tuple's xmin equals xid and its cmin equals cid
 */
int
XLogIsOwnerOfTuple(RelFileNode hnode, ItemPointer iptr,
				   TransactionId xid, CommandId cid)
{
	Relation	rel;
	Buffer		buf;
	Page		pg;
	int			result = 0;		/* "no tuple" unless proven otherwise */

	rel = XLogOpenRelation(false, RM_HEAP_ID, hnode);
	if (!RelationIsValid(rel))
		return result;

	buf = ReadBuffer(rel, ItemPointerGetBlockNumber(iptr));
	if (!BufferIsValid(buf))
		return result;

	/* From here on the buffer is share-locked until the single exit below */
	LockBuffer(buf, BUFFER_LOCK_SHARE);
	pg = (Page) BufferGetPage(buf);

	if (!PageIsNew((PageHeader) pg) &&
		ItemPointerGetOffsetNumber(iptr) <= PageGetMaxOffsetNumber(pg))
	{
		ItemId		itemid = PageGetItemId(pg, ItemPointerGetOffsetNumber(iptr));

		if (ItemIdIsUsed(itemid) && !ItemIdDeleted(itemid))
		{
			HeapTupleHeader tuphdr = (HeapTupleHeader) PageGetItem(pg, itemid);

			/* the page is expected to carry the current startup ID */
			Assert(PageGetSUI(pg) == ThisStartUpID);

			if (TransactionIdEquals(HeapTupleHeaderGetXmin(tuphdr), xid) &&
				HeapTupleHeaderGetCmin(tuphdr) == cid)
				result = 1;
			else
				result = -1;
		}
	}

	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	ReleaseBuffer(buf);

	return result;
}
/* /*
* MUST BE CALLED ONLY ON RECOVERY.
*
* Check whether a valid heap tuple (i.e., one inserted by a transaction that
* has not aborted) exists at the given item pointer
*/
/*
 * XLogIsValidTuple
 *
 * Returns false when nothing usable is found at iptr: the relation or
 * buffer is not valid, the page is new, the offset lies beyond the page's
 * max offset, the line pointer is unused or deleted, or the tuple's xmin is
 * not flagged committed and the tuple is either flagged xmin-invalid, was
 * moved in by an aborted xvac transaction, or was inserted by an aborted
 * transaction.
 *
 * Returns true otherwise.  In particular, a page whose startup ID differs
 * from ThisStartUpID is accepted without looking at the tuple itself.
 */
bool
XLogIsValidTuple(RelFileNode hnode, ItemPointer iptr)
{
	Relation	rel;
	Buffer		buf;
	Page		pg;
	bool		valid = false;

	rel = XLogOpenRelation(false, RM_HEAP_ID, hnode);
	if (!RelationIsValid(rel))
		return valid;

	buf = ReadBuffer(rel, ItemPointerGetBlockNumber(iptr));
	if (!BufferIsValid(buf))
		return valid;

	/* The buffer stays share-locked until the single exit at the bottom */
	LockBuffer(buf, BUFFER_LOCK_SHARE);
	pg = (Page) BufferGetPage(buf);

	if (PageIsNew((PageHeader) pg) ||
		ItemPointerGetOffsetNumber(iptr) > PageGetMaxOffsetNumber(pg))
	{
		/* no such slot on this page: leave valid = false */
	}
	else if (PageGetSUI(pg) != ThisStartUpID)
	{
		/* page was last stamped under a different (asserted older) startup ID */
		Assert(PageGetSUI(pg) < ThisStartUpID);
		valid = true;
	}
	else
	{
		ItemId		itemid = PageGetItemId(pg, ItemPointerGetOffsetNumber(iptr));

		if (ItemIdIsUsed(itemid) && !ItemIdDeleted(itemid))
		{
			HeapTupleHeader tuphdr = (HeapTupleHeader) PageGetItem(pg, itemid);

			valid = true;

			/* only tuples not yet flagged xmin-committed need a closer look */
			if (!(tuphdr->t_infomask & HEAP_XMIN_COMMITTED))
			{
				if (tuphdr->t_infomask & HEAP_XMIN_INVALID)
					valid = false;
				else if ((tuphdr->t_infomask & HEAP_MOVED_IN) &&
						 TransactionIdDidAbort(HeapTupleHeaderGetXvac(tuphdr)))
					valid = false;
				else if (TransactionIdDidAbort(HeapTupleHeaderGetXmin(tuphdr)))
					valid = false;
			}
		}
	}

	LockBuffer(buf, BUFFER_LOCK_UNLOCK);
	ReleaseBuffer(buf);

	return valid;
}
/*
* ---------------------------------------------------------------
* *
* Storage related support functions * Storage related support functions
* *
*----------------------------------------------------------------
*/ */
Buffer Buffer
...@@ -198,8 +60,10 @@ XLogReadBuffer(bool extend, Relation reln, BlockNumber blkno) ...@@ -198,8 +60,10 @@ XLogReadBuffer(bool extend, Relation reln, BlockNumber blkno)
return (buffer); return (buffer);
} }
/* /*
* "Relation" cache * Lightweight "Relation" cache --- this substitutes for the normal relcache
* during XLOG replay.
*/ */
typedef struct XLogRelDesc typedef struct XLogRelDesc
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.113 2004/07/12 05:37:03 tgl Exp $ * $PostgreSQL: pgsql/src/backend/commands/sequence.c,v 1.114 2004/07/21 22:31:21 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -279,7 +279,7 @@ DefineSequence(CreateSeqStmt *seq) ...@@ -279,7 +279,7 @@ DefineSequence(CreateSeqStmt *seq)
recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata); recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
} }
END_CRIT_SECTION(); END_CRIT_SECTION();
...@@ -354,7 +354,7 @@ AlterSequence(AlterSeqStmt *stmt) ...@@ -354,7 +354,7 @@ AlterSequence(AlterSeqStmt *stmt)
recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata); recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
} }
END_CRIT_SECTION(); END_CRIT_SECTION();
...@@ -553,7 +553,7 @@ nextval(PG_FUNCTION_ARGS) ...@@ -553,7 +553,7 @@ nextval(PG_FUNCTION_ARGS)
recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata); recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
} }
/* update on-disk data */ /* update on-disk data */
...@@ -689,7 +689,7 @@ do_setval(RangeVar *sequence, int64 next, bool iscalled) ...@@ -689,7 +689,7 @@ do_setval(RangeVar *sequence, int64 next, bool iscalled)
recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata); recptr = XLogInsert(RM_SEQ_ID, XLOG_SEQ_LOG | XLOG_NO_TRAN, rdata);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
} }
/* save info in sequence relation */ /* save info in sequence relation */
...@@ -1091,7 +1091,7 @@ seq_redo(XLogRecPtr lsn, XLogRecord *record) ...@@ -1091,7 +1091,7 @@ seq_redo(XLogRecPtr lsn, XLogRecord *record)
elog(PANIC, "seq_redo: failed to add item to page"); elog(PANIC, "seq_redo: failed to add item to page");
PageSetLSN(page, lsn); PageSetLSN(page, lsn);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
LockBuffer(buffer, BUFFER_LOCK_UNLOCK); LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
WriteBuffer(buffer); WriteBuffer(buffer);
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.121 2004/07/19 02:47:06 tgl Exp $ * $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.122 2004/07/21 22:31:21 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -5448,7 +5448,7 @@ copy_relation_data(Relation rel, SMgrRelation dst) ...@@ -5448,7 +5448,7 @@ copy_relation_data(Relation rel, SMgrRelation dst)
recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata); recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
END_CRIT_SECTION(); END_CRIT_SECTION();
} }
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.283 2004/07/20 22:56:29 momjian Exp $ * $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.284 2004/07/21 22:31:21 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -2341,7 +2341,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel, ...@@ -2341,7 +2341,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
recptr = log_heap_clean(onerel, buf, unused, uncnt); recptr = log_heap_clean(onerel, buf, unused, uncnt);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
} }
else else
{ {
...@@ -2491,10 +2491,10 @@ move_chain_tuple(Relation rel, ...@@ -2491,10 +2491,10 @@ move_chain_tuple(Relation rel,
if (old_buf != dst_buf) if (old_buf != dst_buf)
{ {
PageSetLSN(old_page, recptr); PageSetLSN(old_page, recptr);
PageSetSUI(old_page, ThisStartUpID); PageSetTLI(old_page, ThisTimeLineID);
} }
PageSetLSN(dst_page, recptr); PageSetLSN(dst_page, recptr);
PageSetSUI(dst_page, ThisStartUpID); PageSetTLI(dst_page, ThisTimeLineID);
} }
else else
{ {
...@@ -2611,9 +2611,9 @@ move_plain_tuple(Relation rel, ...@@ -2611,9 +2611,9 @@ move_plain_tuple(Relation rel,
dst_buf, &newtup); dst_buf, &newtup);
PageSetLSN(old_page, recptr); PageSetLSN(old_page, recptr);
PageSetSUI(old_page, ThisStartUpID); PageSetTLI(old_page, ThisTimeLineID);
PageSetLSN(dst_page, recptr); PageSetLSN(dst_page, recptr);
PageSetSUI(dst_page, ThisStartUpID); PageSetTLI(dst_page, ThisTimeLineID);
} }
else else
{ {
...@@ -2807,7 +2807,7 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage) ...@@ -2807,7 +2807,7 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage)
recptr = log_heap_clean(onerel, buffer, unused, uncnt); recptr = log_heap_clean(onerel, buffer, unused, uncnt);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
} }
else else
{ {
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.42 2004/06/05 19:48:07 tgl Exp $ * $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.43 2004/07/21 22:31:21 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -532,7 +532,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, ...@@ -532,7 +532,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
recptr = log_heap_clean(onerel, buffer, unused, uncnt); recptr = log_heap_clean(onerel, buffer, unused, uncnt);
PageSetLSN(page, recptr); PageSetLSN(page, recptr);
PageSetSUI(page, ThisStartUpID); PageSetTLI(page, ThisTimeLineID);
} }
else else
{ {
......
...@@ -19,7 +19,7 @@ ...@@ -19,7 +19,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/postmaster/pgarch.c,v 1.1 2004/07/19 02:47:08 tgl Exp $ * $PostgreSQL: pgsql/src/backend/postmaster/pgarch.c,v 1.2 2004/07/21 22:31:22 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -31,9 +31,10 @@ ...@@ -31,9 +31,10 @@
#include <sys/time.h> #include <sys/time.h>
#include <unistd.h> #include <unistd.h>
#include "postmaster/pgarch.h" #include "access/xlog_internal.h"
#include "libpq/pqsignal.h" #include "libpq/pqsignal.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "postmaster/pgarch.h"
#include "postmaster/postmaster.h" #include "postmaster/postmaster.h"
#include "storage/fd.h" #include "storage/fd.h"
#include "storage/ipc.h" #include "storage/ipc.h"
...@@ -63,8 +64,8 @@ ...@@ -63,8 +64,8 @@
* ---------- * ----------
*/ */
#define MIN_XFN_CHARS 16 #define MIN_XFN_CHARS 16
#define MAX_XFN_CHARS 16 #define MAX_XFN_CHARS 24
#define VALID_XFN_CHARS "0123456789ABCDEF" #define VALID_XFN_CHARS "0123456789ABCDEF.history"
#define NUM_ARCHIVE_RETRIES 3 #define NUM_ARCHIVE_RETRIES 3
...@@ -73,8 +74,6 @@ ...@@ -73,8 +74,6 @@
* Local data * Local data
* ---------- * ----------
*/ */
static char XLogDir[MAXPGPATH];
static char XLogArchiveStatusDir[MAXPGPATH];
static time_t last_pgarch_start_time; static time_t last_pgarch_start_time;
/* /*
...@@ -265,9 +264,8 @@ PgArchiverMain(int argc, char *argv[]) ...@@ -265,9 +264,8 @@ PgArchiverMain(int argc, char *argv[])
init_ps_display("archiver process", "", ""); init_ps_display("archiver process", "", "");
set_ps_display(""); set_ps_display("");
/* Init XLOG file paths */ /* Init XLOG file paths --- needed in EXEC_BACKEND case */
snprintf(XLogDir, MAXPGPATH, "%s/pg_xlog", DataDir); XLOGPathInit();
snprintf(XLogArchiveStatusDir, MAXPGPATH, "%s/archive_status", XLogDir);
pgarch_MainLoop(); pgarch_MainLoop();
...@@ -497,6 +495,12 @@ pgarch_archiveXlog(char *xlog) ...@@ -497,6 +495,12 @@ pgarch_archiveXlog(char *xlog)
* 1) to maintain the sequential chain of xlogs required for recovery * 1) to maintain the sequential chain of xlogs required for recovery
* 2) because the oldest ones will sooner become candidates for * 2) because the oldest ones will sooner become candidates for
* recycling at time of checkpoint * recycling at time of checkpoint
*
* NOTE: the "oldest" comparison will presently consider all segments of
* a timeline with a smaller ID to be older than all segments of a timeline
* with a larger ID; the net result being that past timelines are given
* higher priority for archiving. This seems okay, or at least not
* obviously worth changing.
*/ */
static bool static bool
pgarch_readyXlog(char *xlog) pgarch_readyXlog(char *xlog)
...@@ -507,11 +511,13 @@ pgarch_readyXlog(char *xlog) ...@@ -507,11 +511,13 @@ pgarch_readyXlog(char *xlog)
* It is possible to optimise this code, though only a single * It is possible to optimise this code, though only a single
* file is expected on the vast majority of calls, so.... * file is expected on the vast majority of calls, so....
*/ */
char XLogArchiveStatusDir[MAXPGPATH];
char newxlog[MAX_XFN_CHARS + 6 + 1]; char newxlog[MAX_XFN_CHARS + 6 + 1];
DIR *rldir; DIR *rldir;
struct dirent *rlde; struct dirent *rlde;
bool found = false; bool found = false;
snprintf(XLogArchiveStatusDir, MAXPGPATH, "%s/archive_status", XLogDir);
rldir = AllocateDir(XLogArchiveStatusDir); rldir = AllocateDir(XLogArchiveStatusDir);
if (rldir == NULL) if (rldir == NULL)
ereport(ERROR, ereport(ERROR,
...@@ -575,14 +581,12 @@ pgarch_archiveDone(char *xlog) ...@@ -575,14 +581,12 @@ pgarch_archiveDone(char *xlog)
{ {
char rlogready[MAXPGPATH]; char rlogready[MAXPGPATH];
char rlogdone[MAXPGPATH]; char rlogdone[MAXPGPATH];
int rc;
snprintf(rlogready, MAXPGPATH, "%s/%s.ready", XLogArchiveStatusDir, xlog); StatusFilePath(rlogready, xlog, ".ready");
snprintf(rlogdone, MAXPGPATH, "%s/%s.done", XLogArchiveStatusDir, xlog); StatusFilePath(rlogdone, xlog, ".done");
rc = rename(rlogready, rlogdone); if (rename(rlogready, rlogdone) < 0)
if (rc < 0)
ereport(WARNING, ereport(WARNING,
(errcode_for_file_access(), (errcode_for_file_access(),
errmsg("could not rename \"%s\": %m", errmsg("could not rename \"%s\" to \"%s\": %m",
rlogready))); rlogready, rlogdone)));
} }
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.28 2004/06/05 19:48:08 tgl Exp $ * $PostgreSQL: pgsql/src/backend/storage/lmgr/deadlock.c,v 1.29 2004/07/21 22:31:22 tgl Exp $
* *
* Interface: * Interface:
* *
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
*/ */
#include "postgres.h" #include "postgres.h"
#include "catalog/pg_class.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "storage/proc.h" #include "storage/proc.h"
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* copyright (c) Oliver Elphick <olly@lfix.co.uk>, 2001; * copyright (c) Oliver Elphick <olly@lfix.co.uk>, 2001;
* licence: BSD * licence: BSD
* *
* $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.17 2004/06/03 00:07:36 momjian Exp $ * $PostgreSQL: pgsql/src/bin/pg_controldata/pg_controldata.c,v 1.18 2004/07/21 22:31:23 tgl Exp $
*/ */
#include "postgres.h" #include "postgres.h"
...@@ -165,7 +165,7 @@ main(int argc, char *argv[]) ...@@ -165,7 +165,7 @@ main(int argc, char *argv[])
ControlFile.checkPointCopy.redo.xlogid, ControlFile.checkPointCopy.redo.xrecoff); ControlFile.checkPointCopy.redo.xlogid, ControlFile.checkPointCopy.redo.xrecoff);
printf(_("Latest checkpoint's UNDO location: %X/%X\n"), printf(_("Latest checkpoint's UNDO location: %X/%X\n"),
ControlFile.checkPointCopy.undo.xlogid, ControlFile.checkPointCopy.undo.xrecoff); ControlFile.checkPointCopy.undo.xlogid, ControlFile.checkPointCopy.undo.xrecoff);
printf(_("Latest checkpoint's StartUpID: %u\n"), ControlFile.checkPointCopy.ThisStartUpID); printf(_("Latest checkpoint's TimeLineID: %u\n"), ControlFile.checkPointCopy.ThisTimeLineID);
printf(_("Latest checkpoint's NextXID: %u\n"), ControlFile.checkPointCopy.nextXid); printf(_("Latest checkpoint's NextXID: %u\n"), ControlFile.checkPointCopy.nextXid);
printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid);
printf(_("Time of latest checkpoint: %s\n"), ckpttime_str); printf(_("Time of latest checkpoint: %s\n"), ckpttime_str);
......
...@@ -23,22 +23,22 @@ ...@@ -23,22 +23,22 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.20 2004/06/03 00:07:37 momjian Exp $ * $PostgreSQL: pgsql/src/bin/pg_resetxlog/pg_resetxlog.c,v 1.21 2004/07/21 22:31:24 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#include "postgres.h" #include "postgres.h"
#include <errno.h>
#include <unistd.h>
#include <time.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <fcntl.h>
#include <dirent.h> #include <dirent.h>
#include <fcntl.h>
#include <locale.h> #include <locale.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>
#include "access/xlog.h" #include "access/xlog.h"
#include "access/xlog_internal.h"
#include "catalog/catversion.h" #include "catalog/catversion.h"
#include "catalog/pg_control.h" #include "catalog/pg_control.h"
...@@ -48,27 +48,7 @@ extern char *optarg; ...@@ -48,27 +48,7 @@ extern char *optarg;
#define _(x) gettext((x)) #define _(x) gettext((x))
/******************** stuff copied from xlog.c ********************/ char XLogDir[MAXPGPATH]; /* not static, see xlog_internal.h */
/*
 * Step an xlogid/segment pair forward by one segment: bump the segment
 * number, or, once the last segment (XLogSegsPerFile-1) has been reached,
 * move on to segment 0 of the next xlogid.
 */
#define NextLogSeg(logId, logSeg)	\
	do { \
		if ((logSeg) < XLogSegsPerFile-1) \
			(logSeg)++; \
		else \
		{ \
			(logId)++; \
			(logSeg) = 0; \
		} \
	} while (0)
/*
 * Format the path of an XLOG segment file into "path" (at most MAXPGPATH
 * bytes are written): XLogDir, a slash, then the log id and the segment
 * number, each printed as eight upper-case hex digits.
 */
#define XLogFileName(path, log, seg) \
	snprintf((path), MAXPGPATH, \
			 "%s/%08X%08X", \
			 XLogDir, (log), (seg))
/******************** end of stuff copied from xlog.c ********************/
static char XLogDir[MAXPGPATH];
static char ControlFilePath[MAXPGPATH]; static char ControlFilePath[MAXPGPATH];
static ControlFileData ControlFile; /* pg_control values */ static ControlFileData ControlFile; /* pg_control values */
...@@ -388,9 +368,9 @@ GuessControlValues(void) ...@@ -388,9 +368,9 @@ GuessControlValues(void)
ControlFile.system_identifier = sysidentifier; ControlFile.system_identifier = sysidentifier;
ControlFile.checkPointCopy.redo.xlogid = 0; ControlFile.checkPointCopy.redo.xlogid = 0;
ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogPHD + SizeOfXLogRecord + SizeOfXLogFHD; ControlFile.checkPointCopy.redo.xrecoff = SizeOfXLogLongPHD;
ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo; ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
ControlFile.checkPointCopy.ThisStartUpID = 0; ControlFile.checkPointCopy.ThisTimeLineID = 1;
ControlFile.checkPointCopy.nextXid = (TransactionId) 514; /* XXX */ ControlFile.checkPointCopy.nextXid = (TransactionId) 514; /* XXX */
ControlFile.checkPointCopy.nextOid = BootstrapObjectIdData; ControlFile.checkPointCopy.nextOid = BootstrapObjectIdData;
ControlFile.checkPointCopy.time = time(NULL); ControlFile.checkPointCopy.time = time(NULL);
...@@ -430,7 +410,7 @@ GuessControlValues(void) ...@@ -430,7 +410,7 @@ GuessControlValues(void)
/* /*
* XXX eventually, should try to grovel through old XLOG to develop * XXX eventually, should try to grovel through old XLOG to develop
* more accurate values for startupid, nextXID, and nextOID. * more accurate values for TimeLineID, nextXID, and nextOID.
*/ */
} }
...@@ -463,7 +443,7 @@ PrintControlValues(bool guessed) ...@@ -463,7 +443,7 @@ PrintControlValues(bool guessed)
printf(_("Database system identifier: %s\n"), sysident_str); printf(_("Database system identifier: %s\n"), sysident_str);
printf(_("Current log file ID: %u\n"), ControlFile.logId); printf(_("Current log file ID: %u\n"), ControlFile.logId);
printf(_("Next log file segment: %u\n"), ControlFile.logSeg); printf(_("Next log file segment: %u\n"), ControlFile.logSeg);
printf(_("Latest checkpoint's StartUpID: %u\n"), ControlFile.checkPointCopy.ThisStartUpID); printf(_("Latest checkpoint's TimeLineID: %u\n"), ControlFile.checkPointCopy.ThisTimeLineID);
printf(_("Latest checkpoint's NextXID: %u\n"), ControlFile.checkPointCopy.nextXid); printf(_("Latest checkpoint's NextXID: %u\n"), ControlFile.checkPointCopy.nextXid);
printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid); printf(_("Latest checkpoint's NextOID: %u\n"), ControlFile.checkPointCopy.nextOid);
printf(_("Database block size: %u\n"), ControlFile.blcksz); printf(_("Database block size: %u\n"), ControlFile.blcksz);
...@@ -506,7 +486,7 @@ RewriteControlFile(void) ...@@ -506,7 +486,7 @@ RewriteControlFile(void)
ControlFile.checkPointCopy.redo.xlogid = newXlogId; ControlFile.checkPointCopy.redo.xlogid = newXlogId;
ControlFile.checkPointCopy.redo.xrecoff = ControlFile.checkPointCopy.redo.xrecoff =
newXlogSeg * XLogSegSize + SizeOfXLogPHD + SizeOfXLogRecord + SizeOfXLogFHD; newXlogSeg * XLogSegSize + SizeOfXLogLongPHD;
ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo; ControlFile.checkPointCopy.undo = ControlFile.checkPointCopy.redo;
ControlFile.checkPointCopy.time = time(NULL); ControlFile.checkPointCopy.time = time(NULL);
...@@ -634,8 +614,8 @@ WriteEmptyXLOG(void) ...@@ -634,8 +614,8 @@ WriteEmptyXLOG(void)
{ {
char *buffer; char *buffer;
XLogPageHeader page; XLogPageHeader page;
XLogLongPageHeader longpage;
XLogRecord *record; XLogRecord *record;
XLogFileHeaderData *fhdr;
crc64 crc; crc64 crc;
char path[MAXPGPATH]; char path[MAXPGPATH];
int fd; int fd;
...@@ -648,44 +628,23 @@ WriteEmptyXLOG(void) ...@@ -648,44 +628,23 @@ WriteEmptyXLOG(void)
/* Set up the XLOG page header */ /* Set up the XLOG page header */
page->xlp_magic = XLOG_PAGE_MAGIC; page->xlp_magic = XLOG_PAGE_MAGIC;
page->xlp_info = 0; page->xlp_info = XLP_LONG_HEADER;
page->xlp_sui = ControlFile.checkPointCopy.ThisStartUpID; page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID;
page->xlp_pageaddr.xlogid = page->xlp_pageaddr.xlogid =
ControlFile.checkPointCopy.redo.xlogid; ControlFile.checkPointCopy.redo.xlogid;
page->xlp_pageaddr.xrecoff = page->xlp_pageaddr.xrecoff =
ControlFile.checkPointCopy.redo.xrecoff - ControlFile.checkPointCopy.redo.xrecoff - SizeOfXLogLongPHD;
(SizeOfXLogPHD + SizeOfXLogRecord + SizeOfXLogFHD); longpage = (XLogLongPageHeader) page;
longpage->xlp_sysid = ControlFile.system_identifier;
longpage->xlp_seg_size = XLogSegSize;
/* Insert the file header record */ /* Insert the initial checkpoint record */
record = (XLogRecord *) ((char *) page + SizeOfXLogPHD); record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD);
record->xl_prev.xlogid = 0; record->xl_prev.xlogid = 0;
record->xl_prev.xrecoff = 0; record->xl_prev.xrecoff = 0;
record->xl_xact_prev.xlogid = 0; record->xl_xact_prev.xlogid = 0;
record->xl_xact_prev.xrecoff = 0; record->xl_xact_prev.xrecoff = 0;
record->xl_xid = InvalidTransactionId; record->xl_xid = InvalidTransactionId;
record->xl_len = SizeOfXLogFHD;
record->xl_info = XLOG_FILE_HEADER;
record->xl_rmid = RM_XLOG_ID;
fhdr = (XLogFileHeaderData *) XLogRecGetData(record);
fhdr->xlfhd_sysid = ControlFile.system_identifier;
fhdr->xlfhd_xlogid = page->xlp_pageaddr.xlogid;
fhdr->xlfhd_segno = page->xlp_pageaddr.xrecoff / XLogSegSize;
fhdr->xlfhd_seg_size = XLogSegSize;
INIT_CRC64(crc);
COMP_CRC64(crc, fhdr, SizeOfXLogFHD);
COMP_CRC64(crc, (char *) record + sizeof(crc64),
SizeOfXLogRecord - sizeof(crc64));
FIN_CRC64(crc);
record->xl_crc = crc;
/* Insert the initial checkpoint record */
record = (XLogRecord *) ((char *) page + SizeOfXLogPHD + SizeOfXLogRecord + SizeOfXLogFHD);
record->xl_prev.xlogid = page->xlp_pageaddr.xlogid;
record->xl_prev.xrecoff = page->xlp_pageaddr.xrecoff + SizeOfXLogPHD;
record->xl_xact_prev.xlogid = 0;
record->xl_xact_prev.xrecoff = 0;
record->xl_xid = InvalidTransactionId;
record->xl_len = sizeof(CheckPoint); record->xl_len = sizeof(CheckPoint);
record->xl_info = XLOG_CHECKPOINT_SHUTDOWN; record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
record->xl_rmid = RM_XLOG_ID; record->xl_rmid = RM_XLOG_ID;
...@@ -700,7 +659,8 @@ WriteEmptyXLOG(void) ...@@ -700,7 +659,8 @@ WriteEmptyXLOG(void)
record->xl_crc = crc; record->xl_crc = crc;
/* Write the first page */ /* Write the first page */
XLogFileName(path, newXlogId, newXlogSeg); XLogFilePath(path, ControlFile.checkPointCopy.ThisTimeLineID,
newXlogId, newXlogSeg);
unlink(path); unlink(path);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/xact.h,v 1.65 2004/07/17 03:30:38 tgl Exp $ * $PostgreSQL: pgsql/src/include/access/xact.h,v 1.66 2004/07/21 22:31:25 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#define XACT_H #define XACT_H
#include "access/xlog.h" #include "access/xlog.h"
#include "storage/relfilenode.h"
#include "utils/nabstime.h" #include "utils/nabstime.h"
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.53 2004/07/19 02:47:13 tgl Exp $ * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.54 2004/07/21 22:31:25 tgl Exp $
*/ */
#ifndef XLOG_H #ifndef XLOG_H
#define XLOG_H #define XLOG_H
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
#include "access/rmgr.h" #include "access/rmgr.h"
#include "access/transam.h" #include "access/transam.h"
#include "access/xlogdefs.h" #include "access/xlogdefs.h"
#include "storage/bufmgr.h" #include "storage/buf.h"
#include "utils/pg_crc.h" #include "utils/pg_crc.h"
...@@ -76,107 +76,6 @@ typedef struct XLogRecord ...@@ -76,107 +76,6 @@ typedef struct XLogRecord
#define XLOG_NO_TRAN XLR_INFO_MASK #define XLOG_NO_TRAN XLR_INFO_MASK
/* /*
* Header info for a backup block appended to an XLOG record.
*
* Note that the backup block has its own CRC, and is not covered by
* the CRC of the XLOG record proper. Also note that we don't attempt
* to align either the BkpBlock struct or the block's data.
*/
typedef struct BkpBlock
{
crc64 crc; /* the backup block's own CRC; not part of the XLOG record's CRC */
RelFileNode node; /* presumably the relation file the saved block belongs to -- TODO confirm */
BlockNumber block; /* presumably the block's number within that file -- TODO confirm */
} BkpBlock;
/*
 * When there is not enough space on the current page for a whole record, we
 * continue on the next page with a continuation record.  (However, the
 * XLogRecord header will never be split across pages; if there's less than
 * SizeOfXLogRecord space left at the end of a page, we just waste it.)
 *
 * Note that xl_rem_len includes backup-block data, unlike xl_len in the
 * initial header.
 */
typedef struct XLogContRecord
{
uint32 xl_rem_len; /* total length of the record data still to come */
/* ACTUAL LOG DATA FOLLOWS AT END OF STRUCT */
} XLogContRecord;
/* Size of the continuation header, rounded up by MAXALIGN */
#define SizeOfXLogContRecord MAXALIGN(sizeof(XLogContRecord))
/*
 * Each page of an XLOG file starts with a header like this.
 */
#define XLOG_PAGE_MAGIC 0xD05B /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData
{
uint16 xlp_magic; /* magic value for correctness checks (presumably XLOG_PAGE_MAGIC) */
uint16 xlp_info; /* flag bits, see the XLP_* definitions below */
StartUpID xlp_sui; /* StartUpID of first record on page */
XLogRecPtr xlp_pageaddr; /* XLOG address of this page */
} XLogPageHeaderData;
/* Size of the page header, rounded up by MAXALIGN */
#define SizeOfXLogPHD MAXALIGN(sizeof(XLogPageHeaderData))
/* Convenience pointer type for a page header */
typedef XLogPageHeaderData *XLogPageHeader;
/*
 * xlp_info flag: set in a new page's header when a record crosses the page
 * boundary, i.e. the page starts with the remainder of that record.
 */
#define XLP_FIRST_IS_CONTRECORD		0x0001

/*
 * Every flag bit that may legitimately appear in xlp_info; used when
 * checking a page header for validity.
 */
#define XLP_ALL_FLAGS				(XLP_FIRST_IS_CONTRECORD)
/*
 * We break each logical log file (xlogid value) into segment files of the
 * size indicated by XLOG_SEG_SIZE.  One possible segment at the end of each
 * log file is wasted, to ensure that we don't have problems representing
 * last-byte-position-plus-1.
 */
/* Size of one segment file, as a uint32 */
#define XLogSegSize ((uint32) XLOG_SEG_SIZE)
/* Whole segments per logical log file: 0xffffffff / XLogSegSize, rounded down */
#define XLogSegsPerFile (((uint32) 0xffffffff) / XLogSegSize)
/* Bytes covered by those segments within one logical log file */
#define XLogFileSize (XLogSegsPerFile * XLogSegSize)
/*
 * The first XLOG record in each segment file is always an XLOG_FILE_HEADER
 * record.  This record does nothing as far as XLOG replay is concerned,
 * but it is useful for verifying that we haven't mixed up XLOG segment files.
 * The body of an XLOG_FILE_HEADER record is a struct XLogFileHeaderData.
 *
 * Note: the xlogid/segno fields are really redundant with xlp_pageaddr in
 * the page header, but we store them anyway as an extra check.
 */
typedef struct XLogFileHeaderData
{
uint64 xlfhd_sysid; /* system identifier from pg_control */
uint32 xlfhd_xlogid; /* logical log file # */
uint32 xlfhd_segno; /* segment number within logical log file */
uint32 xlfhd_seg_size; /* segment size, stored just as a cross-check */
} XLogFileHeaderData;
/* Size of the file-header record body, rounded up by MAXALIGN */
#define SizeOfXLogFHD MAXALIGN(sizeof(XLogFileHeaderData))
/*
 * Method table for resource managers.
 *
 * RmgrTable[] is indexed by RmgrId values (see rmgr.h).  Each entry bundles
 * the name and the callbacks of one resource manager.
 *
 * NOTE(review): the per-callback remarks below are inferred from member
 * names and signatures only; confirm against the code that uses RmgrTable[].
 */
typedef struct RmgrData
{
const char *rm_name; /* name of the resource manager */
void (*rm_redo) (XLogRecPtr lsn, XLogRecord *rptr); /* presumably redoes one record */
void (*rm_undo) (XLogRecPtr lsn, XLogRecord *rptr); /* presumably undoes one record */
void (*rm_desc) (char *buf, uint8 xl_info, char *rec); /* presumably describes a record into buf */
void (*rm_startup) (void); /* presumably a per-rmgr setup hook */
void (*rm_cleanup) (void); /* presumably a per-rmgr teardown hook */
} RmgrData;
extern RmgrData RmgrTable[];
/*--------------------
* List of these structs is used to pass data to XLogInsert(). * List of these structs is used to pass data to XLogInsert().
* *
* If buffer is valid then XLOG will check if buffer must be backed up * If buffer is valid then XLOG will check if buffer must be backed up
...@@ -188,7 +87,6 @@ extern RmgrData RmgrTable[]; ...@@ -188,7 +87,6 @@ extern RmgrData RmgrTable[];
* the XLOG record, since we assume it's present in the buffer. Therefore, * the XLOG record, since we assume it's present in the buffer. Therefore,
* rmgr redo routines MUST pay attention to XLR_BKP_BLOCK_X to know what * rmgr redo routines MUST pay attention to XLR_BKP_BLOCK_X to know what
* is actually stored in the XLOG record. * is actually stored in the XLOG record.
*--------------------
*/ */
typedef struct XLogRecData typedef struct XLogRecData
{ {
...@@ -198,7 +96,7 @@ typedef struct XLogRecData ...@@ -198,7 +96,7 @@ typedef struct XLogRecData
struct XLogRecData *next; struct XLogRecData *next;
} XLogRecData; } XLogRecData;
extern StartUpID ThisStartUpID; /* current SUI */ extern TimeLineID ThisTimeLineID; /* current TLI */
extern bool InRecovery; extern bool InRecovery;
extern XLogRecPtr MyLastRecPtr; extern XLogRecPtr MyLastRecPtr;
extern bool MyXactMadeXLogEntry; extern bool MyXactMadeXLogEntry;
......
/*
* xlog_internal.h
*
* PostgreSQL transaction log internal declarations
*
* NOTE: this file is intended to contain declarations useful for
* manipulating the XLOG files directly, but it is not supposed to be
* needed by rmgr routines (redo/undo support for individual record types).
* So the XLogRecord typedef and associated stuff appear in xlog.h.
*
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/access/xlog_internal.h,v 1.1 2004/07/21 22:31:25 tgl Exp $
*/
#ifndef XLOG_INTERNAL_H
#define XLOG_INTERNAL_H
#include "access/xlog.h"
#include "storage/block.h"
#include "storage/relfilenode.h"
/*
 * Header info for a backup block appended to an XLOG record.
 *
 * Note that the backup block has its own CRC, and is not covered by
 * the CRC of the XLOG record proper.  Also note that we don't attempt
 * to align either the BkpBlock struct or the block's data.
 */
typedef struct BkpBlock
{
crc64 crc; /* the backup block's own CRC; not part of the XLOG record's CRC */
RelFileNode node; /* presumably the relation file the saved block belongs to -- TODO confirm */
BlockNumber block; /* presumably the block's number within that file -- TODO confirm */
} BkpBlock;
/*
 * When there is not enough space on the current page for a whole record, we
 * continue on the next page with a continuation record.  (However, the
 * XLogRecord header will never be split across pages; if there's less than
 * SizeOfXLogRecord space left at the end of a page, we just waste it.)
 *
 * Note that xl_rem_len includes backup-block data, unlike xl_len in the
 * initial header.
 */
typedef struct XLogContRecord
{
uint32 xl_rem_len; /* total length of the record data still to come */
/* ACTUAL LOG DATA FOLLOWS AT END OF STRUCT */
} XLogContRecord;
/* Size of the continuation header, rounded up by MAXALIGN */
#define SizeOfXLogContRecord MAXALIGN(sizeof(XLogContRecord))
/*
 * Each page of an XLOG file starts with a header like this.  This is the
 * "short" form; a page whose xlp_info has XLP_LONG_HEADER set carries the
 * larger XLogLongPageHeaderData instead.
 */
#define XLOG_PAGE_MAGIC 0xD05B /* can be used as WAL version indicator */
typedef struct XLogPageHeaderData
{
uint16 xlp_magic; /* magic value for correctness checks (presumably XLOG_PAGE_MAGIC) */
uint16 xlp_info; /* flag bits, see the XLP_* definitions below */
TimeLineID xlp_tli; /* TimeLineID of first record on page */
XLogRecPtr xlp_pageaddr; /* XLOG address of this page */
} XLogPageHeaderData;
/* Size of the short page header, rounded up by MAXALIGN */
#define SizeOfXLogShortPHD MAXALIGN(sizeof(XLogPageHeaderData))
/* Convenience pointer type for a (short) page header */
typedef XLogPageHeaderData *XLogPageHeader;
/*
 * When the XLP_LONG_HEADER flag is set, we store additional fields in the
 * page header.  (This is ordinarily done just in the first page of an
 * XLOG file.)  The additional fields serve to identify the file accurately.
 *
 * The standard header is the first member, so its fields sit at the same
 * offsets in both header forms; XLogPageHeaderSize() reads xlp_info through
 * a plain page-header pointer to find out which form is present.
 */
typedef struct XLogLongPageHeaderData
{
XLogPageHeaderData std; /* standard header fields; must remain first */
uint64 xlp_sysid; /* system identifier from pg_control */
uint32 xlp_seg_size; /* segment size, stored just as a cross-check */
} XLogLongPageHeaderData;
/* Size of the long page header, rounded up by MAXALIGN */
#define SizeOfXLogLongPHD MAXALIGN(sizeof(XLogLongPageHeaderData))
/* Convenience pointer type for a long page header */
typedef XLogLongPageHeaderData *XLogLongPageHeader;
/*
 * Flag bits for xlp_info.
 *
 * XLP_FIRST_IS_CONTRECORD is set in a new page's header when a record
 * crosses the page boundary; XLP_LONG_HEADER marks a "long" page header.
 */
#define XLP_FIRST_IS_CONTRECORD		0x0001
#define XLP_LONG_HEADER				0x0002

/*
 * Every flag bit that may legitimately appear in xlp_info; used when
 * checking a page header for validity.
 */
#define XLP_ALL_FLAGS	(XLP_FIRST_IS_CONTRECORD | XLP_LONG_HEADER)

/*
 * Size of the header actually present on a page: the short size unless the
 * page's xlp_info says a long header was written.  "hdr" is a pointer to
 * something with an xlp_info member and is evaluated exactly once.
 */
#define XLogPageHeaderSize(hdr)		\
	((((hdr)->xlp_info & XLP_LONG_HEADER) == 0) ? \
	 SizeOfXLogShortPHD : SizeOfXLogLongPHD)
/*
 * We break each logical log file (xlogid value) into segment files of the
 * size indicated by XLOG_SEG_SIZE.  One possible segment at the end of each
 * log file is wasted, to ensure that we don't have problems representing
 * last-byte-position-plus-1.
 */
/* Size of one segment file, as a uint32 */
#define XLogSegSize ((uint32) XLOG_SEG_SIZE)
/* Whole segments per logical log file: 0xffffffff / XLogSegSize, rounded down */
#define XLogSegsPerFile (((uint32) 0xffffffff) / XLogSegSize)
/* Bytes covered by those segments within one logical log file */
#define XLogFileSize (XLogSegsPerFile * XLogSegSize)
/*
* Macros for manipulating XLOG pointers
*/
/*
 * Advance an xlogid/segment pair to the following segment.
 *
 * Within a log file the segment number simply goes up by one; from the last
 * segment (XLogSegsPerFile-1, or anything beyond it) we move to segment 0 of
 * the next log file.  Both arguments must be modifiable lvalues.
 */
#define NextLogSeg(logId, logSeg)	\
	do { \
		if ((logSeg) < XLogSegsPerFile - 1) \
			(logSeg)++; \
		else \
		{ \
			(logId)++; \
			(logSeg) = 0; \
		} \
	} while (0)
/*
 * Step an xlogid/segment pair back to the preceding segment.
 *
 * From segment 0 we go to the last segment (XLogSegsPerFile-1) of the
 * previous log file; otherwise the segment number just drops by one.
 * The caller must not pass the pair (0,0).  Both arguments must be
 * modifiable lvalues.
 */
#define PrevLogSeg(logId, logSeg)	\
	do { \
		if ((logSeg) == 0) \
		{ \
			(logId)--; \
			(logSeg) = XLogSegsPerFile - 1; \
		} \
		else \
			(logSeg)--; \
	} while (0)
/*
 * Split an XLogRecPtr into its log file ID and segment number.
 *
 * XLByteToSeg takes the pointer at face value.  XLByteToPrevSeg treats a
 * pointer that sits exactly on a segment boundary as belonging to the
 * segment before it, which is what is wanted when the pointer marks the end
 * of a record and we must decide which segment to write.  (xrecoff can be
 * assumed nonzero there, since no valid recptr has a zero offset.)
 *
 * logId and logSeg are assigned to, so they must be lvalues.
 */
#define XLByteToSeg(xlrp, logId, logSeg)	\
	((logId) = (xlrp).xlogid, \
	 (logSeg) = (xlrp).xrecoff / XLogSegSize)

#define XLByteToPrevSeg(xlrp, logId, logSeg)	\
	((logId) = (xlrp).xlogid, \
	 (logSeg) = ((xlrp).xrecoff - 1) / XLogSegSize)
/*
 * Does an XLogRecPtr fall inside the given XLOG segment?
 *
 * XLByteInSeg takes the pointer at face value.  XLByteInPrevSeg treats a
 * pointer that sits exactly on a segment boundary as belonging to the
 * segment before it.
 */
#define XLByteInSeg(xlrp, logId, logSeg)	\
	((logId) == (xlrp).xlogid && \
	 (logSeg) == (xlrp).xrecoff / XLogSegSize)

#define XLByteInPrevSeg(xlrp, logId, logSeg)	\
	((logId) == (xlrp).xlogid && \
	 (logSeg) == ((xlrp).xrecoff - 1) / XLogSegSize)
/*
 * Plausibility check for an xrecoff value: within its page the offset must
 * lie past a (short) page header, and must leave room for at least a record
 * header before the end of the page.  Note the argument is evaluated more
 * than once.
 */
#define XRecOffIsValid(xrecoff) \
	(SizeOfXLogShortPHD <= (xrecoff) % BLCKSZ && \
	 SizeOfXLogRecord <= (BLCKSZ - (xrecoff) % BLCKSZ))
/*
 * Naming conventions for XLOG segment files, timeline history files and
 * archive-status files are kept in one place here.
 *
 * The *FileName variants produce a bare file name into a buffer of at least
 * MAXFNAMELEN bytes; the *FilePath variants produce a name prefixed with
 * XLogDir into a buffer of at least MAXPGPATH bytes.  Numeric components
 * are printed as 8 upper-case hex digits each.
 */
#define MAXFNAMELEN		32

/* Segment file: timeline, log ID and segment number, 24 hex digits in all */
#define XLogFileName(fname, tli, log, seg)	\
	snprintf((fname), MAXFNAMELEN, "%08X%08X%08X", (tli), (log), (seg))

#define XLogFilePath(path, tli, log, seg)	\
	snprintf((path), MAXPGPATH, "%s/%08X%08X%08X", XLogDir, (tli), (log), (seg))

/* Timeline history file: timeline ID plus a ".history" suffix */
#define TLHistoryFileName(fname, tli)	\
	snprintf((fname), MAXFNAMELEN, "%08X.history", (tli))

#define TLHistoryFilePath(path, tli)	\
	snprintf((path), MAXPGPATH, "%s/%08X.history", XLogDir, (tli))

/* Archive-status file: lives in archive_status/, named <xlog><suffix> */
#define StatusFilePath(path, xlog, suffix)	\
	snprintf((path), MAXPGPATH, "%s/archive_status/%s%s", XLogDir, (xlog), (suffix))
extern char XLogDir[MAXPGPATH];
/*
 * _INTL_MAXLOGRECSZ: max space needed for a record including header and
 * any backup-block data.
 *
 * The sum is the record header (SizeOfXLogRecord), MAXLOGRECSZ (presumably
 * the largest rmgr data payload -- confirm where it is defined), and
 * XLR_MAX_BKP_BLOCKS backup blocks, each counted as a BkpBlock header of
 * plain sizeof (no MAXALIGN padding) plus one full BLCKSZ page.
 *
 * NOTE(review): identifiers that begin with an underscore followed by an
 * uppercase letter are reserved by the C standard.  Renaming would require
 * changing every user of this macro, so it is only flagged here.
 */
#define _INTL_MAXLOGRECSZ (SizeOfXLogRecord + MAXLOGRECSZ + \
XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
/*
 * Method table for resource managers.
 *
 * RmgrTable[] is indexed by RmgrId values (see rmgr.h).  Each entry bundles
 * the name and the callbacks of one resource manager.  The table is
 * declared const, so entries cannot be modified through this declaration.
 *
 * NOTE(review): the per-callback remarks below are inferred from member
 * names and signatures only; confirm against the code that uses RmgrTable[].
 */
typedef struct RmgrData
{
const char *rm_name; /* name of the resource manager */
void (*rm_redo) (XLogRecPtr lsn, XLogRecord *rptr); /* presumably redoes one record */
void (*rm_undo) (XLogRecPtr lsn, XLogRecord *rptr); /* presumably undoes one record */
void (*rm_desc) (char *buf, uint8 xl_info, char *rec); /* presumably describes a record into buf */
void (*rm_startup) (void); /* presumably a per-rmgr setup hook */
void (*rm_cleanup) (void); /* presumably a per-rmgr teardown hook */
} RmgrData;
extern const RmgrData RmgrTable[];
#endif /* XLOG_INTERNAL_H */
...@@ -2,12 +2,12 @@ ...@@ -2,12 +2,12 @@
* xlogdefs.h * xlogdefs.h
* *
* Postgres transaction log manager record pointer and * Postgres transaction log manager record pointer and
* system startup number definitions * timeline number definitions
* *
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/xlogdefs.h,v 1.11 2003/12/20 17:31:21 momjian Exp $ * $PostgreSQL: pgsql/src/include/access/xlogdefs.h,v 1.12 2004/07/21 22:31:25 tgl Exp $
*/ */
#ifndef XLOG_DEFS_H #ifndef XLOG_DEFS_H
#define XLOG_DEFS_H #define XLOG_DEFS_H
...@@ -33,12 +33,6 @@ typedef struct XLogRecPtr ...@@ -33,12 +33,6 @@ typedef struct XLogRecPtr
uint32 xrecoff; /* byte offset of location in log file */ uint32 xrecoff; /* byte offset of location in log file */
} XLogRecPtr; } XLogRecPtr;
/*
 * Pair of XLOG positions recording how far log output has got: one for data
 * written out and one for data flushed.  Each is a "last byte + 1" position.
 */
typedef struct XLogwrtResult
{
XLogRecPtr Write; /* last byte + 1 written out */
XLogRecPtr Flush; /* last byte + 1 flushed */
} XLogwrtResult;
/* /*
* Macros for comparing XLogRecPtrs * Macros for comparing XLogRecPtrs
...@@ -57,10 +51,16 @@ typedef struct XLogwrtResult ...@@ -57,10 +51,16 @@ typedef struct XLogwrtResult
#define XLByteEQ(a, b) \ #define XLByteEQ(a, b) \
((a).xlogid == (b).xlogid && (a).xrecoff == (b).xrecoff) ((a).xlogid == (b).xlogid && (a).xrecoff == (b).xrecoff)
/* /*
* StartUpID (SUI) - system startups counter. It's to allow removing * TimeLineID (TLI) - identifies different database histories to prevent
* pg_clog after shutdown, in future. * confusion after restoring a prior state of a database installation.
* TLI does not change in a normal stop/restart of the database (including
* crash-and-recover cases); but we must assign a new TLI after doing
* a recovery to a prior state, a/k/a point-in-time recovery. This makes
* the new WAL logfile sequence we generate distinguishable from the
* sequence that was generated in the previous incarnation.
*/ */
typedef uint32 StartUpID; typedef uint32 TimeLineID;
#endif /* XLOG_DEFS_H */ #endif /* XLOG_DEFS_H */
...@@ -6,19 +6,15 @@ ...@@ -6,19 +6,15 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/access/xlogutils.h,v 1.14 2004/02/11 22:55:25 tgl Exp $ * $PostgreSQL: pgsql/src/include/access/xlogutils.h,v 1.15 2004/07/21 22:31:25 tgl Exp $
*/ */
#ifndef XLOG_UTILS_H #ifndef XLOG_UTILS_H
#define XLOG_UTILS_H #define XLOG_UTILS_H
#include "access/rmgr.h" #include "access/rmgr.h"
#include "storage/buf.h" #include "storage/buf.h"
#include "storage/itemptr.h"
#include "utils/rel.h" #include "utils/rel.h"
extern int XLogIsOwnerOfTuple(RelFileNode hnode, ItemPointer iptr,
TransactionId xid, CommandId cid);
extern bool XLogIsValidTuple(RelFileNode hnode, ItemPointer iptr);
extern void XLogInitRelationCache(void); extern void XLogInitRelationCache(void);
extern void XLogCloseRelationCache(void); extern void XLogCloseRelationCache(void);
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.15 2004/06/03 02:08:05 tgl Exp $ * $PostgreSQL: pgsql/src/include/catalog/pg_control.h,v 1.16 2004/07/21 22:31:25 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
/* Version identifier for this pg_control format */ /* Version identifier for this pg_control format */
#define PG_CONTROL_VERSION 73 #define PG_CONTROL_VERSION 74
/* /*
* Body of CheckPoint XLOG records. This is declared here because we keep * Body of CheckPoint XLOG records. This is declared here because we keep
...@@ -30,13 +30,13 @@ ...@@ -30,13 +30,13 @@
*/ */
typedef struct CheckPoint typedef struct CheckPoint
{ {
XLogRecPtr redo; /* next RecPtr available when we */ XLogRecPtr redo; /* next RecPtr available when we
/* began to create CheckPoint */ * began to create CheckPoint
/* (i.e. REDO start point) */ * (i.e. REDO start point) */
XLogRecPtr undo; /* first record of oldest in-progress */ XLogRecPtr undo; /* first record of oldest in-progress
/* transaction when we started */ * transaction when we started
/* (i.e. UNDO end point) */ * (i.e. UNDO end point) */
StartUpID ThisStartUpID; /* current SUI */ TimeLineID ThisTimeLineID; /* current TLI */
TransactionId nextXid; /* next free XID */ TransactionId nextXid; /* next free XID */
Oid nextOid; /* next free OID */ Oid nextOid; /* next free OID */
time_t time; /* time stamp of checkpoint */ time_t time; /* time stamp of checkpoint */
...@@ -46,8 +46,6 @@ typedef struct CheckPoint ...@@ -46,8 +46,6 @@ typedef struct CheckPoint
#define XLOG_CHECKPOINT_SHUTDOWN 0x00 #define XLOG_CHECKPOINT_SHUTDOWN 0x00
#define XLOG_CHECKPOINT_ONLINE 0x10 #define XLOG_CHECKPOINT_ONLINE 0x10
#define XLOG_NEXTOID 0x30 #define XLOG_NEXTOID 0x30
#define XLOG_FILE_HEADER 0x40
#define XLOG_WASTED_SPACE 0x50
/* System status indicator */ /* System status indicator */
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.59 2004/07/01 00:51:43 tgl Exp $ * $PostgreSQL: pgsql/src/include/storage/bufpage.h,v 1.60 2004/07/21 22:31:26 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -87,13 +87,22 @@ typedef uint16 LocationIndex; ...@@ -87,13 +87,22 @@ typedef uint16 LocationIndex;
/* /*
* disk page organization * disk page organization
*
* space management information generic to any page * space management information generic to any page
* *
* pd_lsn - identifies xlog record for last change to this page.
* pd_tli - ditto.
* pd_lower - offset to start of free space. * pd_lower - offset to start of free space.
* pd_upper - offset to end of free space. * pd_upper - offset to end of free space.
* pd_special - offset to start of special space. * pd_special - offset to start of special space.
* pd_pagesize_version - size in bytes and page layout version number. * pd_pagesize_version - size in bytes and page layout version number.
* *
* The LSN is used by the buffer manager to enforce the basic rule of WAL:
* "thou shalt write xlog before data". A dirty buffer cannot be dumped
* to disk until xlog has been flushed at least as far as the page's LSN.
* We also store the TLI for identification purposes (it is not clear that
* this is actually necessary, but it seems like a good idea).
*
* The page version number and page size are packed together into a single * The page version number and page size are packed together into a single
* uint16 field. This is for historical reasons: before PostgreSQL 7.3, * uint16 field. This is for historical reasons: before PostgreSQL 7.3,
* there was no concept of a page version number, and doing it this way * there was no concept of a page version number, and doing it this way
...@@ -109,13 +118,10 @@ typedef uint16 LocationIndex; ...@@ -109,13 +118,10 @@ typedef uint16 LocationIndex;
*/ */
typedef struct PageHeaderData typedef struct PageHeaderData
{ {
/* XXX LSN is member of *any* block, not */ /* XXX LSN is member of *any* block, not only page-organized ones */
/* only page-organized - 'll change later */ XLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog
XLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog */ * record for last change to this page */
/* record for last change of this page */ TimeLineID pd_tli; /* TLI of last change */
StartUpID pd_sui; /* SUI of last changes (currently it's */
/* used by heap AM only) */
LocationIndex pd_lower; /* offset to start of free space */ LocationIndex pd_lower; /* offset to start of free space */
LocationIndex pd_upper; /* offset to end of free space */ LocationIndex pd_upper; /* offset to end of free space */
LocationIndex pd_special; /* offset to start of special space */ LocationIndex pd_special; /* offset to start of special space */
...@@ -298,10 +304,10 @@ typedef PageHeaderData *PageHeader; ...@@ -298,10 +304,10 @@ typedef PageHeaderData *PageHeader;
#define PageSetLSN(page, lsn) \ #define PageSetLSN(page, lsn) \
(((PageHeader) (page))->pd_lsn = (lsn)) (((PageHeader) (page))->pd_lsn = (lsn))
#define PageGetSUI(page) \ #define PageGetTLI(page) \
(((PageHeader) (page))->pd_sui) (((PageHeader) (page))->pd_tli)
#define PageSetSUI(page, sui) \ #define PageSetTLI(page, tli) \
(((PageHeader) (page))->pd_sui = (StartUpID) (sui)) (((PageHeader) (page))->pd_tli = (tli))
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
* extern declarations * extern declarations
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment