Commit 1fde38be authored by Magnus Hagander's avatar Magnus Hagander

Allow on-line enabling and disabling of data checksums

This makes it possible to turn checksums on in a live cluster, without
the previous need for dump/reload or logical replication (and to turn it
off).

Enabling checksums starts a background process in the form of a
launcher/worker combination that goes through the entire database and
recalculates checksums on each and every page. Only when all pages have
been checksummed are they fully enabled in the cluster. Any failure of
the process will revert to checksums off, and the process has to be
restarted.

This adds a new WAL record that indicates the state of checksums, so
the process works across replicated clusters.

Authors: Magnus Hagander and Daniel Gustafsson
Review: Tomas Vondra, Michael Banck, Heikki Linnakangas, Andrey Borodin
parent c39e903d
...@@ -19540,6 +19540,71 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup()); ...@@ -19540,6 +19540,71 @@ postgres=# SELECT * FROM pg_walfile_name_offset(pg_stop_backup());
</sect2> </sect2>
<sect2 id="functions-admin-checksum">
<title>Data Checksum Functions</title>
<para>
The functions shown in <xref linkend="functions-checksums-table" /> can
be used to enable or disable data checksums in a running cluster.
See <xref linkend="checksums" /> for details.
</para>
<table id="functions-checksums-table">
<title>Checksum <acronym>SQL</acronym> Functions</title>
<tgroup cols="3">
<thead>
<row>
<entry>Function</entry>
<entry>Return Type</entry>
<entry>Description</entry>
</row>
</thead>
<tbody>
<row>
<entry>
<indexterm>
<primary>pg_enable_data_checksums</primary>
</indexterm>
<literal><function>pg_enable_data_checksums(<optional><parameter>cost_delay</parameter> <type>int</type>, <parameter>cost_limit</parameter> <type>int</type></optional>)</function></literal>
</entry>
<entry>
void
</entry>
<entry>
<para>
Initiates data checksums for the cluster. This will switch the data checksums mode
to <literal>inprogress</literal> and start a background worker that will process
all data in the database and enable checksums for it. When all data pages have had
checksums enabled, the cluster will automatically switch to checksums
<literal>on</literal>.
</para>
<para>
If <parameter>cost_delay</parameter> and <parameter>cost_limit</parameter> are
specified, the speed of the process is throttled using the same principles as
<link linkend="runtime-config-resource-vacuum-cost">Cost-based Vacuum Delay</link>.
</para>
</entry>
</row>
<row>
<entry>
<indexterm>
<primary>pg_disable_data_checksums</primary>
</indexterm>
<literal><function>pg_disable_data_checksums()</function></literal>
</entry>
<entry>
void
</entry>
<entry>
Disables data checksums for the cluster.
</entry>
</row>
</tbody>
</tgroup>
</table>
</sect2>
<sect2 id="functions-admin-dbobject"> <sect2 id="functions-admin-dbobject">
<title>Database Object Management Functions</title> <title>Database Object Management Functions</title>
......
...@@ -211,6 +211,7 @@ Complete list of usable sgml source files in this directory. ...@@ -211,6 +211,7 @@ Complete list of usable sgml source files in this directory.
<!ENTITY pgResetwal SYSTEM "pg_resetwal.sgml"> <!ENTITY pgResetwal SYSTEM "pg_resetwal.sgml">
<!ENTITY pgRestore SYSTEM "pg_restore.sgml"> <!ENTITY pgRestore SYSTEM "pg_restore.sgml">
<!ENTITY pgRewind SYSTEM "pg_rewind.sgml"> <!ENTITY pgRewind SYSTEM "pg_rewind.sgml">
<!ENTITY pgVerifyChecksums SYSTEM "pg_verify_checksums.sgml">
<!ENTITY pgtestfsync SYSTEM "pgtestfsync.sgml"> <!ENTITY pgtestfsync SYSTEM "pgtestfsync.sgml">
<!ENTITY pgtesttiming SYSTEM "pgtesttiming.sgml"> <!ENTITY pgtesttiming SYSTEM "pgtesttiming.sgml">
<!ENTITY pgupgrade SYSTEM "pgupgrade.sgml"> <!ENTITY pgupgrade SYSTEM "pgupgrade.sgml">
......
...@@ -195,9 +195,9 @@ PostgreSQL documentation ...@@ -195,9 +195,9 @@ PostgreSQL documentation
<para> <para>
Use checksums on data pages to help detect corruption by the Use checksums on data pages to help detect corruption by the
I/O system that would otherwise be silent. Enabling checksums I/O system that would otherwise be silent. Enabling checksums
may incur a noticeable performance penalty. This option can only may incur a noticeable performance penalty. If set, checksums
be set during initialization, and cannot be changed later. If are calculated for all objects, in all databases. See
set, checksums are calculated for all objects, in all databases. <xref linkend="checksums" /> for details.
</para> </para>
</listitem> </listitem>
</varlistentry> </varlistentry>
......
<!--
doc/src/sgml/ref/pg_verify_checksums.sgml
PostgreSQL documentation
-->
<refentry id="pgverifychecksums">
<indexterm zone="pgverifychecksums">
<primary>pg_verify_checksums</primary>
</indexterm>
<refmeta>
<refentrytitle><application>pg_verify_checksums</application></refentrytitle>
<manvolnum>1</manvolnum>
<refmiscinfo>Application</refmiscinfo>
</refmeta>
<refnamediv>
<refname>pg_verify_checksums</refname>
<refpurpose>verify data checksums in an offline <productname>PostgreSQL</productname> database cluster</refpurpose>
</refnamediv>
<refsynopsisdiv>
<cmdsynopsis>
<command>pg_verify_checksums</command>
<arg choice="opt"><replaceable class="parameter">option</replaceable></arg>
<arg choice="opt"><arg choice="opt"><option>-D</option></arg> <replaceable class="parameter">datadir</replaceable></arg>
</cmdsynopsis>
</refsynopsisdiv>
<refsect1 id="r1-app-pg_verify_checksums-1">
<title>Description</title>
<para>
<command>pg_verify_checksums</command> verifies data checksums in a
<productname>PostgreSQL</productname> cluster. It must be run against a cluster that is offline.
</para>
</refsect1>
<refsect1>
<title>Options</title>
<para>
The following command-line options are available:
<variablelist>
<varlistentry>
<term><option>-r <replaceable>relfilenode</replaceable></option></term>
<listitem>
<para>
Only validate checksums in the relation with the specified relfilenode.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-f</option></term>
<listitem>
<para>
Force the check even if checksums are disabled on the cluster.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-d</option></term>
<listitem>
<para>
Enable debug output. Lists all checked blocks and their checksum.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-V</option></term>
<term><option>--version</option></term>
<listitem>
<para>
Print the <application>pg_verify_checksums</application> version and exit.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-?</option></term>
<term><option>--help</option></term>
<listitem>
<para>
Show help about <application>pg_verify_checksums</application> command line
arguments, and exit.
</para>
</listitem>
</varlistentry>
</variablelist>
</para>
</refsect1>
<refsect1>
<title>Notes</title>
<para>
Can only be run when the server is offline.
</para>
</refsect1>
<refsect1>
<title>See Also</title>
<simplelist type="inline">
<member><xref linkend="checksums"/></member>
</simplelist>
</refsect1>
</refentry>
...@@ -284,6 +284,7 @@ ...@@ -284,6 +284,7 @@
&pgtestfsync; &pgtestfsync;
&pgtesttiming; &pgtesttiming;
&pgupgrade; &pgupgrade;
&pgVerifyChecksums;
&pgwaldump; &pgwaldump;
&postgres; &postgres;
&postmaster; &postmaster;
......
...@@ -230,6 +230,87 @@ ...@@ -230,6 +230,87 @@
</para> </para>
</sect1> </sect1>
<sect1 id="checksums">
<title>Data checksums</title>
<indexterm>
<primary>checksums</primary>
</indexterm>
<para>
Data pages are not checksum protected by default, but this can optionally be enabled for a cluster.
When enabled, each data page will be assigned a checksum that is updated when the page is
written and verified every time the page is read. Only data pages are protected by checksums;
internal data structures and temporary files are not.
</para>
<para>
Checksums are normally enabled when the cluster is initialized using
<link linkend="app-initdb-data-checksums"><application>initdb</application></link>. They
can also be enabled or disabled at runtime. In all cases, checksums are enabled or disabled
at the full cluster level, and cannot be specified individually for databases or tables.
</para>
<para>
The current state of checksums in the cluster can be verified by viewing the value
of the read-only configuration variable <xref linkend="guc-data-checksums" /> by
issuing the command <command>SHOW data_checksums</command>.
</para>
<para>
When attempting to recover from corrupt data it may be necessary to bypass the checksum
protection in order to recover data. To do this, temporarily set the configuration parameter
<xref linkend="guc-ignore-checksum-failure" />.
</para>
<sect2 id="checksums-enable-disable">
<title>On-line enabling of checksums</title>
<para>
Checksums can be enabled or disabled online, by calling the appropriate
<link linkend="functions-admin-checksum">functions</link>.
Disabling of checksums takes effect immediately when the function is called.
</para>
<para>
Enabling checksums will put the cluster in <literal>inprogress</literal> mode.
During this time, checksums will be written but not verified. In addition to
this, a background worker process is started that enables checksums on all
existing data in the cluster. Once this worker has completed processing all
databases in the cluster, the checksum mode will automatically switch to
<literal>on</literal>.
</para>
<para>
The process will initially wait for all open transactions to finish before
it starts, so that it can be certain that there are no tables that have been
created inside a transaction that has not committed yet and thus would not
be visible to the process enabling checksums. It will also, for each database,
wait for all pre-existing temporary tables to get removed before it finishes.
If long-lived temporary tables are used in the application it may be necessary
to terminate these application connections to allow the process to complete.
Information about open transactions and connections with temporary tables is
written to the server log.
</para>
<para>
If the cluster is stopped while in <literal>inprogress</literal> mode, for
any reason, then this process must be restarted manually. To do this,
re-execute the function <function>pg_enable_data_checksums()</function>
once the cluster has been restarted. It is not possible to resume the work;
the process has to start over and re-process the cluster.
</para>
<note>
<para>
Enabling checksums can cause significant I/O to the system, as most of the
database pages will need to be rewritten, and will be written both to the
data files and the WAL.
</para>
</note>
</sect2>
</sect1>
<sect1 id="wal-intro"> <sect1 id="wal-intro">
<title>Write-Ahead Logging (<acronym>WAL</acronym>)</title> <title>Write-Ahead Logging (<acronym>WAL</acronym>)</title>
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "access/xlog.h" #include "access/xlog.h"
#include "access/xlog_internal.h" #include "access/xlog_internal.h"
#include "catalog/pg_control.h" #include "catalog/pg_control.h"
#include "storage/bufpage.h"
#include "utils/guc.h" #include "utils/guc.h"
#include "utils/timestamp.h" #include "utils/timestamp.h"
...@@ -137,6 +138,18 @@ xlog_desc(StringInfo buf, XLogReaderState *record) ...@@ -137,6 +138,18 @@ xlog_desc(StringInfo buf, XLogReaderState *record)
xlrec.ThisTimeLineID, xlrec.PrevTimeLineID, xlrec.ThisTimeLineID, xlrec.PrevTimeLineID,
timestamptz_to_str(xlrec.end_time)); timestamptz_to_str(xlrec.end_time));
} }
else if (info == XLOG_CHECKSUMS)
	{
		xl_checksum_state xlrec;

		/* Copy out of the record buffer before reading the field. */
		memcpy(&xlrec, rec, sizeof(xl_checksum_state));

		/*
		 * Describe the new checksum state.  The strings are constants with no
		 * format arguments, so append them directly instead of going through
		 * the printf-style appendStringInfo().
		 */
		if (xlrec.new_checksumtype == PG_DATA_CHECKSUM_VERSION)
			appendStringInfoString(buf, "on");
		else if (xlrec.new_checksumtype == PG_DATA_CHECKSUM_INPROGRESS_VERSION)
			appendStringInfoString(buf, "inprogress");
		else
			appendStringInfoString(buf, "off");
	}
} }
const char * const char *
...@@ -182,6 +195,9 @@ xlog_identify(uint8 info) ...@@ -182,6 +195,9 @@ xlog_identify(uint8 info)
case XLOG_FPI_FOR_HINT: case XLOG_FPI_FOR_HINT:
id = "FPI_FOR_HINT"; id = "FPI_FOR_HINT";
break; break;
case XLOG_CHECKSUMS:
id = "CHECKSUMS";
break;
} }
return id; return id;
......
...@@ -856,6 +856,7 @@ static void SetLatestXTime(TimestampTz xtime); ...@@ -856,6 +856,7 @@ static void SetLatestXTime(TimestampTz xtime);
static void SetCurrentChunkStartTime(TimestampTz xtime); static void SetCurrentChunkStartTime(TimestampTz xtime);
static void CheckRequiredParameterValues(void); static void CheckRequiredParameterValues(void);
static void XLogReportParameters(void); static void XLogReportParameters(void);
static void XlogChecksums(ChecksumType new_type);
static void checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI, static void checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI,
TimeLineID prevTLI); TimeLineID prevTLI);
static void LocalSetXLogInsertAllowed(void); static void LocalSetXLogInsertAllowed(void);
...@@ -1033,7 +1034,7 @@ XLogInsertRecord(XLogRecData *rdata, ...@@ -1033,7 +1034,7 @@ XLogInsertRecord(XLogRecData *rdata,
Assert(RedoRecPtr < Insert->RedoRecPtr); Assert(RedoRecPtr < Insert->RedoRecPtr);
RedoRecPtr = Insert->RedoRecPtr; RedoRecPtr = Insert->RedoRecPtr;
} }
doPageWrites = (Insert->fullPageWrites || Insert->forcePageWrites); doPageWrites = (Insert->fullPageWrites || Insert->forcePageWrites || DataChecksumsInProgress());
if (fpw_lsn != InvalidXLogRecPtr && fpw_lsn <= RedoRecPtr && doPageWrites) if (fpw_lsn != InvalidXLogRecPtr && fpw_lsn <= RedoRecPtr && doPageWrites)
{ {
...@@ -4673,10 +4674,6 @@ ReadControlFile(void) ...@@ -4673,10 +4674,6 @@ ReadControlFile(void)
(SizeOfXLogLongPHD - SizeOfXLogShortPHD); (SizeOfXLogLongPHD - SizeOfXLogShortPHD);
CalculateCheckpointSegments(); CalculateCheckpointSegments();
/* Make the initdb settings visible as GUC variables, too */
SetConfigOption("data_checksums", DataChecksumsEnabled() ? "yes" : "no",
PGC_INTERNAL, PGC_S_OVERRIDE);
} }
void void
...@@ -4748,12 +4745,90 @@ GetMockAuthenticationNonce(void) ...@@ -4748,12 +4745,90 @@ GetMockAuthenticationNonce(void)
* Are checksums enabled for data pages? * Are checksums enabled for data pages?
*/ */
bool bool
DataChecksumsEnabled(void) DataChecksumsNeedWrite(void)
{ {
Assert(ControlFile != NULL); Assert(ControlFile != NULL);
return (ControlFile->data_checksum_version > 0); return (ControlFile->data_checksum_version > 0);
} }
/*
 * DataChecksumsNeedVerify
 *		Report whether checksums must be verified.
 *
 * Returns true only when the control file's checksum version equals
 * PG_DATA_CHECKSUM_VERSION.
 */
bool
DataChecksumsNeedVerify(void)
{
	bool		fully_enabled;

	Assert(ControlFile != NULL);

	/*
	 * Verification is limited to the fully-enabled state; while in the
	 * inprogress state checksums are only updated, never verified.
	 */
	fully_enabled =
		(ControlFile->data_checksum_version == PG_DATA_CHECKSUM_VERSION);

	return fully_enabled;
}
/*
 * DataChecksumsInProgress
 *		Report whether the control file records the "inprogress" checksum
 *		state (PG_DATA_CHECKSUM_INPROGRESS_VERSION).
 */
bool
DataChecksumsInProgress(void)
{
	bool		in_progress;

	Assert(ControlFile != NULL);

	in_progress = (ControlFile->data_checksum_version ==
				   PG_DATA_CHECKSUM_INPROGRESS_VERSION);

	return in_progress;
}
/*
 * SetDataChecksumsInProgress
 *
 * Switch the cluster to the "inprogress" checksum state.  Does nothing when
 * the control file already records a non-zero checksum version.  Otherwise
 * an XLOG_CHECKSUMS record is written through XlogChecksums() and the control
 * file is then updated while holding ControlFileLock.
 *
 * NOTE(review): the version test below runs before ControlFileLock is taken,
 * and the WAL record is written before the control file is updated, whereas
 * SetDataChecksumsOn() and SetDataChecksumsOff() update the control file
 * first and log afterwards -- confirm both differences are intentional.
 */
void
SetDataChecksumsInProgress(void)
{
Assert(ControlFile != NULL);
/* Any non-zero version (e.g. "inprogress" or "on"): nothing to do. */
if (ControlFile->data_checksum_version > 0)
return;
/* Log the state change first, then persist it in the control file. */
XlogChecksums(PG_DATA_CHECKSUM_INPROGRESS_VERSION);
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
ControlFile->data_checksum_version = PG_DATA_CHECKSUM_INPROGRESS_VERSION;
UpdateControlFile();
LWLockRelease(ControlFileLock);
}
/*
 * SetDataChecksumsOn
 *
 * Switch the cluster from the "inprogress" checksum state to fully enabled.
 * Raises an ERROR if the control file is not currently in the inprogress
 * state.  On success the control file is updated under ControlFileLock and
 * an XLOG_CHECKSUMS record is written afterwards.
 */
void
SetDataChecksumsOn(void)
{
Assert(ControlFile != NULL);
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
/* Only a transition from "inprogress" is accepted. */
if (ControlFile->data_checksum_version != PG_DATA_CHECKSUM_INPROGRESS_VERSION)
{
/* Drop the lock explicitly before raising the error. */
LWLockRelease(ControlFileLock);
elog(ERROR, "Checksums not in inprogress mode");
}
ControlFile->data_checksum_version = PG_DATA_CHECKSUM_VERSION;
UpdateControlFile();
LWLockRelease(ControlFileLock);
/* Log the new state once the control file has been updated. */
XlogChecksums(PG_DATA_CHECKSUM_VERSION);
}
/*
 * SetDataChecksumsOff
 *
 * Record checksum version 0 (off) in the control file under ControlFileLock,
 * then write an XLOG_CHECKSUMS record carrying the new state.  The previous
 * state is not inspected here.
 *
 * NOTE(review): unlike the sibling Set* functions there is no
 * Assert(ControlFile != NULL) here -- confirm callers guarantee it.
 */
void
SetDataChecksumsOff(void)
{
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
ControlFile->data_checksum_version = 0;
UpdateControlFile();
LWLockRelease(ControlFileLock);
/* Log the new state once the control file has been updated. */
XlogChecksums(0);
}
/*
 * GUC show hook for data_checksums.
 *
 * Maps the checksum version stored in the control file to the string shown
 * to the user: "on", "inprogress", or "off" for any other value.
 */
const char *
show_data_checksums(void)
{
	if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_VERSION)
		return "on";

	if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_VERSION)
		return "inprogress";

	/* Neither fully enabled nor in progress. */
	return "off";
}
/* /*
* Returns a fake LSN for unlogged relations. * Returns a fake LSN for unlogged relations.
* *
...@@ -7788,6 +7863,16 @@ StartupXLOG(void) ...@@ -7788,6 +7863,16 @@ StartupXLOG(void)
*/ */
CompleteCommitTsInitialization(); CompleteCommitTsInitialization();
/*
* If we reach this point with checksums in inprogress state, we notify
* the user that they need to manually restart the process to enable
* checksums.
*/
if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_VERSION)
ereport(WARNING,
(errmsg("checksum state is \"inprogress\" with no worker"),
errhint("Either disable or enable checksums by calling the pg_disable_data_checksums() or pg_enable_data_checksums() functions.")));
/* /*
* All done with end-of-recovery actions. * All done with end-of-recovery actions.
* *
...@@ -9541,6 +9626,22 @@ XLogReportParameters(void) ...@@ -9541,6 +9626,22 @@ XLogReportParameters(void)
} }
} }
/*
 * XlogChecksums
 *		Write an XLOG_CHECKSUMS record (resource manager RM_XLOG_ID) whose
 *		payload is the new checksum state.
 */
static void
XlogChecksums(ChecksumType new_type)
{
	xl_checksum_state state_rec;

	state_rec.new_checksumtype = new_type;

	/* The record consists solely of the xl_checksum_state struct. */
	XLogBeginInsert();
	XLogRegisterData((char *) &state_rec, sizeof(state_rec));
	XLogInsert(RM_XLOG_ID, XLOG_CHECKSUMS);
}
/* /*
* Update full_page_writes in shared memory, and write an * Update full_page_writes in shared memory, and write an
* XLOG_FPW_CHANGE record if necessary. * XLOG_FPW_CHANGE record if necessary.
...@@ -9969,6 +10070,17 @@ xlog_redo(XLogReaderState *record) ...@@ -9969,6 +10070,17 @@ xlog_redo(XLogReaderState *record)
/* Keep track of full_page_writes */ /* Keep track of full_page_writes */
lastFullPageWrites = fpw; lastFullPageWrites = fpw;
} }
else if (info == XLOG_CHECKSUMS)
{
/*
 * Replay of a checksum state change: copy the logged state out of the
 * record and store it in the control file under ControlFileLock.
 */
xl_checksum_state state;
memcpy(&state, XLogRecGetData(record), sizeof(xl_checksum_state));
LWLockAcquire(ControlFileLock, LW_EXCLUSIVE);
ControlFile->data_checksum_version = state.new_checksumtype;
UpdateControlFile();
LWLockRelease(ControlFileLock);
}
} }
#ifdef WAL_DEBUG #ifdef WAL_DEBUG
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include "catalog/pg_type.h" #include "catalog/pg_type.h"
#include "funcapi.h" #include "funcapi.h"
#include "miscadmin.h" #include "miscadmin.h"
#include "postmaster/checksumhelper.h"
#include "replication/walreceiver.h" #include "replication/walreceiver.h"
#include "storage/smgr.h" #include "storage/smgr.h"
#include "utils/builtins.h" #include "utils/builtins.h"
...@@ -698,3 +699,61 @@ pg_backup_start_time(PG_FUNCTION_ARGS) ...@@ -698,3 +699,61 @@ pg_backup_start_time(PG_FUNCTION_ARGS)
PG_RETURN_DATUM(xtime); PG_RETURN_DATUM(xtime);
} }
/*
* Disables checksums for the cluster, unless already disabled.
*
* Has immediate effect - the checksums are set to off right away.
*/
Datum
disable_data_checksums(PG_FUNCTION_ARGS)
{
/*
* If we don't need to write new checksums, then clearly they are already
* disabled.
*/
if (!DataChecksumsNeedWrite())
ereport(ERROR,
(errmsg("data checksums already disabled")));
ShutdownChecksumHelperIfRunning();
SetDataChecksumsOff();
PG_RETURN_VOID();
}
/*
* Enables checksums for the cluster, unless already enabled.
*
* Supports vacuum-like cost-based throttling, to limit system load.
* Starts a background worker that updates checksums on existing data.
*/
Datum
enable_data_checksums(PG_FUNCTION_ARGS)
{
int cost_delay = PG_GETARG_INT32(0);
int cost_limit = PG_GETARG_INT32(1);
if (cost_delay < 0)
ereport(ERROR,
(errmsg("cost delay cannot be less than zero")));
if (cost_limit <= 0)
ereport(ERROR,
(errmsg("cost limit must be a positive value")));
/*
* Allow state change from "off" or from "inprogress", since this is how
* we restart the worker if necessary.
*/
if (DataChecksumsNeedVerify())
ereport(ERROR,
(errmsg("data checksums already enabled")));
SetDataChecksumsInProgress();
if (!StartChecksumHelperLauncher(cost_delay, cost_limit))
ereport(ERROR,
(errmsg("failed to start checksum helper process")));
PG_RETURN_VOID();
}
...@@ -1027,6 +1027,11 @@ CREATE OR REPLACE FUNCTION pg_stop_backup ( ...@@ -1027,6 +1027,11 @@ CREATE OR REPLACE FUNCTION pg_stop_backup (
RETURNS SETOF record STRICT VOLATILE LANGUAGE internal as 'pg_stop_backup_v2' RETURNS SETOF record STRICT VOLATILE LANGUAGE internal as 'pg_stop_backup_v2'
PARALLEL RESTRICTED; PARALLEL RESTRICTED;
-- SQL-level declaration of the internal function enable_data_checksums.
-- Both throttling arguments are optional: cost_delay defaults to 0 and
-- cost_limit defaults to 100.
CREATE OR REPLACE FUNCTION
  pg_enable_data_checksums(cost_delay int DEFAULT 0, cost_limit int DEFAULT 100)
  RETURNS void
  LANGUAGE internal
  VOLATILE
  STRICT
  PARALLEL RESTRICTED
  AS 'enable_data_checksums';
-- legacy definition for compatibility with 9.3 -- legacy definition for compatibility with 9.3
CREATE OR REPLACE FUNCTION CREATE OR REPLACE FUNCTION
json_populate_record(base anyelement, from_json json, use_json_as_text boolean DEFAULT false) json_populate_record(base anyelement, from_json json, use_json_as_text boolean DEFAULT false)
......
...@@ -12,7 +12,8 @@ subdir = src/backend/postmaster ...@@ -12,7 +12,8 @@ subdir = src/backend/postmaster
top_builddir = ../../.. top_builddir = ../../..
include $(top_builddir)/src/Makefile.global include $(top_builddir)/src/Makefile.global
OBJS = autovacuum.o bgworker.o bgwriter.o checkpointer.o fork_process.o \ OBJS = autovacuum.o bgworker.o bgwriter.o checkpointer.o checksumhelper.o \
pgarch.o pgstat.o postmaster.o startup.o syslogger.o walwriter.o fork_process.o pgarch.o pgstat.o postmaster.o startup.o syslogger.o \
walwriter.o
include $(top_srcdir)/src/backend/common.mk include $(top_srcdir)/src/backend/common.mk
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include "pgstat.h" #include "pgstat.h"
#include "port/atomics.h" #include "port/atomics.h"
#include "postmaster/bgworker_internals.h" #include "postmaster/bgworker_internals.h"
#include "postmaster/checksumhelper.h"
#include "postmaster/postmaster.h" #include "postmaster/postmaster.h"
#include "replication/logicallauncher.h" #include "replication/logicallauncher.h"
#include "replication/logicalworker.h" #include "replication/logicalworker.h"
...@@ -129,6 +130,12 @@ static const struct ...@@ -129,6 +130,12 @@ static const struct
}, },
{ {
"ApplyWorkerMain", ApplyWorkerMain "ApplyWorkerMain", ApplyWorkerMain
},
{
"ChecksumHelperLauncherMain", ChecksumHelperLauncherMain
},
{
"ChecksumHelperWorkerMain", ChecksumHelperWorkerMain
} }
}; };
......
This diff is collapsed.
...@@ -4125,6 +4125,11 @@ pgstat_get_backend_desc(BackendType backendType) ...@@ -4125,6 +4125,11 @@ pgstat_get_backend_desc(BackendType backendType)
case B_WAL_WRITER: case B_WAL_WRITER:
backendDesc = "walwriter"; backendDesc = "walwriter";
break; break;
case B_CHECKSUMHELPER_LAUNCHER:
			backendDesc = "checksumhelper launcher";
			break;
		case B_CHECKSUMHELPER_WORKER:
			backendDesc = "checksumhelper worker";
			/* Explicit break so a case added below cannot be fallen into. */
			break;
} }
return backendDesc; return backendDesc;
......
...@@ -1383,7 +1383,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf ...@@ -1383,7 +1383,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf
_tarWriteHeader(tarfilename, NULL, statbuf, false); _tarWriteHeader(tarfilename, NULL, statbuf, false);
if (!noverify_checksums && DataChecksumsEnabled()) if (!noverify_checksums && DataChecksumsNeedVerify())
{ {
char *filename; char *filename;
......
...@@ -198,6 +198,7 @@ DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf) ...@@ -198,6 +198,7 @@ DecodeXLogOp(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
case XLOG_FPW_CHANGE: case XLOG_FPW_CHANGE:
case XLOG_FPI_FOR_HINT: case XLOG_FPI_FOR_HINT:
case XLOG_FPI: case XLOG_FPI:
case XLOG_CHECKSUMS:
break; break;
default: default:
elog(ERROR, "unexpected RM_XLOG_ID record type: %u", info); elog(ERROR, "unexpected RM_XLOG_ID record type: %u", info);
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include "postmaster/autovacuum.h" #include "postmaster/autovacuum.h"
#include "postmaster/bgworker_internals.h" #include "postmaster/bgworker_internals.h"
#include "postmaster/bgwriter.h" #include "postmaster/bgwriter.h"
#include "postmaster/checksumhelper.h"
#include "postmaster/postmaster.h" #include "postmaster/postmaster.h"
#include "replication/logicallauncher.h" #include "replication/logicallauncher.h"
#include "replication/slot.h" #include "replication/slot.h"
...@@ -261,6 +262,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port) ...@@ -261,6 +262,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
WalSndShmemInit(); WalSndShmemInit();
WalRcvShmemInit(); WalRcvShmemInit();
ApplyLauncherShmemInit(); ApplyLauncherShmemInit();
ChecksumHelperShmemInit();
/* /*
* Set up other modules that need some shared memory space * Set up other modules that need some shared memory space
......
...@@ -9,7 +9,8 @@ have a very low measured incidence according to research on large server farms, ...@@ -9,7 +9,8 @@ have a very low measured incidence according to research on large server farms,
http://www.cs.toronto.edu/~bianca/papers/sigmetrics09.pdf, discussed http://www.cs.toronto.edu/~bianca/papers/sigmetrics09.pdf, discussed
2010/12/22 on -hackers list. 2010/12/22 on -hackers list.
Current implementation requires this be enabled system-wide at initdb time. Checksums can be enabled at initdb time, but can also be turned on and off
using pg_enable_data_checksums()/pg_disable_data_checksums() at runtime.
The checksum is not valid at all times on a data page!! The checksum is not valid at all times on a data page!!
The checksum is valid when the page leaves the shared pool and is checked The checksum is valid when the page leaves the shared pool and is checked
......
...@@ -93,7 +93,7 @@ PageIsVerified(Page page, BlockNumber blkno) ...@@ -93,7 +93,7 @@ PageIsVerified(Page page, BlockNumber blkno)
*/ */
if (!PageIsNew(page)) if (!PageIsNew(page))
{ {
if (DataChecksumsEnabled()) if (DataChecksumsNeedVerify())
{ {
checksum = pg_checksum_page((char *) page, blkno); checksum = pg_checksum_page((char *) page, blkno);
...@@ -1168,7 +1168,7 @@ PageSetChecksumCopy(Page page, BlockNumber blkno) ...@@ -1168,7 +1168,7 @@ PageSetChecksumCopy(Page page, BlockNumber blkno)
static char *pageCopy = NULL; static char *pageCopy = NULL;
/* If we don't need a checksum, just return the passed-in data */ /* If we don't need a checksum, just return the passed-in data */
if (PageIsNew(page) || !DataChecksumsEnabled()) if (PageIsNew(page) || !DataChecksumsNeedWrite())
return (char *) page; return (char *) page;
/* /*
...@@ -1195,7 +1195,7 @@ void ...@@ -1195,7 +1195,7 @@ void
PageSetChecksumInplace(Page page, BlockNumber blkno) PageSetChecksumInplace(Page page, BlockNumber blkno)
{ {
/* If we don't need a checksum, just return */ /* If we don't need a checksum, just return */
if (PageIsNew(page) || !DataChecksumsEnabled()) if (PageIsNew(page) || !DataChecksumsNeedWrite())
return; return;
((PageHeader) page)->pd_checksum = pg_checksum_page((char *) page, blkno); ((PageHeader) page)->pd_checksum = pg_checksum_page((char *) page, blkno);
......
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include "access/transam.h" #include "access/transam.h"
#include "access/twophase.h" #include "access/twophase.h"
#include "access/xact.h" #include "access/xact.h"
#include "access/xlog.h"
#include "access/xlog_internal.h" #include "access/xlog_internal.h"
#include "catalog/namespace.h" #include "catalog/namespace.h"
#include "catalog/pg_authid.h" #include "catalog/pg_authid.h"
...@@ -68,6 +69,7 @@ ...@@ -68,6 +69,7 @@
#include "replication/walreceiver.h" #include "replication/walreceiver.h"
#include "replication/walsender.h" #include "replication/walsender.h"
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/checksum.h"
#include "storage/dsm_impl.h" #include "storage/dsm_impl.h"
#include "storage/standby.h" #include "storage/standby.h"
#include "storage/fd.h" #include "storage/fd.h"
...@@ -419,6 +421,17 @@ static const struct config_enum_entry password_encryption_options[] = { ...@@ -419,6 +421,17 @@ static const struct config_enum_entry password_encryption_options[] = {
{NULL, 0, false} {NULL, 0, false}
}; };
/*
 * data_checksums used to be a boolean, but was only set by initdb so there is
 * no need to support variants of boolean input.
 *
 * The accepted strings match what show_data_checksums() reports: "on",
 * "off" and "inprogress".
 */
static const struct config_enum_entry data_checksum_options[] = {
{"on", DATA_CHECKSUMS_ON, true},
{"off", DATA_CHECKSUMS_OFF, true},
{"inprogress", DATA_CHECKSUMS_INPROGRESS, true},
{NULL, 0, false}
};
/* /*
* Options for enum values stored in other modules * Options for enum values stored in other modules
*/ */
...@@ -514,7 +527,7 @@ static int max_identifier_length; ...@@ -514,7 +527,7 @@ static int max_identifier_length;
static int block_size; static int block_size;
static int segment_size; static int segment_size;
static int wal_block_size; static int wal_block_size;
static bool data_checksums; static int data_checksums_tmp; /* only accessed locally! */
static bool integer_datetimes; static bool integer_datetimes;
static bool assert_enabled; static bool assert_enabled;
...@@ -1683,17 +1696,6 @@ static struct config_bool ConfigureNamesBool[] = ...@@ -1683,17 +1696,6 @@ static struct config_bool ConfigureNamesBool[] =
NULL, NULL, NULL NULL, NULL, NULL
}, },
{
{"data_checksums", PGC_INTERNAL, PRESET_OPTIONS,
gettext_noop("Shows whether data checksums are turned on for this cluster."),
NULL,
GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE
},
&data_checksums,
false,
NULL, NULL, NULL
},
{ {
{"syslog_sequence_numbers", PGC_SIGHUP, LOGGING_WHERE, {"syslog_sequence_numbers", PGC_SIGHUP, LOGGING_WHERE,
gettext_noop("Add sequence number to syslog messages to avoid duplicate suppression."), gettext_noop("Add sequence number to syslog messages to avoid duplicate suppression."),
...@@ -4111,6 +4113,17 @@ static struct config_enum ConfigureNamesEnum[] = ...@@ -4111,6 +4113,17 @@ static struct config_enum ConfigureNamesEnum[] =
NULL, NULL, NULL NULL, NULL, NULL
}, },
{
{"data_checksums", PGC_INTERNAL, PRESET_OPTIONS,
gettext_noop("Shows whether data checksums are turned on for this cluster."),
NULL,
GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE
},
&data_checksums_tmp,
DATA_CHECKSUMS_OFF, data_checksum_options,
NULL, NULL, show_data_checksums
},
/* End-of-list marker */ /* End-of-list marker */
{ {
{NULL, 0, 0, NULL, NULL}, NULL, 0, NULL, NULL, NULL, NULL {NULL, 0, 0, NULL, NULL}, NULL, 0, NULL, NULL, NULL, NULL
......
...@@ -26,6 +26,7 @@ SUBDIRS = \ ...@@ -26,6 +26,7 @@ SUBDIRS = \
pg_test_fsync \ pg_test_fsync \
pg_test_timing \ pg_test_timing \
pg_upgrade \ pg_upgrade \
pg_verify_checksums \
pg_waldump \ pg_waldump \
pgbench \ pgbench \
psql \ psql \
......
...@@ -590,6 +590,15 @@ check_control_data(ControlData *oldctrl, ...@@ -590,6 +590,15 @@ check_control_data(ControlData *oldctrl,
* check_for_isn_and_int8_passing_mismatch(). * check_for_isn_and_int8_passing_mismatch().
*/ */
/*
 * If checksums have been turned on in the old cluster, but the
 * checksumhelper has yet to finish, then disallow upgrading. The user
 * should either let the process finish, or turn off checksums, before
 * retrying.
 *
 * NOTE(review): the literal 2 is presumably the value of
 * PG_DATA_CHECKSUM_INPROGRESS_VERSION used by the backend -- confirm.
 */
if (oldctrl->data_checksum_version == 2)
pg_fatal("transition to data checksums not completed in old cluster\n");
/* /*
* We might eventually allow upgrades from checksum to no-checksum * We might eventually allow upgrades from checksum to no-checksum
* clusters. * clusters.
......
...@@ -226,7 +226,7 @@ typedef struct ...@@ -226,7 +226,7 @@ typedef struct
uint32 large_object; uint32 large_object;
bool date_is_int; bool date_is_int;
bool float8_pass_by_value; bool float8_pass_by_value;
bool data_checksum_version; uint32 data_checksum_version;
} ControlData; } ControlData;
/* /*
......
#-------------------------------------------------------------------------
#
# Makefile for src/bin/pg_verify_checksums
#
# Copyright (c) 1998-2018, PostgreSQL Global Development Group
#
# src/bin/pg_verify_checksums/Makefile
#
#-------------------------------------------------------------------------
# Program description and icon selector. Presumably consumed by the rules
# that produce $(WIN32RES) -- confirm against src/Makefile.global.
PGFILEDESC = "pg_verify_checksums - verify data checksums in an offline cluster"
PGAPPICON=win32
# Location of this directory relative to the top of the build tree; the
# shared build definitions are pulled in from there.
subdir = src/bin/pg_verify_checksums
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
# Objects linked into the executable.
OBJS= pg_verify_checksums.o $(WIN32RES)
# Default target: build the executable.
all: pg_verify_checksums
# Link step. submake-libpgport is an order-only prerequisite (after the "|"),
# so it is built first but does not by itself force a relink.
pg_verify_checksums: $(OBJS) | submake-libpgport
$(CC) $(CFLAGS) $^ $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
# Install the executable into $(bindir); installdirs creates that directory.
install: all installdirs
$(INSTALL_PROGRAM) pg_verify_checksums$(X) '$(DESTDIR)$(bindir)/pg_verify_checksums$(X)'
installdirs:
$(MKDIR_P) '$(DESTDIR)$(bindir)'
# Remove the installed executable again.
uninstall:
rm -f '$(DESTDIR)$(bindir)/pg_verify_checksums$(X)'
# All cleaning levels remove the same things: the executable, the objects and
# the tmp_check directory.
clean distclean maintainer-clean:
rm -f pg_verify_checksums$(X) $(OBJS)
rm -rf tmp_check
/*
* pg_verify_checksums
*
* Verifies page level checksums in an offline cluster
*
* Copyright (c) 2010-2018, PostgreSQL Global Development Group
*
* src/bin/pg_verify_checksums/pg_verify_checksums.c
*/
#define FRONTEND 1
#include "postgres.h"
#include "catalog/pg_control.h"
#include "common/controldata_utils.h"
#include "storage/bufpage.h"
#include "storage/checksum.h"
#include "storage/checksum_impl.h"
#include <sys/stat.h>
#include <dirent.h>
#include <unistd.h>
#include "pg_getopt.h"
/* Totals accumulated by scan_file() and printed by main() after the scan */
static int64 files = 0;
static int64 blocks = 0;
/* Blocks whose stored checksum differs from the computed one */
static int64 badblocks = 0;
/* Contents of pg_control, loaded by main() via get_controlfile() */
static ControlFileData *ControlFile;
/* Value of -r; when set, only files whose relfilenode part matches are scanned */
static char *only_relfilenode = NULL;
/* Set by -d; makes scan_file() report every block that verified correctly */
static bool debug = false;
/* Program name used as the prefix of all messages */
static const char *progname;
/*
 * usage
 *
 * Print the command-line help text to stdout.
 *
 * Declared with an explicit (void) parameter list so that the compiler can
 * reject accidental calls with arguments; an empty list in C is an old-style
 * declaration that says nothing about the parameters.
 */
static void
usage(void)
{
	printf(_("%s verifies page level checksums in offline PostgreSQL database cluster.\n\n"), progname);
	printf(_("Usage:\n"));
	printf(_(" %s [OPTION] [DATADIR]\n"), progname);
	printf(_("\nOptions:\n"));
	printf(_(" [-D] DATADIR data directory\n"));
	/* -f has no long form, so no comma after the switch */
	printf(_(" -f force check even if checksums are disabled\n"));
	printf(_(" -r relfilenode check only relation with specified relfilenode\n"));
	printf(_(" -d debug output, listing all checked blocks\n"));
	printf(_(" -V, --version output version information, then exit\n"));
	printf(_(" -?, --help show this help, then exit\n"));
	printf(_("\nIf no data directory (DATADIR) is specified, "
			 "the environment variable PGDATA\nis used.\n\n"));
	printf(_("Report bugs to <pgsql-bugs@postgresql.org>.\n"));
}
/*
 * File names that are never scanned for checksums. The list is terminated
 * by a NULL entry.
 */
static const char *skip[] = {
	"pg_control",
	"pg_filenode.map",
	"pg_internal.init",
	"PG_VERSION",
	NULL,
};

/*
 * skipfile
 *
 * Return true if the directory entry name "fn" must not be scanned: either
 * one of the "." / ".." entries or an exact match for a name in skip[].
 * Any other name yields false.
 */
static bool
skipfile(char *fn)
{
	int			idx;

	/* The self and parent directory entries are always ignored */
	if (strcmp(fn, ".") == 0)
		return true;
	if (strcmp(fn, "..") == 0)
		return true;

	/* Exact-match lookup in the NULL-terminated skip list */
	for (idx = 0; skip[idx] != NULL; idx++)
	{
		if (strcmp(skip[idx], fn) == 0)
			return true;
	}

	return false;
}
static void
scan_file(char *fn, int segmentno)
{
char buf[BLCKSZ];
PageHeader header = (PageHeader) buf;
int f;
int blockno;
f = open(fn, 0);
if (f < 0)
{
fprintf(stderr, _("%s: could not open file \"%s\": %m\n"), progname, fn);
exit(1);
}
files++;
for (blockno = 0;; blockno++)
{
uint16 csum;
int r = read(f, buf, BLCKSZ);
if (r == 0)
break;
if (r != BLCKSZ)
{
fprintf(stderr, _("%s: short read of block %d in file \"%s\", got only %d bytes\n"),
progname, blockno, fn, r);
exit(1);
}
blocks++;
csum = pg_checksum_page(buf, blockno + segmentno * RELSEG_SIZE);
if (csum != header->pd_checksum)
{
if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_VERSION)
fprintf(stderr, _("%s: checksum verification failed in file \"%s\", block %d: calculated checksum %X but expected %X\n"),
progname, fn, blockno, csum, header->pd_checksum);
badblocks++;
}
else if (debug)
fprintf(stderr, _("%s: checksum verified in file \"%s\", block %d: %X\n"),
progname, fn, blockno, csum);
}
close(f);
}
/*
 * scan_directory
 *
 * Walk the directory "basedir/subdir". Regular files are handed to
 * scan_file(); directories and symbolic links are descended into
 * recursively. Entries for which skipfile() returns true are ignored.
 *
 * For regular files the entry name is split in place: the part after the
 * first "." is taken as the segment number, and anything from the first "_"
 * on is cut off so that what remains can be compared with the -r
 * relfilenode filter.
 *
 * Exits the program if a directory cannot be opened, an entry cannot be
 * stat'ed, or a segment suffix does not parse to a non-zero number.
 */
static void
scan_directory(char *basedir, char *subdir)
{
	char		path[MAXPGPATH];
	DIR		   *dir;
	struct dirent *de;

	snprintf(path, MAXPGPATH, "%s/%s", basedir, subdir);
	dir = opendir(path);
	if (!dir)
	{
		/* %m is not portable in frontend fprintf(), so spell it out */
		fprintf(stderr, _("%s: could not open directory \"%s\": %s\n"),
				progname, path, strerror(errno));
		exit(1);
	}

	while ((de = readdir(dir)) != NULL)
	{
		char		fn[MAXPGPATH];
		struct stat st;

		if (skipfile(de->d_name))
			continue;

		/* Build the full path before d_name is modified below */
		snprintf(fn, MAXPGPATH, "%s/%s", path, de->d_name);
		if (lstat(fn, &st) < 0)
		{
			fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"),
					progname, fn, strerror(errno));
			exit(1);
		}

		if (S_ISREG(st.st_mode))
		{
			char	   *forkpath,
					   *segmentpath;
			int			segmentno = 0;

			/*
			 * Cut off at the segment boundary (".") to get the segment number
			 * in order to mix it into the checksum. Then also cut off at the
			 * fork boundary, to get the relfilenode the file belongs to for
			 * filtering.
			 */
			segmentpath = strchr(de->d_name, '.');
			if (segmentpath != NULL)
			{
				*segmentpath++ = '\0';
				segmentno = atoi(segmentpath);

				/* atoi() yields 0 for non-numeric text as well as for "0" */
				if (segmentno == 0)
				{
					fprintf(stderr, _("%s: invalid segment number %d in filename \"%s\"\n"),
							progname, segmentno, fn);
					exit(1);
				}
			}

			forkpath = strchr(de->d_name, '_');
			if (forkpath != NULL)
				*forkpath++ = '\0';

			if (only_relfilenode && strcmp(only_relfilenode, de->d_name) != 0)
				/* Relfilenode not to be included */
				continue;

			scan_file(fn, segmentno);
		}
		else if (S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode))
			scan_directory(path, de->d_name);
	}

	closedir(dir);
}
/*
 * main
 *
 * Entry point of pg_verify_checksums.
 *
 * Parses the command line (-d debug, -D data directory, -f force, -r
 * relfilenode), loads pg_control, refuses to continue unless the control
 * file records a clean shutdown, and then scans the "global", "base" and
 * "pg_tblspc" directories below the data directory.
 *
 * Returns 1 if any block had a checksum mismatch and 0 otherwise; every
 * error condition terminates the program with exit(1).
 */
int
main(int argc, char *argv[])
{
	char	   *DataDir = NULL;
	bool		force = false;
	int			c;
	bool		crc_ok;

	set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_verify_checksums"));

	progname = get_progname(argv[0]);

	/* --help and --version are only recognized as the first argument */
	if (argc > 1)
	{
		if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0)
		{
			usage();
			exit(0);
		}
		if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0)
		{
			puts("pg_verify_checksums (PostgreSQL) " PG_VERSION);
			exit(0);
		}
	}

	while ((c = getopt(argc, argv, "D:fr:d")) != -1)
	{
		switch (c)
		{
			case 'd':
				debug = true;
				break;
			case 'D':
				DataDir = optarg;
				break;
			case 'f':
				force = true;
				break;
			case 'r':
				/* Must parse to a positive number; kept as text for matching */
				if (atoi(optarg) <= 0)
				{
					fprintf(stderr, _("%s: invalid relfilenode: %s\n"), progname, optarg);
					exit(1);
				}
				only_relfilenode = pstrdup(optarg);
				break;
			default:
				fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
				exit(1);
		}
	}

	/* Without -D, fall back to a positional argument and then to PGDATA */
	if (DataDir == NULL)
	{
		if (optind < argc)
			DataDir = argv[optind++];
		else
			DataDir = getenv("PGDATA");

		/* If no DataDir was specified, and none could be found, error out */
		if (DataDir == NULL)
		{
			fprintf(stderr, _("%s: no data directory specified\n"), progname);
			fprintf(stderr, _("Try \"%s --help\" for more information.\n"), progname);
			exit(1);
		}
	}

	/* Complain if any arguments remain */
	if (optind < argc)
	{
		fprintf(stderr, _("%s: too many command-line arguments (first is \"%s\")\n"),
				progname, argv[optind]);
		fprintf(stderr, _("Try \"%s --help\" for more information.\n"),
				progname);
		exit(1);
	}

	/* Load pg_control; everything below relies on its contents */
	ControlFile = get_controlfile(DataDir, progname, &crc_ok);
	if (!crc_ok)
	{
		fprintf(stderr, _("%s: pg_control CRC value is incorrect.\n"), progname);
		exit(1);
	}

	/* Only a cluster recorded as shut down may be scanned */
	if (ControlFile->state != DB_SHUTDOWNED &&
		ControlFile->state != DB_SHUTDOWNED_IN_RECOVERY)
	{
		fprintf(stderr, _("%s: cluster must be shut down to verify checksums.\n"), progname);
		exit(1);
	}

	/* -f allows scanning even though checksums are recorded as disabled */
	if (ControlFile->data_checksum_version == 0 && !force)
	{
		fprintf(stderr, _("%s: data checksums are not enabled in cluster.\n"), progname);
		exit(1);
	}

	/* Scan all files */
	scan_directory(DataDir, "global");
	scan_directory(DataDir, "base");
	scan_directory(DataDir, "pg_tblspc");

	printf(_("Checksum scan completed\n"));
	/* data_checksum_version is unsigned, so print it with %u */
	printf(_("Data checksum version: %u\n"), ControlFile->data_checksum_version);
	printf(_("Files scanned: %" INT64_MODIFIER "d\n"), files);
	printf(_("Blocks scanned: %" INT64_MODIFIER "d\n"), blocks);

	/* While enabling is still in progress, mismatches are pending work */
	if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_INPROGRESS_VERSION)
		printf(_("Blocks left in progress: %" INT64_MODIFIER "d\n"), badblocks);
	else
		printf(_("Bad checksums: %" INT64_MODIFIER "d\n"), badblocks);

	if (badblocks > 0)
		return 1;

	return 0;
}
...@@ -154,7 +154,7 @@ extern PGDLLIMPORT int wal_level; ...@@ -154,7 +154,7 @@ extern PGDLLIMPORT int wal_level;
* of the bits make it to disk, but the checksum wouldn't match. Also WAL-log * of the bits make it to disk, but the checksum wouldn't match. Also WAL-log
* them if forced by wal_log_hints=on. * them if forced by wal_log_hints=on.
*/ */
#define XLogHintBitIsNeeded() (DataChecksumsEnabled() || wal_log_hints) #define XLogHintBitIsNeeded() (DataChecksumsNeedWrite() || wal_log_hints)
/* Do we need to WAL-log information required only for Hot Standby and logical replication? */ /* Do we need to WAL-log information required only for Hot Standby and logical replication? */
#define XLogStandbyInfoActive() (wal_level >= WAL_LEVEL_REPLICA) #define XLogStandbyInfoActive() (wal_level >= WAL_LEVEL_REPLICA)
...@@ -257,7 +257,13 @@ extern char *XLogFileNameP(TimeLineID tli, XLogSegNo segno); ...@@ -257,7 +257,13 @@ extern char *XLogFileNameP(TimeLineID tli, XLogSegNo segno);
extern void UpdateControlFile(void); extern void UpdateControlFile(void);
extern uint64 GetSystemIdentifier(void); extern uint64 GetSystemIdentifier(void);
extern char *GetMockAuthenticationNonce(void); extern char *GetMockAuthenticationNonce(void);
extern bool DataChecksumsEnabled(void); extern bool DataChecksumsNeedWrite(void);
extern bool DataChecksumsNeedVerify(void);
extern bool DataChecksumsInProgress(void);
extern void SetDataChecksumsInProgress(void);
extern void SetDataChecksumsOn(void);
extern void SetDataChecksumsOff(void);
extern const char *show_data_checksums(void);
extern XLogRecPtr GetFakeLSNForUnloggedRel(void); extern XLogRecPtr GetFakeLSNForUnloggedRel(void);
extern Size XLOGShmemSize(void); extern Size XLOGShmemSize(void);
extern void XLOGShmemInit(void); extern void XLOGShmemInit(void);
......
...@@ -25,6 +25,7 @@ ...@@ -25,6 +25,7 @@
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "pgtime.h" #include "pgtime.h"
#include "storage/block.h" #include "storage/block.h"
#include "storage/checksum.h"
#include "storage/relfilenode.h" #include "storage/relfilenode.h"
...@@ -240,6 +241,12 @@ typedef struct xl_restore_point ...@@ -240,6 +241,12 @@ typedef struct xl_restore_point
char rp_name[MAXFNAMELEN]; char rp_name[MAXFNAMELEN];
} xl_restore_point; } xl_restore_point;
/* Information logged when checksum level is changed */
typedef struct xl_checksum_state
{
ChecksumType new_checksumtype;
} xl_checksum_state;
/* End of recovery mark, when we don't do an END_OF_RECOVERY checkpoint */ /* End of recovery mark, when we don't do an END_OF_RECOVERY checkpoint */
typedef struct xl_end_of_recovery typedef struct xl_end_of_recovery
{ {
......
...@@ -53,6 +53,6 @@ ...@@ -53,6 +53,6 @@
*/ */
/* yyyymmddN */ /* yyyymmddN */
#define CATALOG_VERSION_NO 201804051 #define CATALOG_VERSION_NO 201804052
#endif #endif
...@@ -76,6 +76,7 @@ typedef struct CheckPoint ...@@ -76,6 +76,7 @@ typedef struct CheckPoint
#define XLOG_END_OF_RECOVERY 0x90 #define XLOG_END_OF_RECOVERY 0x90
#define XLOG_FPI_FOR_HINT 0xA0 #define XLOG_FPI_FOR_HINT 0xA0
#define XLOG_FPI 0xB0 #define XLOG_FPI 0xB0
#define XLOG_CHECKSUMS 0xC0
/* /*
......
...@@ -5583,6 +5583,11 @@ DESCR("pg_controldata recovery state information as a function"); ...@@ -5583,6 +5583,11 @@ DESCR("pg_controldata recovery state information as a function");
DATA(insert OID = 3444 ( pg_control_init PGNSP PGUID 12 1 0 0 0 f f f t f v s 0 0 2249 "" "{23,23,23,23,23,23,23,23,23,16,16,23}" "{o,o,o,o,o,o,o,o,o,o,o,o}" "{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,float4_pass_by_value,float8_pass_by_value,data_page_checksum_version}" _null_ _null_ pg_control_init _null_ _null_ _null_ )); DATA(insert OID = 3444 ( pg_control_init PGNSP PGUID 12 1 0 0 0 f f f t f v s 0 0 2249 "" "{23,23,23,23,23,23,23,23,23,16,16,23}" "{o,o,o,o,o,o,o,o,o,o,o,o}" "{max_data_alignment,database_block_size,blocks_per_segment,wal_block_size,bytes_per_wal_segment,max_identifier_length,max_index_columns,max_toast_chunk_size,large_object_chunk_size,float4_pass_by_value,float8_pass_by_value,data_page_checksum_version}" _null_ _null_ pg_control_init _null_ _null_ _null_ ));
DESCR("pg_controldata init state information as a function"); DESCR("pg_controldata init state information as a function");
DATA(insert OID = 3996 ( pg_disable_data_checksums PGNSP PGUID 12 1 0 0 0 f f f t f v s 0 0 2278 "" _null_ _null_ _null_ _null_ _null_ disable_data_checksums _null_ _null_ _null_ ));
DESCR("disable data checksums");
DATA(insert OID = 3998 ( pg_enable_data_checksums PGNSP PGUID 12 1 0 0 0 f f f t f v s 2 0 2278 "23 23" _null_ _null_ "{cost_delay,cost_limit}" _null_ _null_ enable_data_checksums _null_ _null_ _null_ ));
DESCR("enable data checksums");
/* collation management functions */ /* collation management functions */
DATA(insert OID = 3445 ( pg_import_system_collations PGNSP PGUID 12 100 0 0 0 f f f t f v u 1 0 23 "4089" _null_ _null_ _null_ _null_ _null_ pg_import_system_collations _null_ _null_ _null_ )); DATA(insert OID = 3445 ( pg_import_system_collations PGNSP PGUID 12 100 0 0 0 f f f t f v u 1 0 23 "4089" _null_ _null_ _null_ _null_ _null_ pg_import_system_collations _null_ _null_ _null_ ));
DESCR("import collations from operating system"); DESCR("import collations from operating system");
......
...@@ -710,7 +710,9 @@ typedef enum BackendType ...@@ -710,7 +710,9 @@ typedef enum BackendType
B_STARTUP, B_STARTUP,
B_WAL_RECEIVER, B_WAL_RECEIVER,
B_WAL_SENDER, B_WAL_SENDER,
B_WAL_WRITER B_WAL_WRITER,
B_CHECKSUMHELPER_LAUNCHER,
B_CHECKSUMHELPER_WORKER
} BackendType; } BackendType;
......
/*-------------------------------------------------------------------------
*
* checksumhelper.h
* header file for checksum helper background worker
*
*
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/postmaster/checksumhelper.h
*
*-------------------------------------------------------------------------
*/
#ifndef CHECKSUMHELPER_H
#define CHECKSUMHELPER_H
/*
 * Shared memory: size computation and initialization.
 */
extern Size ChecksumHelperShmemSize(void);
extern void ChecksumHelperShmemInit(void);

/*
 * Control of the background processes: start the launcher that enables
 * checksums, and shut the processes down again if any are running.
 */
extern bool StartChecksumHelperLauncher(int cost_delay, int cost_limit);
extern void ShutdownChecksumHelperIfRunning(void);

/*
 * Entry points of the launcher and worker background workers.
 */
extern void ChecksumHelperLauncherMain(Datum arg);
extern void ChecksumHelperWorkerMain(Datum arg);
#endif /* CHECKSUMHELPER_H */
...@@ -194,6 +194,7 @@ typedef PageHeaderData *PageHeader; ...@@ -194,6 +194,7 @@ typedef PageHeaderData *PageHeader;
*/ */
#define PG_PAGE_LAYOUT_VERSION 4 #define PG_PAGE_LAYOUT_VERSION 4
#define PG_DATA_CHECKSUM_VERSION 1 #define PG_DATA_CHECKSUM_VERSION 1
#define PG_DATA_CHECKSUM_INPROGRESS_VERSION 2
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
* page support macros * page support macros
......
...@@ -15,6 +15,13 @@ ...@@ -15,6 +15,13 @@
#include "storage/block.h" #include "storage/block.h"
/*
 * State of data checksums. OFF is explicitly 0, so ON is 1 and INPROGRESS
 * is 2.
 *
 * NOTE(review): ON and INPROGRESS appear to line up numerically with
 * PG_DATA_CHECKSUM_VERSION (1) and PG_DATA_CHECKSUM_INPROGRESS_VERSION (2)
 * from bufpage.h -- confirm that this correspondence is relied upon before
 * reordering the members.
 */
typedef enum ChecksumType
{
DATA_CHECKSUMS_OFF = 0,
DATA_CHECKSUMS_ON,
DATA_CHECKSUMS_INPROGRESS
} ChecksumType;
/* /*
* Compute the checksum for a Postgres page. The page must be aligned on a * Compute the checksum for a Postgres page. The page must be aligned on a
* 4-byte boundary. * 4-byte boundary.
......
...@@ -12,7 +12,8 @@ subdir = src/test ...@@ -12,7 +12,8 @@ subdir = src/test
top_builddir = ../.. top_builddir = ../..
include $(top_builddir)/src/Makefile.global include $(top_builddir)/src/Makefile.global
SUBDIRS = perl regress isolation modules authentication recovery subscription SUBDIRS = perl regress isolation modules authentication recovery subscription \
checksum
# Test suites that are not safe by default but can be run if selected # Test suites that are not safe by default but can be run if selected
# by the user via the whitespace-separated list in variable # by the user via the whitespace-separated list in variable
......
# Generated by test suite
/tmp_check/
#-------------------------------------------------------------------------
#
# Makefile for src/test/checksum
#
# Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
# Portions Copyright (c) 1994, Regents of the University of California
#
# src/test/checksum/Makefile
#
#-------------------------------------------------------------------------
# Location of this directory relative to the top of the build tree; the
# shared build definitions (including the prove_* macros) come from there.
subdir = src/test/checksum
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
# Run the test scripts via $(prove_check).
check:
$(prove_check)
# Run the test scripts via $(prove_installcheck).
installcheck:
$(prove_installcheck)
# All cleaning levels only need to remove the tmp_check directory.
clean distclean maintainer-clean:
rm -rf tmp_check
src/test/checksum/README
Regression tests for data checksums
===================================
This directory contains a test suite for enabling data checksums
in a running cluster with streaming replication.
Running the tests
=================
make check
or
make installcheck
NOTE: This creates a temporary installation (in the case of "check"),
with multiple nodes, be they master or standby(s) for the purpose of
the tests.
NOTE: This requires the --enable-tap-tests argument to configure.
# Test suite for testing enabling data checksums with streaming replication
use strict;
use warnings;
use PostgresNode;
use TestLib;
# The plan count must equal the number of is() checks in this script.
use Test::More tests => 10;
# Upper bound on the polling loops further down that wait for the
# data_checksums setting to become 'on'; each iteration sleeps one second.
my $MAX_TRIES = 30;
# Initialize master node
my $node_master = get_new_node('master');
$node_master->init(allows_streaming => 1);
$node_master->start;
my $backup_name = 'my_backup';
# Take backup
$node_master->backup($backup_name);
# Create streaming standby linking to master
my $node_standby_1 = get_new_node('standby_1');
$node_standby_1->init_from_backup($node_master, $backup_name,
has_streaming => 1);
$node_standby_1->start;
# Create some content on master to have un-checksummed data in the cluster
$node_master->safe_psql('postgres',
"CREATE TABLE t AS SELECT generate_series(1,10000) AS a;");
# Wait for standbys to catch up
$node_master->wait_for_catchup($node_standby_1, 'replay',
$node_master->lsn('insert'));
# Check that checksums are turned off on both nodes before anything is
# enabled; the state is read through the data_checksums setting.
my $result = $node_master->safe_psql('postgres',
"SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';");
is($result, "off", 'ensure checksums are turned off on master');
$result = $node_standby_1->safe_psql('postgres',
"SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';");
is($result, "off", 'ensure checksums are turned off on standby_1');
# Enable checksums for the cluster
$node_master->safe_psql('postgres', "SELECT pg_enable_data_checksums();");
# Ensure that the master has switched to inprogress immediately
# NOTE(review): this expects the state to still be 'inprogress' when queried;
# presumably the processing of the existing table takes long enough for that
# to hold -- confirm this cannot race on a fast machine.
$result = $node_master->safe_psql('postgres',
"SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';");
is($result, "inprogress", 'ensure checksums are in progress on master');
# Wait for checksum enable to be replayed
$node_master->wait_for_catchup($node_standby_1, 'replay');
# Ensure that the standby has switched to inprogress
$result = $node_standby_1->safe_psql('postgres',
"SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';");
is($result, "inprogress", 'ensure checksums are in progress on standby_1');
# Insert some more data which should be checksummed on INSERT
$node_master->safe_psql('postgres',
"INSERT INTO t VALUES (generate_series(1,10000));");
# Wait for checksums enabled on the master: poll once per second, giving up
# after $MAX_TRIES attempts so that the is() below fails instead of hanging.
for (my $i = 0; $i < $MAX_TRIES; $i++)
{
$result = $node_master->safe_psql('postgres',
"SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';");
last if ($result eq 'on');
sleep(1);
}
is ($result, "on", 'ensure checksums are enabled on master');
# Wait for checksums enabled on the standby, using the same bounded polling
for (my $i = 0; $i < $MAX_TRIES; $i++)
{
$result = $node_standby_1->safe_psql('postgres',
"SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';");
last if ($result eq 'on');
sleep(1);
}
is ($result, "on", 'ensure checksums are enabled on standby');
# 10000 rows from CREATE TABLE AS plus 10000 from the INSERT above
$result = $node_master->safe_psql('postgres', "SELECT count(a) FROM t");
is ($result, "20000", 'ensure we can safely read all data with checksums');
# Disable checksums and ensure it's propagated to standby and that we can
# still read all data
$node_master->safe_psql('postgres', "SELECT pg_disable_data_checksums();");

# The master is expected to report 'off' right after the call returns. The
# test name describes the state actually asserted (it previously said
# "in progress", copied from the enable phase).
$result = $node_master->safe_psql('postgres',
	"SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';");
is($result, "off", 'ensure checksums are disabled on master');

# Wait for checksum disable to be replayed
$node_master->wait_for_catchup($node_standby_1, 'replay');

# Ensure that the standby has switched to off
$result = $node_standby_1->safe_psql('postgres',
	"SELECT setting FROM pg_catalog.pg_settings WHERE name = 'data_checksums';");
is($result, "off", 'ensure checksums are disabled on standby_1');

# All 20000 rows must remain readable once checksums are off again
$result = $node_master->safe_psql('postgres', "SELECT count(a) FROM t");
is ($result, "20000", 'ensure we can safely read all data without checksums');
Parsed test spec with 2 sessions
starting permutation: c_verify_checksums_off r_seqread c_enable_checksums c_verify_checksums_inprogress c_disable_checksums c_wait_checksums_off
step c_verify_checksums_off: SELECT setting = 'off' FROM pg_catalog.pg_settings WHERE name = 'data_checksums';
?column?
t
step r_seqread: SELECT * FROM reader_loop();
reader_loop
t
step c_enable_checksums: SELECT pg_enable_data_checksums(1000);
pg_enable_data_checksums
step c_verify_checksums_inprogress: SELECT setting = 'inprogress' FROM pg_catalog.pg_settings WHERE name = 'data_checksums';
?column?
t
step c_disable_checksums: SELECT pg_disable_data_checksums();
pg_disable_data_checksums
step c_wait_checksums_off: SELECT test_checksums_off();
test_checksums_off
t
Parsed test spec with 3 sessions
starting permutation: c_verify_checksums_off w_insert100k r_seqread c_enable_checksums c_wait_for_checksums c_verify_checksums_on
step c_verify_checksums_off: SELECT setting = 'off' FROM pg_catalog.pg_settings WHERE name = 'data_checksums';
?column?
t
step w_insert100k: SELECT insert_1k(100);
insert_1k
t
step r_seqread: SELECT * FROM reader_loop();
reader_loop
t
step c_enable_checksums: SELECT pg_enable_data_checksums();
pg_enable_data_checksums
step c_wait_for_checksums: SELECT test_checksums_on();
test_checksums_on
t
step c_verify_checksums_on: SELECT setting = 'on' FROM pg_catalog.pg_settings WHERE name = 'data_checksums';
?column?
t
...@@ -72,3 +72,7 @@ test: timeouts ...@@ -72,3 +72,7 @@ test: timeouts
test: vacuum-concurrent-drop test: vacuum-concurrent-drop
test: predicate-gist test: predicate-gist
test: predicate-gin test: predicate-gin
# The checksum_enable suite will enable checksums for the cluster so should
# not run before anything expecting the cluster to have checksums turned off
test: checksum_cancel
test: checksum_enable
setup
{
CREATE TABLE t1 (a serial, b integer, c text);
INSERT INTO t1 (b, c) VALUES (generate_series(1,10000), 'starting values');
-- Sleeps for one second and then reports whether the data_checksums
-- setting reads 'off'. Returns NULL if the setting is not found.
CREATE OR REPLACE FUNCTION test_checksums_off() RETURNS boolean AS $$
DECLARE
    is_off boolean;
BEGIN
    -- Fixed one second pause before the state is sampled
    PERFORM pg_sleep(1);

    SELECT s.setting = 'off'
      INTO is_off
      FROM pg_catalog.pg_settings AS s
     WHERE s.name = 'data_checksums';

    RETURN is_off;
END;
$$ LANGUAGE plpgsql;
-- Reads the whole of t1 one hundred times in a row and then returns true.
-- The count itself is discarded; only the read activity matters.
-- (The unused "enabled" variable of the earlier version has been dropped.)
CREATE OR REPLACE FUNCTION reader_loop() RETURNS boolean AS $$
DECLARE
    counter integer;
BEGIN
    FOR counter IN 1..100 LOOP
        PERFORM count(a) FROM t1;
    END LOOP;
    RETURN True;
END;
$$ LANGUAGE plpgsql;
}
teardown
{
DROP FUNCTION reader_loop();
DROP FUNCTION test_checksums_off();
DROP TABLE t1;
}
session "reader"
step "r_seqread" { SELECT * FROM reader_loop(); }
session "checksums"
step "c_verify_checksums_off" { SELECT setting = 'off' FROM pg_catalog.pg_settings WHERE name = 'data_checksums'; }
step "c_enable_checksums" { SELECT pg_enable_data_checksums(1000); }
step "c_disable_checksums" { SELECT pg_disable_data_checksums(); }
step "c_verify_checksums_inprogress" { SELECT setting = 'inprogress' FROM pg_catalog.pg_settings WHERE name = 'data_checksums'; }
step "c_wait_checksums_off" { SELECT test_checksums_off(); }
permutation "c_verify_checksums_off" "r_seqread" "c_enable_checksums" "c_verify_checksums_inprogress" "c_disable_checksums" "c_wait_checksums_off"
setup
{
CREATE TABLE t1 (a serial, b integer, c text);
INSERT INTO t1 (b, c) VALUES (generate_series(1,10000), 'starting values');
-- Runs "iterations" rounds; each round issues one INSERT driven by
-- generate_series(1, 1000) into t1 and then sleeps for 0.1 seconds.
-- Always returns true.
CREATE OR REPLACE FUNCTION insert_1k(iterations int) RETURNS boolean AS $$
DECLARE
    round_no integer;
BEGIN
    FOR round_no IN 1..iterations LOOP
        -- c is filled with 250 random characters built from chr(97 + n)
        INSERT INTO t1 (b, c) VALUES (
            generate_series(1, 1000),
            array_to_string(array(select chr(97 + (random() * 25)::int) from generate_series(1,250)), '')
        );

        -- Short pause between rounds
        PERFORM pg_sleep(0.1);
    END LOOP;

    RETURN true;
END;
$$ LANGUAGE plpgsql;
-- Polls the data_checksums setting once per second until it reads 'on',
-- then returns true. There is no upper bound on the number of polls.
CREATE OR REPLACE FUNCTION test_checksums_on() RETURNS boolean AS $$
DECLARE
    is_on boolean;
BEGIN
    LOOP
        SELECT s.setting = 'on'
          INTO is_on
          FROM pg_catalog.pg_settings AS s
         WHERE s.name = 'data_checksums';

        -- A NULL or false result keeps the loop going
        EXIT WHEN is_on;

        PERFORM pg_sleep(1);
    END LOOP;

    RETURN is_on;
END;
$$ LANGUAGE plpgsql;
-- Reads the whole of t1 thirty times, sleeping 0.2 seconds after each
-- read, and then returns true. The count itself is discarded.
CREATE OR REPLACE FUNCTION reader_loop() RETURNS boolean AS $$
DECLARE
    pass_no integer := 1;
BEGIN
    WHILE pass_no <= 30 LOOP
        PERFORM count(a) FROM t1;
        PERFORM pg_sleep(0.2);
        pass_no := pass_no + 1;
    END LOOP;

    RETURN true;
END;
$$ LANGUAGE plpgsql;
}
teardown
{
DROP FUNCTION reader_loop();
DROP FUNCTION test_checksums_on();
DROP FUNCTION insert_1k(int);
DROP TABLE t1;
}
session "writer"
step "w_insert100k" { SELECT insert_1k(100); }
session "reader"
step "r_seqread" { SELECT * FROM reader_loop(); }
session "checksums"
step "c_verify_checksums_off" { SELECT setting = 'off' FROM pg_catalog.pg_settings WHERE name = 'data_checksums'; }
step "c_enable_checksums" { SELECT pg_enable_data_checksums(); }
step "c_wait_for_checksums" { SELECT test_checksums_on(); }
step "c_verify_checksums_on" { SELECT setting = 'on' FROM pg_catalog.pg_settings WHERE name = 'data_checksums'; }
permutation "c_verify_checksums_off" "w_insert100k" "r_seqread" "c_enable_checksums" "c_wait_for_checksums" "c_verify_checksums_on"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment