Commit 3cb29c42 authored by Tom Lane's avatar Tom Lane

Add static assertions about pg_control fitting into one disk sector.

When pg_control was first designed, sizeof(ControlFileData) was small
enough that a comment seemed like plenty to document the assumption that
it'd fit into one disk sector.  Now it's nearly 300 bytes, raising the
possibility that somebody would carelessly add enough stuff to create
a problem.  Let's add a StaticAssertStmt() to ensure that the situation
doesn't pass unnoticed if it ever occurs.

While at it, rename PG_CONTROL_SIZE to PG_CONTROL_FILE_SIZE to make it
clearer what that symbol means, and convert the existing runtime
comparisons of sizeof(ControlFileData) vs. PG_CONTROL_FILE_SIZE to be
static asserts --- we didn't have that technology when this code was
first written.

Discussion: https://postgr.es/m/9192.1500490591@sss.pgh.pa.us
parent 5752dcd4
...@@ -4376,7 +4376,16 @@ static void ...@@ -4376,7 +4376,16 @@ static void
WriteControlFile(void) WriteControlFile(void)
{ {
int fd; int fd;
char buffer[PG_CONTROL_SIZE]; /* need not be aligned */ char buffer[PG_CONTROL_FILE_SIZE]; /* need not be aligned */
/*
* Ensure that the size of the pg_control data structure is sane. See the
* comments for these symbols in pg_control.h.
*/
StaticAssertStmt(sizeof(ControlFileData) <= PG_CONTROL_MAX_SAFE_SIZE,
"pg_control is too large for atomic disk writes");
StaticAssertStmt(sizeof(ControlFileData) <= PG_CONTROL_FILE_SIZE,
"sizeof(ControlFileData) exceeds PG_CONTROL_FILE_SIZE");
/* /*
* Initialize version and compatibility-check fields * Initialize version and compatibility-check fields
...@@ -4409,16 +4418,13 @@ WriteControlFile(void) ...@@ -4409,16 +4418,13 @@ WriteControlFile(void)
FIN_CRC32C(ControlFile->crc); FIN_CRC32C(ControlFile->crc);
/* /*
* We write out PG_CONTROL_SIZE bytes into pg_control, zero-padding the * We write out PG_CONTROL_FILE_SIZE bytes into pg_control, zero-padding
* excess over sizeof(ControlFileData). This reduces the odds of * the excess over sizeof(ControlFileData). This reduces the odds of
* premature-EOF errors when reading pg_control. We'll still fail when we * premature-EOF errors when reading pg_control. We'll still fail when we
* check the contents of the file, but hopefully with a more specific * check the contents of the file, but hopefully with a more specific
* error than "couldn't read pg_control". * error than "couldn't read pg_control".
*/ */
if (sizeof(ControlFileData) > PG_CONTROL_SIZE) memset(buffer, 0, PG_CONTROL_FILE_SIZE);
elog(PANIC, "sizeof(ControlFileData) is larger than PG_CONTROL_SIZE; fix either one");
memset(buffer, 0, PG_CONTROL_SIZE);
memcpy(buffer, ControlFile, sizeof(ControlFileData)); memcpy(buffer, ControlFile, sizeof(ControlFileData));
fd = BasicOpenFile(XLOG_CONTROL_FILE, fd = BasicOpenFile(XLOG_CONTROL_FILE,
...@@ -4432,7 +4438,7 @@ WriteControlFile(void) ...@@ -4432,7 +4438,7 @@ WriteControlFile(void)
errno = 0; errno = 0;
pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE); pgstat_report_wait_start(WAIT_EVENT_CONTROL_FILE_WRITE);
if (write(fd, buffer, PG_CONTROL_SIZE) != PG_CONTROL_SIZE) if (write(fd, buffer, PG_CONTROL_FILE_SIZE) != PG_CONTROL_FILE_SIZE)
{ {
/* if write didn't set errno, assume problem is no disk space */ /* if write didn't set errno, assume problem is no disk space */
if (errno == 0) if (errno == 0)
......
...@@ -552,9 +552,9 @@ ReadControlFile(void) ...@@ -552,9 +552,9 @@ ReadControlFile(void)
} }
/* Use malloc to ensure we have a maxaligned buffer */ /* Use malloc to ensure we have a maxaligned buffer */
buffer = (char *) pg_malloc(PG_CONTROL_SIZE); buffer = (char *) pg_malloc(PG_CONTROL_FILE_SIZE);
len = read(fd, buffer, PG_CONTROL_SIZE); len = read(fd, buffer, PG_CONTROL_FILE_SIZE);
if (len < 0) if (len < 0)
{ {
fprintf(stderr, _("%s: could not read file \"%s\": %s\n"), fprintf(stderr, _("%s: could not read file \"%s\": %s\n"),
...@@ -834,7 +834,16 @@ static void ...@@ -834,7 +834,16 @@ static void
RewriteControlFile(void) RewriteControlFile(void)
{ {
int fd; int fd;
char buffer[PG_CONTROL_SIZE]; /* need not be aligned */ char buffer[PG_CONTROL_FILE_SIZE]; /* need not be aligned */
/*
* For good luck, apply the same static assertions as in backend's
* WriteControlFile().
*/
StaticAssertStmt(sizeof(ControlFileData) <= PG_CONTROL_MAX_SAFE_SIZE,
"pg_control is too large for atomic disk writes");
StaticAssertStmt(sizeof(ControlFileData) <= PG_CONTROL_FILE_SIZE,
"sizeof(ControlFileData) exceeds PG_CONTROL_FILE_SIZE");
/* /*
* Adjust fields as needed to force an empty XLOG starting at * Adjust fields as needed to force an empty XLOG starting at
...@@ -878,21 +887,13 @@ RewriteControlFile(void) ...@@ -878,21 +887,13 @@ RewriteControlFile(void)
FIN_CRC32C(ControlFile.crc); FIN_CRC32C(ControlFile.crc);
/* /*
* We write out PG_CONTROL_SIZE bytes into pg_control, zero-padding the * We write out PG_CONTROL_FILE_SIZE bytes into pg_control, zero-padding
* excess over sizeof(ControlFileData). This reduces the odds of * the excess over sizeof(ControlFileData). This reduces the odds of
* premature-EOF errors when reading pg_control. We'll still fail when we * premature-EOF errors when reading pg_control. We'll still fail when we
* check the contents of the file, but hopefully with a more specific * check the contents of the file, but hopefully with a more specific
* error than "couldn't read pg_control". * error than "couldn't read pg_control".
*/ */
if (sizeof(ControlFileData) > PG_CONTROL_SIZE) memset(buffer, 0, PG_CONTROL_FILE_SIZE);
{
fprintf(stderr,
_("%s: internal error -- sizeof(ControlFileData) is too large ... fix PG_CONTROL_SIZE\n"),
progname);
exit(1);
}
memset(buffer, 0, PG_CONTROL_SIZE);
memcpy(buffer, &ControlFile, sizeof(ControlFileData)); memcpy(buffer, &ControlFile, sizeof(ControlFileData));
unlink(XLOG_CONTROL_FILE); unlink(XLOG_CONTROL_FILE);
...@@ -908,7 +909,7 @@ RewriteControlFile(void) ...@@ -908,7 +909,7 @@ RewriteControlFile(void)
} }
errno = 0; errno = 0;
if (write(fd, buffer, PG_CONTROL_SIZE) != PG_CONTROL_SIZE) if (write(fd, buffer, PG_CONTROL_FILE_SIZE) != PG_CONTROL_FILE_SIZE)
{ {
/* if write didn't set errno, assume problem is no disk space */ /* if write didn't set errno, assume problem is no disk space */
if (errno == 0) if (errno == 0)
......
...@@ -625,9 +625,9 @@ checkControlFile(ControlFileData *ControlFile) ...@@ -625,9 +625,9 @@ checkControlFile(ControlFileData *ControlFile)
static void static void
digestControlFile(ControlFileData *ControlFile, char *src, size_t size) digestControlFile(ControlFileData *ControlFile, char *src, size_t size)
{ {
if (size != PG_CONTROL_SIZE) if (size != PG_CONTROL_FILE_SIZE)
pg_fatal("unexpected control file size %d, expected %d\n", pg_fatal("unexpected control file size %d, expected %d\n",
(int) size, PG_CONTROL_SIZE); (int) size, PG_CONTROL_FILE_SIZE);
memcpy(ControlFile, src, sizeof(ControlFileData)); memcpy(ControlFile, src, sizeof(ControlFileData));
...@@ -641,7 +641,16 @@ digestControlFile(ControlFileData *ControlFile, char *src, size_t size) ...@@ -641,7 +641,16 @@ digestControlFile(ControlFileData *ControlFile, char *src, size_t size)
static void static void
updateControlFile(ControlFileData *ControlFile) updateControlFile(ControlFileData *ControlFile)
{ {
char buffer[PG_CONTROL_SIZE]; char buffer[PG_CONTROL_FILE_SIZE];
/*
* For good luck, apply the same static assertions as in backend's
* WriteControlFile().
*/
StaticAssertStmt(sizeof(ControlFileData) <= PG_CONTROL_MAX_SAFE_SIZE,
"pg_control is too large for atomic disk writes");
StaticAssertStmt(sizeof(ControlFileData) <= PG_CONTROL_FILE_SIZE,
"sizeof(ControlFileData) exceeds PG_CONTROL_FILE_SIZE");
/* Recalculate CRC of control file */ /* Recalculate CRC of control file */
INIT_CRC32C(ControlFile->crc); INIT_CRC32C(ControlFile->crc);
...@@ -651,16 +660,16 @@ updateControlFile(ControlFileData *ControlFile) ...@@ -651,16 +660,16 @@ updateControlFile(ControlFileData *ControlFile)
FIN_CRC32C(ControlFile->crc); FIN_CRC32C(ControlFile->crc);
/* /*
* Write out PG_CONTROL_SIZE bytes into pg_control by zero-padding the * Write out PG_CONTROL_FILE_SIZE bytes into pg_control by zero-padding
* excess over sizeof(ControlFileData) to avoid premature EOF related * the excess over sizeof(ControlFileData), to avoid premature EOF related
* errors when reading it. * errors when reading it.
*/ */
memset(buffer, 0, PG_CONTROL_SIZE); memset(buffer, 0, PG_CONTROL_FILE_SIZE);
memcpy(buffer, ControlFile, sizeof(ControlFileData)); memcpy(buffer, ControlFile, sizeof(ControlFileData));
open_target_file("global/pg_control", false); open_target_file("global/pg_control", false);
write_target_range(buffer, 0, PG_CONTROL_SIZE); write_target_range(buffer, 0, PG_CONTROL_FILE_SIZE);
close_target_file(); close_target_file();
} }
......
...@@ -20,11 +20,12 @@ ...@@ -20,11 +20,12 @@
#include "port/pg_crc32c.h" #include "port/pg_crc32c.h"
#define MOCK_AUTH_NONCE_LEN 32
/* Version identifier for this pg_control format */ /* Version identifier for this pg_control format */
#define PG_CONTROL_VERSION 1002 #define PG_CONTROL_VERSION 1002
/* Nonce key length, see below */
#define MOCK_AUTH_NONCE_LEN 32
/* /*
* Body of CheckPoint XLOG records. This is declared here because we keep * Body of CheckPoint XLOG records. This is declared here because we keep
* a copy of the latest one in pg_control for possible disaster recovery. * a copy of the latest one in pg_control for possible disaster recovery.
...@@ -94,10 +95,6 @@ typedef enum DBState ...@@ -94,10 +95,6 @@ typedef enum DBState
/* /*
* Contents of pg_control. * Contents of pg_control.
*
* NOTE: try to keep this under 512 bytes so that it will fit on one physical
* sector of typical disk drives. This reduces the odds of corruption due to
* power failure midway through a write.
*/ */
typedef struct ControlFileData typedef struct ControlFileData
...@@ -235,6 +232,14 @@ typedef struct ControlFileData ...@@ -235,6 +232,14 @@ typedef struct ControlFileData
pg_crc32c crc; pg_crc32c crc;
} ControlFileData; } ControlFileData;
/*
* Maximum safe value of sizeof(ControlFileData). For reliability's sake,
* it's critical that pg_control updates be atomic writes. That generally
* means the active data can't be more than one disk sector, which is 512
* bytes on common hardware. Be very careful about raising this limit.
*/
#define PG_CONTROL_MAX_SAFE_SIZE 512
/* /*
* Physical size of the pg_control file. Note that this is considerably * Physical size of the pg_control file. Note that this is considerably
* bigger than the actually used size (ie, sizeof(ControlFileData)). * bigger than the actually used size (ie, sizeof(ControlFileData)).
...@@ -242,6 +247,6 @@ typedef struct ControlFileData ...@@ -242,6 +247,6 @@ typedef struct ControlFileData
* changes, so that ReadControlFile will deliver a suitable wrong-version * changes, so that ReadControlFile will deliver a suitable wrong-version
* message instead of a read error if it's looking at an incompatible file. * message instead of a read error if it's looking at an incompatible file.
*/ */
#define PG_CONTROL_SIZE 8192 #define PG_CONTROL_FILE_SIZE 8192
#endif /* PG_CONTROL_H */ #endif /* PG_CONTROL_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment