Commit bd2cb9aa authored by Andrew Dunstan's avatar Andrew Dunstan

Implement a chunking protocol for writes to the syslogger pipe, with messages

reassembled in the syslogger before writing to the log file. This prevents
partial messages from being written, which mucks up log rotation, and
messages from different backends being interleaved, which causes garbled
logs. Backport as far as 8.0, where the syslogger was introduced.

Tom Lane and Andrew Dunstan
parent 320f8205
This diff is collapsed.
...@@ -42,7 +42,7 @@ ...@@ -42,7 +42,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.186 2007/06/07 21:45:59 tgl Exp $ * $PostgreSQL: pgsql/src/backend/utils/error/elog.c,v 1.187 2007/06/14 01:48:51 adunstan Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -124,6 +124,7 @@ static const char *useful_strerror(int errnum); ...@@ -124,6 +124,7 @@ static const char *useful_strerror(int errnum);
static const char *error_severity(int elevel); static const char *error_severity(int elevel);
static void append_with_tabs(StringInfo buf, const char *str); static void append_with_tabs(StringInfo buf, const char *str);
static bool is_log_level_output(int elevel, int log_min_level); static bool is_log_level_output(int elevel, int log_min_level);
static void write_pipe_chunks(int fd, char *data, int len);
/* /*
...@@ -1783,7 +1784,10 @@ send_message_to_server_log(ErrorData *edata) ...@@ -1783,7 +1784,10 @@ send_message_to_server_log(ErrorData *edata)
write_eventlog(edata->elevel, buf.data); write_eventlog(edata->elevel, buf.data);
else else
#endif #endif
fprintf(stderr, "%s", buf.data); if (Redirect_stderr)
write_pipe_chunks(fileno(stderr), buf.data, buf.len);
else
write(fileno(stderr), buf.data, buf.len);
} }
/* If in the syslogger process, try to write messages direct to file */ /* If in the syslogger process, try to write messages direct to file */
...@@ -1793,6 +1797,37 @@ send_message_to_server_log(ErrorData *edata) ...@@ -1793,6 +1797,37 @@ send_message_to_server_log(ErrorData *edata)
pfree(buf.data); pfree(buf.data);
} }
/*
 * write_pipe_chunks
 *
 * Send "len" bytes starting at "data" to the syslogger through "fd", split
 * into protocol chunks.  Every chunk carries a header (two nul bytes, the
 * payload length, this process's pid and an is_last flag) followed by up to
 * PIPE_MAX_PAYLOAD bytes of message text, so that no single write() exceeds
 * PIPE_CHUNK_SIZE bytes.  All chunks but the final one are flagged 'f'; the
 * final one is flagged 't'.
 *
 * The results of the write() calls are not examined.
 */
static void
write_pipe_chunks(int fd, char *data, int len)
{
	PipeProtoChunk chunk;
	char	   *next = data;
	int			remaining = len;
	int			payload;

	Assert(len > 0);

	/* these header fields are the same for every chunk of the message */
	chunk.proto.nuls[0] = '\0';
	chunk.proto.nuls[1] = '\0';
	chunk.proto.pid = MyProcPid;

	for (;;)
	{
		/* a full-size chunk unless what is left fits in a single one */
		if (remaining > PIPE_MAX_PAYLOAD)
		{
			payload = PIPE_MAX_PAYLOAD;
			chunk.proto.is_last = 'f';
		}
		else
		{
			payload = remaining;
			chunk.proto.is_last = 't';
		}

		chunk.proto.len = payload;
		memcpy(chunk.proto.data, next, payload);
		write(fd, &chunk, PIPE_HEADER_SIZE + payload);

		/* stop once the chunk flagged as last has gone out */
		if (chunk.proto.is_last == 't')
			break;

		next += payload;
		remaining -= payload;
	}
}
/* /*
* Write error report to client * Write error report to client
...@@ -2115,6 +2150,7 @@ write_stderr(const char *fmt,...) ...@@ -2115,6 +2150,7 @@ write_stderr(const char *fmt,...)
#ifndef WIN32 #ifndef WIN32
/* On Unix, we just fprintf to stderr */ /* On Unix, we just fprintf to stderr */
vfprintf(stderr, fmt, ap); vfprintf(stderr, fmt, ap);
fflush(stderr);
#else #else
/* /*
...@@ -2130,8 +2166,11 @@ write_stderr(const char *fmt,...) ...@@ -2130,8 +2166,11 @@ write_stderr(const char *fmt,...)
write_eventlog(ERROR, errbuf); write_eventlog(ERROR, errbuf);
} }
else else
{
/* Not running as service, write to stderr */ /* Not running as service, write to stderr */
vfprintf(stderr, fmt, ap); vfprintf(stderr, fmt, ap);
fflush(stderr);
}
#endif #endif
va_end(ap); va_end(ap);
} }
......
...@@ -5,13 +5,61 @@ ...@@ -5,13 +5,61 @@
* *
* Copyright (c) 2004-2007, PostgreSQL Global Development Group * Copyright (c) 2004-2007, PostgreSQL Global Development Group
* *
* $PostgreSQL: pgsql/src/include/postmaster/syslogger.h,v 1.8 2007/01/05 22:19:57 momjian Exp $ * $PostgreSQL: pgsql/src/include/postmaster/syslogger.h,v 1.9 2007/06/14 01:48:51 adunstan Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
#ifndef _SYSLOGGER_H #ifndef _SYSLOGGER_H
#define _SYSLOGGER_H #define _SYSLOGGER_H
#include <limits.h> /* for PIPE_BUF */
/*
* Primitive protocol structure for writing to syslogger pipe(s). The idea
* here is to divide long messages into chunks that are not more than
* PIPE_BUF bytes long, which according to POSIX spec must be written into
* the pipe atomically. The pipe reader then uses the protocol headers to
* reassemble the parts of a message into a single string. The reader can
* also cope with non-protocol data coming down the pipe, though we cannot
* guarantee long strings won't get split apart.
*
* We use 't' or 'f' instead of a bool for is_last to make the protocol a tiny
* bit more robust against finding a false double nul byte prologue. But we
* still might find it in the len and/or pid bytes unless we're careful.
*/
/*
 * Pick the chunk size: PIPE_BUF when <limits.h> supplies it, but never more
 * than 64K; otherwise fall back to 512.
 */
#if !defined(PIPE_BUF)
/* POSIX says the value of PIPE_BUF must be at least 512, so use that */
#define PIPE_CHUNK_SIZE 512
#elif PIPE_BUF > 65536
/* Are there any systems with PIPE_BUF > 64K?  Unlikely, but ... */
#define PIPE_CHUNK_SIZE 65536
#else
#define PIPE_CHUNK_SIZE ((int) PIPE_BUF)
#endif
/*
 * Header placed at the front of every protocol chunk written to the
 * syslogger pipe.  The message bytes carried by the chunk begin at the
 * "data" member; PIPE_HEADER_SIZE is defined as the offset of that member.
 * The field order and types make up the on-pipe format, so they must not be
 * rearranged.
 */
typedef struct
{
char nuls[2]; /* always \0\0; the double-nul prologue of a protocol chunk */
uint16 len; /* size of this chunk (counts data only, not the header) */
int32 pid; /* writer's pid */
char is_last; /* last chunk of message? 't' or 'f' (not a bool) */
char data[1]; /* data payload starts here and extends past this member */
} PipeProtoHeader;
/*
 * One whole chunk: the protocol header overlaid on a buffer of
 * PIPE_CHUNK_SIZE bytes.  The "filler" member makes the union at least
 * PIPE_CHUNK_SIZE bytes, leaving room for the payload after the header.
 */
typedef union
{
PipeProtoHeader proto;
char filler[PIPE_CHUNK_SIZE];
} PipeProtoChunk;
#define PIPE_HEADER_SIZE offsetof(PipeProtoHeader, data)
#define PIPE_MAX_PAYLOAD ((int) (PIPE_CHUNK_SIZE - PIPE_HEADER_SIZE))
/* GUC options */ /* GUC options */
extern bool Redirect_stderr; extern bool Redirect_stderr;
extern int Log_RotationAge; extern int Log_RotationAge;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment