Commit d9bae531 authored by Magnus Hagander's avatar Magnus Hagander

Implement streaming xlog for backup tools

Add option for parallel streaming of the transaction log while a
base backup is running, to get the logfiles before the server has
removed them.

Also add a tool called pg_receivexlog, which streams the transaction
log into files, creating a log archive without having to wait for
segments to complete, thus decreasing the window of data loss without
having to waste space using archive_timeout. This works best in
combination with archive_command - suggested usage docs etc coming later.
parent 2b64f3f1
...@@ -172,6 +172,7 @@ Complete list of usable sgml source files in this directory. ...@@ -172,6 +172,7 @@ Complete list of usable sgml source files in this directory.
<!ENTITY pgCtl SYSTEM "pg_ctl-ref.sgml"> <!ENTITY pgCtl SYSTEM "pg_ctl-ref.sgml">
<!ENTITY pgDump SYSTEM "pg_dump.sgml"> <!ENTITY pgDump SYSTEM "pg_dump.sgml">
<!ENTITY pgDumpall SYSTEM "pg_dumpall.sgml"> <!ENTITY pgDumpall SYSTEM "pg_dumpall.sgml">
<!ENTITY pgReceivexlog SYSTEM "pg_receivexlog.sgml">
<!ENTITY pgResetxlog SYSTEM "pg_resetxlog.sgml"> <!ENTITY pgResetxlog SYSTEM "pg_resetxlog.sgml">
<!ENTITY pgRestore SYSTEM "pg_restore.sgml"> <!ENTITY pgRestore SYSTEM "pg_restore.sgml">
<!ENTITY postgres SYSTEM "postgres-ref.sgml"> <!ENTITY postgres SYSTEM "postgres-ref.sgml">
......
...@@ -143,8 +143,8 @@ PostgreSQL documentation ...@@ -143,8 +143,8 @@ PostgreSQL documentation
</varlistentry> </varlistentry>
<varlistentry> <varlistentry>
<term><option>-x</option></term> <term><option>-x <replaceable class="parameter">method</replaceable></option></term>
<term><option>--xlog</option></term> <term><option>--xlog=<replaceable class="parameter">method</replaceable></option></term>
<listitem> <listitem>
<para> <para>
Includes the required transaction log files (WAL files) in the Includes the required transaction log files (WAL files) in the
...@@ -154,16 +154,43 @@ PostgreSQL documentation ...@@ -154,16 +154,43 @@ PostgreSQL documentation
to consult the log archive, thus making this a completely standalone to consult the log archive, thus making this a completely standalone
backup. backup.
</para> </para>
<note> <para>
<para> The following methods for collecting the transaction logs are
The transaction log files are collected at the end of the backup. supported:
Therefore, it is necessary for the
<xref linkend="guc-wal-keep-segments"> parameter to be set high <variablelist>
enough that the log is not removed before the end of the backup. <varlistentry>
If the log has been rotated when it's time to transfer it, the <term><literal>f</literal></term>
backup will fail and be unusable. <term><literal>fetch</literal></term>
</para> <listitem>
</note> <para>
The transaction log files are collected at the end of the backup.
Therefore, it is necessary for the
<xref linkend="guc-wal-keep-segments"> parameter to be set high
enough that the log is not removed before the end of the backup.
If the log has been rotated when it's time to transfer it, the
backup will fail and be unusable.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><literal>s</literal></term>
<term><literal>stream</literal></term>
<listitem>
<para>
Stream the transaction log while the backup is created. This will
open a second connection to the server and start streaming the
transaction log in parallel while running the backup. Therefore,
it will use up two slots configured by the
<xref linkend="guc-max-wal-senders"> parameter. As long as the
client can keep up with the transaction log it receives, using this mode
requires no extra transaction logs to be saved on the master.
</para>
</listitem>
</varlistentry>
</variablelist>
</para>
</listitem> </listitem>
</varlistentry> </varlistentry>
...@@ -260,6 +287,20 @@ PostgreSQL documentation ...@@ -260,6 +287,20 @@ PostgreSQL documentation
The following command-line options control the database connection parameters. The following command-line options control the database connection parameters.
<variablelist> <variablelist>
<varlistentry>
<term><option>-s <replaceable class="parameter">interval</replaceable></option></term>
<term><option>--statusint=<replaceable class="parameter">interval</replaceable></option></term>
<listitem>
<para>
Specifies the number of seconds between status packets sent back to the
server. This is required when streaming the transaction log (using
<literal>--xlog=stream</literal>) if replication timeout is configured
on the server, and allows for easier monitoring. The default value is
10 seconds.
</para>
</listitem>
</varlistentry>
<varlistentry> <varlistentry>
<term><option>-h <replaceable class="parameter">host</replaceable></option></term> <term><option>-h <replaceable class="parameter">host</replaceable></option></term>
<term><option>--host=<replaceable class="parameter">host</replaceable></option></term> <term><option>--host=<replaceable class="parameter">host</replaceable></option></term>
......
<!--
doc/src/sgml/ref/pg_receivexlog.sgml
PostgreSQL documentation
-->
<refentry id="app-pgreceivexlog">
<refmeta>
<refentrytitle>pg_receivexlog</refentrytitle>
<manvolnum>1</manvolnum>
<refmiscinfo>Application</refmiscinfo>
</refmeta>
<refnamediv>
<refname>pg_receivexlog</refname>
<refpurpose>streams transaction logs from a <productname>PostgreSQL</productname> cluster</refpurpose>
</refnamediv>
<indexterm zone="app-pgreceivexlog">
<primary>pg_receivexlog</primary>
</indexterm>
<refsynopsisdiv>
<cmdsynopsis>
<command>pg_receivexlog</command>
<arg rep="repeat"><replaceable>option</></arg>
</cmdsynopsis>
</refsynopsisdiv>
<refsect1>
<title>
Description
</title>
<para>
<application>pg_receivexlog</application> is used to stream the transaction log
from a running <productname>PostgreSQL</productname> cluster. The transaction
log is streamed using the streaming replication protocol, and is written
to a local directory of files. This directory can be used as the archive
location for doing a restore using point-in-time recovery (see
<xref linkend="continuous-archiving">).
</para>
<para>
<application>pg_receivexlog</application> streams the transaction
log in real time as it's being generated on the server, and does not wait
for segments to complete like <xref linkend="guc-archive-command"> does.
For this reason, it is not necessary to set
<xref linkend="guc-archive-timeout"> when using
<application>pg_receivexlog</application>.
</para>
<para>
The transaction log is streamed over a regular
<productname>PostgreSQL</productname> connection, and uses the
replication protocol. The connection must be
made with a user having <literal>REPLICATION</literal> permissions (see
<xref linkend="role-attributes">), and the user must be granted explicit
permissions in <filename>pg_hba.conf</filename>. The server must also
be configured with <xref linkend="guc-max-wal-senders"> set high enough
to leave at least one session available for the stream.
</para>
</refsect1>
<refsect1>
<title>Options</title>
<para>
The following command-line options control the location and format of the
output.
<variablelist>
<varlistentry>
<term><option>-D <replaceable class="parameter">directory</replaceable></option></term>
<term><option>--dir=<replaceable class="parameter">directory</replaceable></option></term>
<listitem>
<para>
Directory to write the output to.
</para>
<para>
This parameter is required.
</para>
</listitem>
</varlistentry>
</variablelist>
</para>
<para>
The following command-line options control the running of the program.
<variablelist>
<varlistentry>
<term><option>-v</option></term>
<term><option>--verbose</option></term>
<listitem>
<para>
Enables verbose mode.
</para>
</listitem>
</varlistentry>
</variablelist>
</para>
<para>
The following command-line options control the database connection parameters.
<variablelist>
<varlistentry>
<term><option>-s <replaceable class="parameter">interval</replaceable></option></term>
<term><option>--statusint=<replaceable class="parameter">interval</replaceable></option></term>
<listitem>
<para>
Specifies the number of seconds between status packets sent back to the
server. This is required if replication timeout is configured on the
server, and allows for easier monitoring. The default value is
10 seconds.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-h <replaceable class="parameter">host</replaceable></option></term>
<term><option>--host=<replaceable class="parameter">host</replaceable></option></term>
<listitem>
<para>
Specifies the host name of the machine on which the server is
running. If the value begins with a slash, it is used as the
directory for the Unix domain socket. The default is taken
from the <envar>PGHOST</envar> environment variable, if set,
else a Unix domain socket connection is attempted.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-p <replaceable class="parameter">port</replaceable></option></term>
<term><option>--port=<replaceable class="parameter">port</replaceable></option></term>
<listitem>
<para>
Specifies the TCP port or local Unix domain socket file
extension on which the server is listening for connections.
Defaults to the <envar>PGPORT</envar> environment variable, if
set, or a compiled-in default.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-U <replaceable>username</replaceable></option></term>
<term><option>--username=<replaceable class="parameter">username</replaceable></option></term>
<listitem>
<para>
User name to connect as.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-w</></term>
<term><option>--no-password</></term>
<listitem>
<para>
Never issue a password prompt. If the server requires
password authentication and a password is not available by
other means such as a <filename>.pgpass</filename> file, the
connection attempt will fail. This option can be useful in
batch jobs and scripts where no user is present to enter a
password.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-W</option></term>
<term><option>--password</option></term>
<listitem>
<para>
Force <application>pg_receivexlog</application> to prompt for a
password before connecting to a database.
</para>
<para>
This option is never essential, since
<application>pg_receivexlog</application> will automatically prompt
for a password if the server demands password authentication.
However, <application>pg_receivexlog</application> will waste a
connection attempt finding out that the server wants a password.
In some cases it is worth typing <option>-W</> to avoid the extra
connection attempt.
</para>
</listitem>
</varlistentry>
</variablelist>
</para>
<para>
Other, less commonly used, parameters are also available:
<variablelist>
<varlistentry>
<term><option>-V</></term>
<term><option>--version</></term>
<listitem>
<para>
Print the <application>pg_receivexlog</application> version and exit.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>-?</></term>
<term><option>--help</></term>
<listitem>
<para>
Show help about <application>pg_receivexlog</application> command line
arguments, and exit.
</para>
</listitem>
</varlistentry>
</variablelist>
</para>
</refsect1>
<refsect1>
<title>Environment</title>
<para>
This utility, like most other <productname>PostgreSQL</> utilities,
uses the environment variables supported by <application>libpq</>
(see <xref linkend="libpq-envars">).
</para>
</refsect1>
<refsect1>
<title>Notes</title>
<para>
When using <application>pg_receivexlog</application> instead of
<xref linkend="guc-archive-command">, the server will continue to
recycle transaction log files even if they have not been properly
archived, since there is no command that fails. This can be worked
around by having an <xref linkend="guc-archive-command"> that fails
when the file has not been properly archived yet.
</para>
</refsect1>
<refsect1>
<title>Examples</title>
<para>
To stream the transaction log from the server at
<literal>mydbserver</literal> and store it in the local directory
<filename>/usr/local/pgsql/archive</filename>:
<screen>
<prompt>$</prompt> <userinput>pg_receivexlog -h mydbserver -D /usr/local/pgsql/archive</userinput>
</screen>
</para>
</refsect1>
<refsect1>
<title>See Also</title>
<simplelist type="inline">
<member><xref linkend="APP-PGBASEBACKUP"></member>
</simplelist>
</refsect1>
</refentry>
...@@ -220,6 +220,7 @@ ...@@ -220,6 +220,7 @@
&pgConfig; &pgConfig;
&pgDump; &pgDump;
&pgDumpall; &pgDumpall;
&pgReceivexlog;
&pgRestore; &pgRestore;
&psqlRef; &psqlRef;
&reindexdb; &reindexdb;
......
/pg_basebackup /pg_basebackup
/pg_receivexlog
\ No newline at end of file
...@@ -18,21 +18,26 @@ include $(top_builddir)/src/Makefile.global ...@@ -18,21 +18,26 @@ include $(top_builddir)/src/Makefile.global
override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS)
OBJS= pg_basebackup.o $(WIN32RES) OBJS=receivelog.o streamutil.o $(WIN32RES)
all: pg_basebackup all: pg_basebackup pg_receivexlog
pg_basebackup: $(OBJS) | submake-libpq submake-libpgport pg_basebackup: pg_basebackup.o $(OBJS) | submake-libpq submake-libpgport
$(CC) $(CFLAGS) $(OBJS) $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X) $(CC) $(CFLAGS) pg_basebackup.o $(OBJS) $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
pg_receivexlog: pg_receivexlog.o $(OBJS) | submake-libpq submake-libpgport
$(CC) $(CFLAGS) pg_receivexlog.o $(OBJS) $(libpq_pgport) $(LDFLAGS) $(LDFLAGS_EX) $(LIBS) -o $@$(X)
install: all installdirs install: all installdirs
$(INSTALL_PROGRAM) pg_basebackup$(X) '$(DESTDIR)$(bindir)/pg_basebackup$(X)' $(INSTALL_PROGRAM) pg_basebackup$(X) '$(DESTDIR)$(bindir)/pg_basebackup$(X)'
$(INSTALL_PROGRAM) pg_receivexlog$(X) '$(DESTDIR)$(bindir)/pg_receivexlog$(X)'
installdirs: installdirs:
$(MKDIR_P) '$(DESTDIR)$(bindir)' $(MKDIR_P) '$(DESTDIR)$(bindir)'
uninstall: uninstall:
rm -f '$(DESTDIR)$(bindir)/pg_basebackup$(X)' rm -f '$(DESTDIR)$(bindir)/pg_basebackup$(X)'
rm -f '$(DESTDIR)$(bindir)/pg_receivexlog$(X)'
clean distclean maintainer-clean: clean distclean maintainer-clean:
rm -f pg_basebackup$(X) $(OBJS) rm -f pg_basebackup$(X) pg_receivexlog$(X) $(OBJS) pg_basebackup.o pg_receivexlog.o
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
#include "access/xlogdefs.h"
/*
* Called whenever a segment is finished, return true to stop
* the streaming at this point.
*/
typedef bool (*segment_finish_callback)(XLogRecPtr segendpos, uint32 timeline);
/*
* Called before trying to read more data. Return true to stop
* the streaming at this point.
*/
typedef bool (*stream_continue_callback)(void);
/*
 * Receive a transaction log stream over conn.  The implementation lives
 * outside this header.
 *
 * segment_finish and stream_continue are the callback types declared above;
 * each returns true to request that streaming stop.
 *
 * NOTE(review): startpos and timeline presumably select where streaming
 * begins, sysidentifier the expected system identifier of the server,
 * basedir the directory the log files are written to, and
 * standby_message_timeout the interval between status packets (the
 * --statusint option, in seconds) -- confirm against the implementation.
 * The meaning of the bool return value is also not visible here.
 */
extern bool ReceiveXlogStream(PGconn *conn,
XLogRecPtr startpos,
uint32 timeline,
char *sysidentifier,
char *basedir,
segment_finish_callback segment_finish,
stream_continue_callback stream_continue,
int standby_message_timeout);
/*-------------------------------------------------------------------------
*
* streamutil.c - utility functions for pg_basebackup and pg_receivexlog
*
* Author: Magnus Hagander <magnus@hagander.net>
*
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/bin/pg_basebackup/streamutil.c
*-------------------------------------------------------------------------
*/
/*
* We have to use postgres.h not postgres_fe.h here, because there's so much
* backend-only stuff in the XLOG include files we need. But we need a
* frontend-ish environment otherwise. Hence this ugly hack.
*/
#define FRONTEND 1
#include "postgres.h"
#include "streamutil.h"
#include <stdio.h>
#include <string.h>
/* Program name: prefixes error messages and is sent as fallback_application_name */
const char *progname;
/* Connection parameters; GetConnection() passes each one to libpq only when non-NULL */
char *dbhost = NULL;
char *dbuser = NULL;
char *dbport = NULL;
int dbgetpassword = 0; /* 0=auto, -1=never, 1=always */
/* Password remembered by GetConnection() after a successful prompt, reused for later connections */
static char *dbpassword = NULL;
/* NOTE(review): never assigned in this file; presumably set by the programs using it -- confirm */
PGconn *conn = NULL;
/*
 * strdup() and malloc() replacements that print an error and exit
 * if something goes wrong.  They can never return NULL.
 */

/*
 * Duplicate the string s.  On allocation failure, report "out of memory"
 * on stderr (prefixed with progname) and exit with status 1.
 */
char *
xstrdup(const char *s)
{
    char       *copy = strdup(s);

    if (copy != NULL)
        return copy;

    /* Allocation failed: there is nothing sensible to do but bail out */
    fprintf(stderr, _("%s: out of memory\n"), progname);
    exit(1);
}
/*
 * malloc() replacement that returns zero-filled memory.  On allocation
 * failure it reports "out of memory" on stderr (prefixed with progname)
 * and exits with status 1, so it can never return NULL.
 *
 * size is the number of bytes to allocate.  A request for zero bytes is
 * rounded up to one byte: malloc(0) is allowed to return NULL, which would
 * otherwise be misreported as an out-of-memory condition.
 */
void *
xmalloc0(int size)
{
    void       *result;

    /* Avoid the unportable behavior of a zero-size allocation */
    if (size == 0)
        size = 1;

    result = malloc(size);
    if (!result)
    {
        fprintf(stderr, _("%s: out of memory\n"), progname);
        exit(1);
    }
    MemSet(result, 0, size);
    return result;
}
PGconn *
GetConnection(void)
{
PGconn *tmpconn;
int argcount = 4; /* dbname, replication, fallback_app_name,
* password */
int i;
const char **keywords;
const char **values;
char *password = NULL;
if (dbhost)
argcount++;
if (dbuser)
argcount++;
if (dbport)
argcount++;
keywords = xmalloc0((argcount + 1) * sizeof(*keywords));
values = xmalloc0((argcount + 1) * sizeof(*values));
keywords[0] = "dbname";
values[0] = "replication";
keywords[1] = "replication";
values[1] = "true";
keywords[2] = "fallback_application_name";
values[2] = progname;
i = 3;
if (dbhost)
{
keywords[i] = "host";
values[i] = dbhost;
i++;
}
if (dbuser)
{
keywords[i] = "user";
values[i] = dbuser;
i++;
}
if (dbport)
{
keywords[i] = "port";
values[i] = dbport;
i++;
}
while (true)
{
if (password)
free(password);
if (dbpassword)
{
/*
* We've saved a password when a previous connection succeeded,
* meaning this is the call for a second session to the same
* database, so just forcibly reuse that password.
*/
keywords[argcount - 1] = "password";
values[argcount - 1] = dbpassword;
dbgetpassword = -1; /* Don't try again if this fails */
}
else if (dbgetpassword == 1)
{
password = simple_prompt(_("Password: "), 100, false);
keywords[argcount - 1] = "password";
values[argcount - 1] = password;
}
tmpconn = PQconnectdbParams(keywords, values, true);
if (PQstatus(tmpconn) == CONNECTION_BAD &&
PQconnectionNeedsPassword(tmpconn) &&
dbgetpassword != -1)
{
dbgetpassword = 1; /* ask for password next time */
PQfinish(tmpconn);
continue;
}
if (PQstatus(tmpconn) != CONNECTION_OK)
{
fprintf(stderr, _("%s: could not connect to server: %s\n"),
progname, PQerrorMessage(tmpconn));
exit(1);
}
/* Connection ok! */
free(values);
free(keywords);
/* Store the password for next run */
if (password)
dbpassword = password;
return tmpconn;
}
}
#include "libpq-fe.h"
extern const char *progname;
extern char *dbhost;
extern char *dbuser;
extern char *dbport;
extern int dbgetpassword;
/* Connection kept global so we can disconnect easily */
extern PGconn *conn;
/*
 * Close the global connection, if one is open, and terminate the program
 * with the given exit code.
 *
 * The body is wrapped in do { } while (0) so the macro expands to a single
 * statement and is safe in an unbraced if/else.  The invocation must be
 * terminated with a semicolon.
 */
#define disconnect_and_exit(code) \
    do { \
        if (conn != NULL) \
            PQfinish(conn); \
        exit(code); \
    } while (0)
/* strdup() wrapper: prints an error and exits on failure, never returns NULL */
char *xstrdup(const char *s);
/* malloc() wrapper returning zero-filled memory: prints an error and exits on failure */
void *xmalloc0(int size);
/* Open a replication connection using dbhost/dbuser/dbport; exits on failure */
PGconn *GetConnection(void);
...@@ -305,6 +305,13 @@ sub mkvcbuild ...@@ -305,6 +305,13 @@ sub mkvcbuild
$initdb->AddLibrary('ws2_32.lib'); $initdb->AddLibrary('ws2_32.lib');
my $pgbasebackup = AddSimpleFrontend('pg_basebackup', 1); my $pgbasebackup = AddSimpleFrontend('pg_basebackup', 1);
$pgbasebackup->AddFile('src\bin\pg_basebackup\pg_basebackup.c');
$pgbasebackup->AddLibrary('ws2_32.lib');
my $pgreceivexlog = AddSimpleFrontend('pg_basebackup', 1);
$pgreceivexlog->{name} = 'pg_receivexlog';
$pgreceivexlog->AddFile('src\bin\pg_basebackup\pg_receivexlog.c');
$pgreceivexlog->AddLibrary('ws2_32.lib');
my $pgconfig = AddSimpleFrontend('pg_config'); my $pgconfig = AddSimpleFrontend('pg_config');
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment