Commit d25ee300 authored by Bruce Momjian's avatar Bruce Momjian

pg_upgrade: prevent check on live cluster from generating error

Previously an inaccurate but harmless error was generated when running
--check on a live server before reporting the servers as compatible.
The fix is to split error reporting and exit control in the exec_prog()
API.

Reported-by: Daniel Westermann

Backpatch-through: 10
parent e35dba47
...@@ -23,7 +23,7 @@ generate_old_dump(void) ...@@ -23,7 +23,7 @@ generate_old_dump(void)
prep_status("Creating dump of global objects"); prep_status("Creating dump of global objects");
/* run new pg_dumpall binary for globals */ /* run new pg_dumpall binary for globals */
exec_prog(UTILITY_LOG_FILE, NULL, true, exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/pg_dumpall\" %s --globals-only --quote-all-identifiers " "\"%s/pg_dumpall\" %s --globals-only --quote-all-identifiers "
"--binary-upgrade %s -f %s", "--binary-upgrade %s -f %s",
new_cluster.bindir, cluster_conn_opts(&old_cluster), new_cluster.bindir, cluster_conn_opts(&old_cluster),
......
...@@ -71,16 +71,14 @@ get_bin_version(ClusterInfo *cluster) ...@@ -71,16 +71,14 @@ get_bin_version(ClusterInfo *cluster)
* and attempts to execute that command. If the command executes * and attempts to execute that command. If the command executes
* successfully, exec_prog() returns true. * successfully, exec_prog() returns true.
* *
* If the command fails, an error message is saved to the specified log_file. * If the command fails, an error message is optionally written to the specified
* If throw_error is true, this raises a PG_FATAL error and pg_upgrade * log_file, and the program optionally exits.
* terminates; otherwise it is just reported as PG_REPORT and exec_prog()
* returns false.
* *
* The code requires it be called first from the primary thread on Windows. * The code requires it be called first from the primary thread on Windows.
*/ */
bool bool
exec_prog(const char *log_file, const char *opt_log_file, exec_prog(const char *log_file, const char *opt_log_file,
bool throw_error, const char *fmt,...) bool report_error, bool exit_on_error, const char *fmt,...)
{ {
int result = 0; int result = 0;
int written; int written;
...@@ -173,7 +171,7 @@ exec_prog(const char *log_file, const char *opt_log_file, ...@@ -173,7 +171,7 @@ exec_prog(const char *log_file, const char *opt_log_file,
#endif #endif
result = system(cmd); result = system(cmd);
if (result != 0) if (result != 0 && report_error)
{ {
/* we might be in on a progress status line, so go to the next line */ /* we might be in on a progress status line, so go to the next line */
report_status(PG_REPORT, "\n*failure*"); report_status(PG_REPORT, "\n*failure*");
...@@ -181,12 +179,12 @@ exec_prog(const char *log_file, const char *opt_log_file, ...@@ -181,12 +179,12 @@ exec_prog(const char *log_file, const char *opt_log_file,
pg_log(PG_VERBOSE, "There were problems executing \"%s\"\n", cmd); pg_log(PG_VERBOSE, "There were problems executing \"%s\"\n", cmd);
if (opt_log_file) if (opt_log_file)
pg_log(throw_error ? PG_FATAL : PG_REPORT, pg_log(exit_on_error ? PG_FATAL : PG_REPORT,
"Consult the last few lines of \"%s\" or \"%s\" for\n" "Consult the last few lines of \"%s\" or \"%s\" for\n"
"the probable cause of the failure.\n", "the probable cause of the failure.\n",
log_file, opt_log_file); log_file, opt_log_file);
else else
pg_log(throw_error ? PG_FATAL : PG_REPORT, pg_log(exit_on_error ? PG_FATAL : PG_REPORT,
"Consult the last few lines of \"%s\" for\n" "Consult the last few lines of \"%s\" for\n"
"the probable cause of the failure.\n", "the probable cause of the failure.\n",
log_file); log_file);
......
...@@ -78,8 +78,8 @@ parallel_exec_prog(const char *log_file, const char *opt_log_file, ...@@ -78,8 +78,8 @@ parallel_exec_prog(const char *log_file, const char *opt_log_file,
va_end(args); va_end(args);
if (user_opts.jobs <= 1) if (user_opts.jobs <= 1)
/* throw_error must be true to allow jobs */ /* exit_on_error must be true to allow jobs */
exec_prog(log_file, opt_log_file, true, "%s", cmd); exec_prog(log_file, opt_log_file, true, true, "%s", cmd);
else else
{ {
/* parallel */ /* parallel */
...@@ -122,7 +122,7 @@ parallel_exec_prog(const char *log_file, const char *opt_log_file, ...@@ -122,7 +122,7 @@ parallel_exec_prog(const char *log_file, const char *opt_log_file,
child = fork(); child = fork();
if (child == 0) if (child == 0)
/* use _exit to skip atexit() functions */ /* use _exit to skip atexit() functions */
_exit(!exec_prog(log_file, opt_log_file, true, "%s", cmd)); _exit(!exec_prog(log_file, opt_log_file, true, true, "%s", cmd));
else if (child < 0) else if (child < 0)
/* fork failed */ /* fork failed */
pg_fatal("could not create worker process: %s\n", strerror(errno)); pg_fatal("could not create worker process: %s\n", strerror(errno));
...@@ -160,7 +160,7 @@ win32_exec_prog(exec_thread_arg *args) ...@@ -160,7 +160,7 @@ win32_exec_prog(exec_thread_arg *args)
{ {
int ret; int ret;
ret = !exec_prog(args->log_file, args->opt_log_file, true, "%s", args->cmd); ret = !exec_prog(args->log_file, args->opt_log_file, true, true, "%s", args->cmd);
/* terminates thread */ /* terminates thread */
return ret; return ret;
...@@ -187,7 +187,6 @@ parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr, ...@@ -187,7 +187,6 @@ parallel_transfer_all_new_dbs(DbInfoArr *old_db_arr, DbInfoArr *new_db_arr,
#endif #endif
if (user_opts.jobs <= 1) if (user_opts.jobs <= 1)
/* throw_error must be true to allow jobs */
transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, new_pgdata, NULL); transfer_all_new_dbs(old_db_arr, new_db_arr, old_pgdata, new_pgdata, NULL);
else else
{ {
......
...@@ -149,14 +149,14 @@ main(int argc, char **argv) ...@@ -149,14 +149,14 @@ main(int argc, char **argv)
* because there is no need to have the schema load use new oids. * because there is no need to have the schema load use new oids.
*/ */
prep_status("Setting next OID for new cluster"); prep_status("Setting next OID for new cluster");
exec_prog(UTILITY_LOG_FILE, NULL, true, exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/pg_resetwal\" -o %u \"%s\"", "\"%s/pg_resetwal\" -o %u \"%s\"",
new_cluster.bindir, old_cluster.controldata.chkpnt_nxtoid, new_cluster.bindir, old_cluster.controldata.chkpnt_nxtoid,
new_cluster.pgdata); new_cluster.pgdata);
check_ok(); check_ok();
prep_status("Sync data directory to disk"); prep_status("Sync data directory to disk");
exec_prog(UTILITY_LOG_FILE, NULL, true, exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/initdb\" --sync-only \"%s\"", new_cluster.bindir, "\"%s/initdb\" --sync-only \"%s\"", new_cluster.bindir,
new_cluster.pgdata); new_cluster.pgdata);
check_ok(); check_ok();
...@@ -249,7 +249,7 @@ prepare_new_cluster(void) ...@@ -249,7 +249,7 @@ prepare_new_cluster(void)
* --analyze so autovacuum doesn't update statistics later * --analyze so autovacuum doesn't update statistics later
*/ */
prep_status("Analyzing all rows in the new cluster"); prep_status("Analyzing all rows in the new cluster");
exec_prog(UTILITY_LOG_FILE, NULL, true, exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/vacuumdb\" %s --all --analyze %s", "\"%s/vacuumdb\" %s --all --analyze %s",
new_cluster.bindir, cluster_conn_opts(&new_cluster), new_cluster.bindir, cluster_conn_opts(&new_cluster),
log_opts.verbose ? "--verbose" : ""); log_opts.verbose ? "--verbose" : "");
...@@ -262,7 +262,7 @@ prepare_new_cluster(void) ...@@ -262,7 +262,7 @@ prepare_new_cluster(void)
* counter later. * counter later.
*/ */
prep_status("Freezing all rows in the new cluster"); prep_status("Freezing all rows in the new cluster");
exec_prog(UTILITY_LOG_FILE, NULL, true, exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/vacuumdb\" %s --all --freeze %s", "\"%s/vacuumdb\" %s --all --freeze %s",
new_cluster.bindir, cluster_conn_opts(&new_cluster), new_cluster.bindir, cluster_conn_opts(&new_cluster),
log_opts.verbose ? "--verbose" : ""); log_opts.verbose ? "--verbose" : "");
...@@ -289,7 +289,7 @@ prepare_new_databases(void) ...@@ -289,7 +289,7 @@ prepare_new_databases(void)
* support functions in template1 but pg_dumpall creates database using * support functions in template1 but pg_dumpall creates database using
* the template0 template. * the template0 template.
*/ */
exec_prog(UTILITY_LOG_FILE, NULL, true, exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/psql\" " EXEC_PSQL_ARGS " %s -f \"%s\"", "\"%s/psql\" " EXEC_PSQL_ARGS " %s -f \"%s\"",
new_cluster.bindir, cluster_conn_opts(&new_cluster), new_cluster.bindir, cluster_conn_opts(&new_cluster),
GLOBALS_DUMP_FILE); GLOBALS_DUMP_FILE);
...@@ -392,7 +392,7 @@ copy_subdir_files(const char *old_subdir, const char *new_subdir) ...@@ -392,7 +392,7 @@ copy_subdir_files(const char *old_subdir, const char *new_subdir)
prep_status("Copying old %s to new server", old_subdir); prep_status("Copying old %s to new server", old_subdir);
exec_prog(UTILITY_LOG_FILE, NULL, true, exec_prog(UTILITY_LOG_FILE, NULL, true, true,
#ifndef WIN32 #ifndef WIN32
"cp -Rf \"%s\" \"%s\"", "cp -Rf \"%s\" \"%s\"",
#else #else
...@@ -418,16 +418,16 @@ copy_xact_xlog_xid(void) ...@@ -418,16 +418,16 @@ copy_xact_xlog_xid(void)
/* set the next transaction id and epoch of the new cluster */ /* set the next transaction id and epoch of the new cluster */
prep_status("Setting next transaction ID and epoch for new cluster"); prep_status("Setting next transaction ID and epoch for new cluster");
exec_prog(UTILITY_LOG_FILE, NULL, true, exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/pg_resetwal\" -f -x %u \"%s\"", "\"%s/pg_resetwal\" -f -x %u \"%s\"",
new_cluster.bindir, old_cluster.controldata.chkpnt_nxtxid, new_cluster.bindir, old_cluster.controldata.chkpnt_nxtxid,
new_cluster.pgdata); new_cluster.pgdata);
exec_prog(UTILITY_LOG_FILE, NULL, true, exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/pg_resetwal\" -f -e %u \"%s\"", "\"%s/pg_resetwal\" -f -e %u \"%s\"",
new_cluster.bindir, old_cluster.controldata.chkpnt_nxtepoch, new_cluster.bindir, old_cluster.controldata.chkpnt_nxtepoch,
new_cluster.pgdata); new_cluster.pgdata);
/* must reset commit timestamp limits also */ /* must reset commit timestamp limits also */
exec_prog(UTILITY_LOG_FILE, NULL, true, exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/pg_resetwal\" -f -c %u,%u \"%s\"", "\"%s/pg_resetwal\" -f -c %u,%u \"%s\"",
new_cluster.bindir, new_cluster.bindir,
old_cluster.controldata.chkpnt_nxtxid, old_cluster.controldata.chkpnt_nxtxid,
...@@ -453,7 +453,7 @@ copy_xact_xlog_xid(void) ...@@ -453,7 +453,7 @@ copy_xact_xlog_xid(void)
* we preserve all files and contents, so we must preserve both "next" * we preserve all files and contents, so we must preserve both "next"
* counters here and the oldest multi present on system. * counters here and the oldest multi present on system.
*/ */
exec_prog(UTILITY_LOG_FILE, NULL, true, exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/pg_resetwal\" -O %u -m %u,%u \"%s\"", "\"%s/pg_resetwal\" -O %u -m %u,%u \"%s\"",
new_cluster.bindir, new_cluster.bindir,
old_cluster.controldata.chkpnt_nxtmxoff, old_cluster.controldata.chkpnt_nxtmxoff,
...@@ -481,7 +481,7 @@ copy_xact_xlog_xid(void) ...@@ -481,7 +481,7 @@ copy_xact_xlog_xid(void)
* might end up wrapped around (i.e. 0) if the old cluster had * might end up wrapped around (i.e. 0) if the old cluster had
* next=MaxMultiXactId, but multixact.c can cope with that just fine. * next=MaxMultiXactId, but multixact.c can cope with that just fine.
*/ */
exec_prog(UTILITY_LOG_FILE, NULL, true, exec_prog(UTILITY_LOG_FILE, NULL, true, true,
"\"%s/pg_resetwal\" -m %u,%u \"%s\"", "\"%s/pg_resetwal\" -m %u,%u \"%s\"",
new_cluster.bindir, new_cluster.bindir,
old_cluster.controldata.chkpnt_nxtmulti + 1, old_cluster.controldata.chkpnt_nxtmulti + 1,
...@@ -492,7 +492,7 @@ copy_xact_xlog_xid(void) ...@@ -492,7 +492,7 @@ copy_xact_xlog_xid(void)
/* now reset the wal archives in the new cluster */ /* now reset the wal archives in the new cluster */
prep_status("Resetting WAL archives"); prep_status("Resetting WAL archives");
exec_prog(UTILITY_LOG_FILE, NULL, true, exec_prog(UTILITY_LOG_FILE, NULL, true, true,
/* use timeline 1 to match controldata and no WAL history file */ /* use timeline 1 to match controldata and no WAL history file */
"\"%s/pg_resetwal\" -l 00000001%s \"%s\"", new_cluster.bindir, "\"%s/pg_resetwal\" -l 00000001%s \"%s\"", new_cluster.bindir,
old_cluster.controldata.nextxlogfile + 8, old_cluster.controldata.nextxlogfile + 8,
......
...@@ -360,7 +360,7 @@ void generate_old_dump(void); ...@@ -360,7 +360,7 @@ void generate_old_dump(void);
#define EXEC_PSQL_ARGS "--echo-queries --set ON_ERROR_STOP=on --no-psqlrc --dbname=template1" #define EXEC_PSQL_ARGS "--echo-queries --set ON_ERROR_STOP=on --no-psqlrc --dbname=template1"
bool exec_prog(const char *log_file, const char *opt_log_file, bool exec_prog(const char *log_file, const char *opt_log_file,
bool throw_error, const char *fmt,...) pg_attribute_printf(4, 5); bool report_error, bool exit_on_error, const char *fmt,...) pg_attribute_printf(5, 6);
void verify_directories(void); void verify_directories(void);
bool pid_lock_file_exists(const char *datadir); bool pid_lock_file_exists(const char *datadir);
...@@ -416,8 +416,8 @@ PGresult *executeQueryOrDie(PGconn *conn, const char *fmt,...) pg_attribute_pr ...@@ -416,8 +416,8 @@ PGresult *executeQueryOrDie(PGconn *conn, const char *fmt,...) pg_attribute_pr
char *cluster_conn_opts(ClusterInfo *cluster); char *cluster_conn_opts(ClusterInfo *cluster);
bool start_postmaster(ClusterInfo *cluster, bool throw_error); bool start_postmaster(ClusterInfo *cluster, bool report_and_exit_on_error);
void stop_postmaster(bool fast); void stop_postmaster(bool in_atexit);
uint32 get_major_server_version(ClusterInfo *cluster); uint32 get_major_server_version(ClusterInfo *cluster);
void check_pghost_envvar(void); void check_pghost_envvar(void);
......
...@@ -191,7 +191,7 @@ stop_postmaster_atexit(void) ...@@ -191,7 +191,7 @@ stop_postmaster_atexit(void)
bool bool
start_postmaster(ClusterInfo *cluster, bool throw_error) start_postmaster(ClusterInfo *cluster, bool report_and_exit_on_error)
{ {
char cmd[MAXPGPATH * 4 + 1000]; char cmd[MAXPGPATH * 4 + 1000];
PGconn *conn; PGconn *conn;
...@@ -257,11 +257,11 @@ start_postmaster(ClusterInfo *cluster, bool throw_error) ...@@ -257,11 +257,11 @@ start_postmaster(ClusterInfo *cluster, bool throw_error)
(strcmp(SERVER_LOG_FILE, (strcmp(SERVER_LOG_FILE,
SERVER_START_LOG_FILE) != 0) ? SERVER_START_LOG_FILE) != 0) ?
SERVER_LOG_FILE : NULL, SERVER_LOG_FILE : NULL,
false, report_and_exit_on_error, false,
"%s", cmd); "%s", cmd);
/* Did it fail and we are just testing if the server could be started? */ /* Did it fail and we are just testing if the server could be started? */
if (!pg_ctl_return && !throw_error) if (!pg_ctl_return && !report_and_exit_on_error)
return false; return false;
/* /*
...@@ -305,9 +305,9 @@ start_postmaster(ClusterInfo *cluster, bool throw_error) ...@@ -305,9 +305,9 @@ start_postmaster(ClusterInfo *cluster, bool throw_error)
PQfinish(conn); PQfinish(conn);
/* /*
* If pg_ctl failed, and the connection didn't fail, and throw_error is * If pg_ctl failed, and the connection didn't fail, and
* enabled, fail now. This could happen if the server was already * report_and_exit_on_error is enabled, fail now. This
* running. * could happen if the server was already running.
*/ */
if (!pg_ctl_return) if (!pg_ctl_return)
{ {
...@@ -322,7 +322,7 @@ start_postmaster(ClusterInfo *cluster, bool throw_error) ...@@ -322,7 +322,7 @@ start_postmaster(ClusterInfo *cluster, bool throw_error)
void void
stop_postmaster(bool fast) stop_postmaster(bool in_atexit)
{ {
ClusterInfo *cluster; ClusterInfo *cluster;
...@@ -333,11 +333,11 @@ stop_postmaster(bool fast) ...@@ -333,11 +333,11 @@ stop_postmaster(bool fast)
else else
return; /* no cluster running */ return; /* no cluster running */
exec_prog(SERVER_STOP_LOG_FILE, NULL, !fast, exec_prog(SERVER_STOP_LOG_FILE, NULL, !in_atexit, !in_atexit,
"\"%s/pg_ctl\" -w -D \"%s\" -o \"%s\" %s stop", "\"%s/pg_ctl\" -w -D \"%s\" -o \"%s\" %s stop",
cluster->bindir, cluster->pgconfig, cluster->bindir, cluster->pgconfig,
cluster->pgopts ? cluster->pgopts : "", cluster->pgopts ? cluster->pgopts : "",
fast ? "-m fast" : "-m smart"); in_atexit ? "-m fast" : "-m smart");
os_info.running_cluster = NULL; os_info.running_cluster = NULL;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment