Commit b7b8cc0c authored by Tom Lane's avatar Tom Lane

Redesign parallel dump/restore's wait-for-workers logic.

The ListenToWorkers/ReapWorkerStatus APIs were messy and hard to use.
Instead, make DispatchJobForTocEntry register a callback function that
will take care of state cleanup, doing whatever had been done by the caller
of ReapWorkerStatus in the old design.  (This callback is essentially just
the old mark_work_done function in the restore case, and a trivial test for
worker failure in the dump case.)  Then we can have ListenToWorkers call
the callback immediately on receipt of a status message, and return the
worker to WRKR_IDLE state; so the WRKR_FINISHED state goes away.

This allows us to design a unified wait-for-worker-messages loop:
WaitForWorkers replaces EnsureIdleWorker and EnsureWorkersFinished as well
as the mess in restore_toc_entries_parallel.  Also, we no longer need the
fragile API spec that the caller of DispatchJobForTocEntry is responsible
for ensuring there's an idle worker, since DispatchJobForTocEntry can just
wait until there is one.

In passing, I got rid of the ParallelArgs struct, which was a net negative
in terms of notational verboseness, and didn't seem to be providing any
noticeable amount of abstraction either.

Tom Lane, reviewed by Kevin Grittner

Discussion: <1188.1464544443@sss.pgh.pa.us>
parent f31a931f
...@@ -35,9 +35,11 @@ ...@@ -35,9 +35,11 @@
* the required action (dump or restore) and returns a malloc'd status string. * the required action (dump or restore) and returns a malloc'd status string.
* The status string is passed back to the master where it is interpreted by * The status string is passed back to the master where it is interpreted by
* AH->MasterEndParallelItemPtr, another format-specific routine. That * AH->MasterEndParallelItemPtr, another format-specific routine. That
* function can update state or catalog information on the master's side, * function can update format-specific information on the master's side,
* depending on the reply from the worker process. In the end it returns a * depending on the reply from the worker process. In the end it returns a
* status code, which is 0 for successful execution. * status code, which we pass to the ParallelCompletionPtr callback function
* that was passed to DispatchJobForTocEntry(). The callback function does
* state updating for the master control logic in pg_backup_archiver.c.
* *
* Remember that we have forked off the workers only after we have read in * Remember that we have forked off the workers only after we have read in
* the catalog. That's why our worker processes can also access the catalog * the catalog. That's why our worker processes can also access the catalog
...@@ -48,13 +50,8 @@ ...@@ -48,13 +50,8 @@
* In the master process, the workerStatus field for each worker has one of * In the master process, the workerStatus field for each worker has one of
* the following values: * the following values:
* WRKR_IDLE: it's waiting for a command * WRKR_IDLE: it's waiting for a command
* WRKR_WORKING: it's been sent a command * WRKR_WORKING: it's working on a command
* WRKR_FINISHED: it's returned a result
* WRKR_TERMINATED: process ended * WRKR_TERMINATED: process ended
* The FINISHED state indicates that the worker is idle, but we've not yet
* dealt with the status code it returned from the prior command.
* ReapWorkerStatus() extracts the unhandled command status value and sets
* the workerStatus back to WRKR_IDLE.
*/ */
#include "postgres_fe.h" #include "postgres_fe.h"
...@@ -79,6 +76,8 @@ ...@@ -79,6 +76,8 @@
#define PIPE_READ 0 #define PIPE_READ 0
#define PIPE_WRITE 1 #define PIPE_WRITE 1
#define NO_SLOT (-1) /* Failure result for GetIdleWorker() */
#ifdef WIN32 #ifdef WIN32
/* /*
...@@ -175,9 +174,12 @@ static void setup_cancel_handler(void); ...@@ -175,9 +174,12 @@ static void setup_cancel_handler(void);
static void set_cancel_pstate(ParallelState *pstate); static void set_cancel_pstate(ParallelState *pstate);
static void set_cancel_slot_archive(ParallelSlot *slot, ArchiveHandle *AH); static void set_cancel_slot_archive(ParallelSlot *slot, ArchiveHandle *AH);
static void RunWorker(ArchiveHandle *AH, ParallelSlot *slot); static void RunWorker(ArchiveHandle *AH, ParallelSlot *slot);
static int GetIdleWorker(ParallelState *pstate);
static bool HasEveryWorkerTerminated(ParallelState *pstate); static bool HasEveryWorkerTerminated(ParallelState *pstate);
static void lockTableForWorker(ArchiveHandle *AH, TocEntry *te); static void lockTableForWorker(ArchiveHandle *AH, TocEntry *te);
static void WaitForCommands(ArchiveHandle *AH, int pipefd[2]); static void WaitForCommands(ArchiveHandle *AH, int pipefd[2]);
static bool ListenToWorkers(ArchiveHandle *AH, ParallelState *pstate,
bool do_wait);
static char *getMessageFromMaster(int pipefd[2]); static char *getMessageFromMaster(int pipefd[2]);
static void sendMessageToMaster(int pipefd[2], const char *str); static void sendMessageToMaster(int pipefd[2], const char *str);
static int select_loop(int maxFd, fd_set *workerset); static int select_loop(int maxFd, fd_set *workerset);
...@@ -349,8 +351,8 @@ archive_close_connection(int code, void *arg) ...@@ -349,8 +351,8 @@ archive_close_connection(int code, void *arg)
* fail to detect it because there would be no EOF condition on * fail to detect it because there would be no EOF condition on
* the other end of the pipe.) * the other end of the pipe.)
*/ */
if (slot->args->AH) if (slot->AH)
DisconnectDatabase(&(slot->args->AH->public)); DisconnectDatabase(&(slot->AH->public));
#ifdef WIN32 #ifdef WIN32
closesocket(slot->pipeRevRead); closesocket(slot->pipeRevRead);
...@@ -407,7 +409,7 @@ ShutdownWorkersHard(ParallelState *pstate) ...@@ -407,7 +409,7 @@ ShutdownWorkersHard(ParallelState *pstate)
EnterCriticalSection(&signal_info_lock); EnterCriticalSection(&signal_info_lock);
for (i = 0; i < pstate->numWorkers; i++) for (i = 0; i < pstate->numWorkers; i++)
{ {
ArchiveHandle *AH = pstate->parallelSlot[i].args->AH; ArchiveHandle *AH = pstate->parallelSlot[i].AH;
char errbuf[1]; char errbuf[1];
if (AH != NULL && AH->connCancel != NULL) if (AH != NULL && AH->connCancel != NULL)
...@@ -634,7 +636,7 @@ consoleHandler(DWORD dwCtrlType) ...@@ -634,7 +636,7 @@ consoleHandler(DWORD dwCtrlType)
for (i = 0; i < signal_info.pstate->numWorkers; i++) for (i = 0; i < signal_info.pstate->numWorkers; i++)
{ {
ParallelSlot *slot = &(signal_info.pstate->parallelSlot[i]); ParallelSlot *slot = &(signal_info.pstate->parallelSlot[i]);
ArchiveHandle *AH = slot->args->AH; ArchiveHandle *AH = slot->AH;
HANDLE hThread = (HANDLE) slot->hThread; HANDLE hThread = (HANDLE) slot->hThread;
/* /*
...@@ -789,7 +791,7 @@ set_cancel_slot_archive(ParallelSlot *slot, ArchiveHandle *AH) ...@@ -789,7 +791,7 @@ set_cancel_slot_archive(ParallelSlot *slot, ArchiveHandle *AH)
EnterCriticalSection(&signal_info_lock); EnterCriticalSection(&signal_info_lock);
#endif #endif
slot->args->AH = AH; slot->AH = AH;
#ifdef WIN32 #ifdef WIN32
LeaveCriticalSection(&signal_info_lock); LeaveCriticalSection(&signal_info_lock);
...@@ -935,9 +937,10 @@ ParallelBackupStart(ArchiveHandle *AH) ...@@ -935,9 +937,10 @@ ParallelBackupStart(ArchiveHandle *AH)
strerror(errno)); strerror(errno));
slot->workerStatus = WRKR_IDLE; slot->workerStatus = WRKR_IDLE;
slot->args = (ParallelArgs *) pg_malloc(sizeof(ParallelArgs)); slot->AH = NULL;
slot->args->AH = NULL; slot->te = NULL;
slot->args->te = NULL; slot->callback = NULL;
slot->callback_data = NULL;
/* master's ends of the pipes */ /* master's ends of the pipes */
slot->pipeRead = pipeWM[PIPE_READ]; slot->pipeRead = pipeWM[PIPE_READ];
...@@ -1071,20 +1074,28 @@ ParallelBackupEnd(ArchiveHandle *AH, ParallelState *pstate) ...@@ -1071,20 +1074,28 @@ ParallelBackupEnd(ArchiveHandle *AH, ParallelState *pstate)
} }
/* /*
* Dispatch a job to some free worker (caller must ensure there is one!) * Dispatch a job to some free worker.
* *
* te is the TocEntry to be processed, act is the action to be taken on it. * te is the TocEntry to be processed, act is the action to be taken on it.
* callback is the function to call on completion of the job.
*
* If no worker is currently available, this will block, and previously
* registered callback functions may be called.
*/ */
void void
DispatchJobForTocEntry(ArchiveHandle *AH, ParallelState *pstate, TocEntry *te, DispatchJobForTocEntry(ArchiveHandle *AH,
T_Action act) ParallelState *pstate,
TocEntry *te,
T_Action act,
ParallelCompletionPtr callback,
void *callback_data)
{ {
int worker; int worker;
char *arg; char *arg;
/* our caller makes sure that at least one worker is idle */ /* Get a worker, waiting if none are idle */
worker = GetIdleWorker(pstate); while ((worker = GetIdleWorker(pstate)) == NO_SLOT)
Assert(worker != NO_SLOT); WaitForWorkers(AH, pstate, WFW_ONE_IDLE);
/* Construct and send command string */ /* Construct and send command string */
arg = (AH->MasterStartParallelItemPtr) (AH, te, act); arg = (AH->MasterStartParallelItemPtr) (AH, te, act);
...@@ -1095,14 +1106,16 @@ DispatchJobForTocEntry(ArchiveHandle *AH, ParallelState *pstate, TocEntry *te, ...@@ -1095,14 +1106,16 @@ DispatchJobForTocEntry(ArchiveHandle *AH, ParallelState *pstate, TocEntry *te,
/* Remember worker is busy, and which TocEntry it's working on */ /* Remember worker is busy, and which TocEntry it's working on */
pstate->parallelSlot[worker].workerStatus = WRKR_WORKING; pstate->parallelSlot[worker].workerStatus = WRKR_WORKING;
pstate->parallelSlot[worker].args->te = te; pstate->parallelSlot[worker].te = te;
pstate->parallelSlot[worker].callback = callback;
pstate->parallelSlot[worker].callback_data = callback_data;
} }
/* /*
* Find an idle worker and return its slot number. * Find an idle worker and return its slot number.
* Return NO_SLOT if none are idle. * Return NO_SLOT if none are idle.
*/ */
int static int
GetIdleWorker(ParallelState *pstate) GetIdleWorker(ParallelState *pstate)
{ {
int i; int i;
...@@ -1274,17 +1287,16 @@ WaitForCommands(ArchiveHandle *AH, int pipefd[2]) ...@@ -1274,17 +1287,16 @@ WaitForCommands(ArchiveHandle *AH, int pipefd[2])
* immediately if there is none available. * immediately if there is none available.
* *
* When we get a status message, we let MasterEndParallelItemPtr process it, * When we get a status message, we let MasterEndParallelItemPtr process it,
* then save the resulting status code and switch the worker's state to * then pass the resulting status code to the callback function that was
* WRKR_FINISHED. Later, caller must call ReapWorkerStatus() to verify * specified to DispatchJobForTocEntry, then reset the worker status to IDLE.
* that the status was "OK" and push the worker back to IDLE state.
* *
* XXX Rube Goldberg would be proud of this API, but no one else should be. * Returns true if we collected a status message, else false.
* *
* XXX is it worth checking for more than one status message per call? * XXX is it worth checking for more than one status message per call?
* It seems somewhat unlikely that multiple workers would finish at exactly * It seems somewhat unlikely that multiple workers would finish at exactly
* the same time. * the same time.
*/ */
void static bool
ListenToWorkers(ArchiveHandle *AH, ParallelState *pstate, bool do_wait) ListenToWorkers(ArchiveHandle *AH, ParallelState *pstate, bool do_wait)
{ {
int worker; int worker;
...@@ -1298,34 +1310,39 @@ ListenToWorkers(ArchiveHandle *AH, ParallelState *pstate, bool do_wait) ...@@ -1298,34 +1310,39 @@ ListenToWorkers(ArchiveHandle *AH, ParallelState *pstate, bool do_wait)
/* If do_wait is true, we must have detected EOF on some socket */ /* If do_wait is true, we must have detected EOF on some socket */
if (do_wait) if (do_wait)
exit_horribly(modulename, "a worker process died unexpectedly\n"); exit_horribly(modulename, "a worker process died unexpectedly\n");
return; return false;
} }
/* Process it and update our idea of the worker's status */ /* Process it and update our idea of the worker's status */
if (messageStartsWith(msg, "OK ")) if (messageStartsWith(msg, "OK "))
{ {
TocEntry *te = pstate->parallelSlot[worker].args->te; ParallelSlot *slot = &pstate->parallelSlot[worker];
TocEntry *te = slot->te;
char *statusString; char *statusString;
int status;
if (messageStartsWith(msg, "OK RESTORE ")) if (messageStartsWith(msg, "OK RESTORE "))
{ {
statusString = msg + strlen("OK RESTORE "); statusString = msg + strlen("OK RESTORE ");
pstate->parallelSlot[worker].status = status =
(AH->MasterEndParallelItemPtr) (AH->MasterEndParallelItemPtr)
(AH, te, statusString, ACT_RESTORE); (AH, te, statusString, ACT_RESTORE);
slot->callback(AH, te, status, slot->callback_data);
} }
else if (messageStartsWith(msg, "OK DUMP ")) else if (messageStartsWith(msg, "OK DUMP "))
{ {
statusString = msg + strlen("OK DUMP "); statusString = msg + strlen("OK DUMP ");
pstate->parallelSlot[worker].status = status =
(AH->MasterEndParallelItemPtr) (AH->MasterEndParallelItemPtr)
(AH, te, statusString, ACT_DUMP); (AH, te, statusString, ACT_DUMP);
slot->callback(AH, te, status, slot->callback_data);
} }
else else
exit_horribly(modulename, exit_horribly(modulename,
"invalid message received from worker: \"%s\"\n", "invalid message received from worker: \"%s\"\n",
msg); msg);
pstate->parallelSlot[worker].workerStatus = WRKR_FINISHED; slot->workerStatus = WRKR_IDLE;
slot->te = NULL;
} }
else else
exit_horribly(modulename, exit_horribly(modulename,
...@@ -1334,110 +1351,79 @@ ListenToWorkers(ArchiveHandle *AH, ParallelState *pstate, bool do_wait) ...@@ -1334,110 +1351,79 @@ ListenToWorkers(ArchiveHandle *AH, ParallelState *pstate, bool do_wait)
/* Free the string returned from getMessageFromWorker */ /* Free the string returned from getMessageFromWorker */
free(msg); free(msg);
}
/*
* Check to see if any worker is in WRKR_FINISHED state. If so,
* return its command status code into *status, reset it to IDLE state,
* and return its slot number. Otherwise return NO_SLOT.
*
* This function is executed in the master process.
*/
int
ReapWorkerStatus(ParallelState *pstate, int *status)
{
int i;
for (i = 0; i < pstate->numWorkers; i++) return true;
{
if (pstate->parallelSlot[i].workerStatus == WRKR_FINISHED)
{
*status = pstate->parallelSlot[i].status;
pstate->parallelSlot[i].status = 0;
pstate->parallelSlot[i].workerStatus = WRKR_IDLE;
return i;
}
}
return NO_SLOT;
} }
/* /*
* Wait, if necessary, until we have at least one idle worker. * Check for status results from workers, waiting if necessary.
* Reap worker status as necessary to move FINISHED workers to IDLE state.
* *
* We assume that no extra processing is required when reaping a finished * Available wait modes are:
* command, except for checking that the status was OK (zero). * WFW_NO_WAIT: reap any available status, but don't block
* Caution: that assumption means that this function can only be used in * WFW_GOT_STATUS: wait for at least one more worker to finish
* parallel dump, not parallel restore, because the latter has a more * WFW_ONE_IDLE: wait for at least one worker to be idle
* complex set of rules about handling status. * WFW_ALL_IDLE: wait for all workers to be idle
*
* Any received results are passed to MasterEndParallelItemPtr and then
* to the callback specified to DispatchJobForTocEntry.
* *
* This function is executed in the master process. * This function is executed in the master process.
*/ */
void void
EnsureIdleWorker(ArchiveHandle *AH, ParallelState *pstate) WaitForWorkers(ArchiveHandle *AH, ParallelState *pstate, WFW_WaitOption mode)
{ {
int ret_worker; bool do_wait = false;
int work_status;
for (;;) /*
* In GOT_STATUS mode, always block waiting for a message, since we can't
* return till we get something. In other modes, we don't block the first
* time through the loop.
*/
if (mode == WFW_GOT_STATUS)
{ {
int nTerm = 0; /* Assert that caller knows what it's doing */
Assert(!IsEveryWorkerIdle(pstate));
while ((ret_worker = ReapWorkerStatus(pstate, &work_status)) != NO_SLOT) do_wait = true;
{ }
if (work_status != 0)
exit_horribly(modulename, "error processing a parallel work item\n");
nTerm++;
}
/*
* We need to make sure that we have an idle worker before dispatching
* the next item. If nTerm > 0 we already have that (quick check).
*/
if (nTerm > 0)
return;
/* explicit check for an idle worker */
if (GetIdleWorker(pstate) != NO_SLOT)
return;
for (;;)
{
/* /*
* If we have no idle worker, read the result of one or more workers * Check for status messages, even if we don't need to block. We do
* and loop the loop to call ReapWorkerStatus() on them * not try very hard to reap all available messages, though, since
* there's unlikely to be more than one.
*/ */
ListenToWorkers(AH, pstate, true); if (ListenToWorkers(AH, pstate, do_wait))
} {
} /*
* If we got a message, we are done by definition for GOT_STATUS
/* * mode, and we can also be certain that there's at least one idle
* Wait for all workers to be idle. * worker. So we're done in all but ALL_IDLE mode.
* Reap worker status as necessary to move FINISHED workers to IDLE state. */
* if (mode != WFW_ALL_IDLE)
* We assume that no extra processing is required when reaping a finished return;
* command, except for checking that the status was OK (zero). }
* Caution: that assumption means that this function can only be used in
* parallel dump, not parallel restore, because the latter has a more
* complex set of rules about handling status.
*
* This function is executed in the master process.
*/
void
EnsureWorkersFinished(ArchiveHandle *AH, ParallelState *pstate)
{
int work_status;
if (!pstate || pstate->numWorkers == 1) /* Check whether we must wait for new status messages */
return; switch (mode)
{
case WFW_NO_WAIT:
return; /* never wait */
case WFW_GOT_STATUS:
Assert(false); /* can't get here, because we waited */
break;
case WFW_ONE_IDLE:
if (GetIdleWorker(pstate) != NO_SLOT)
return;
break;
case WFW_ALL_IDLE:
if (IsEveryWorkerIdle(pstate))
return;
break;
}
/* Waiting for the remaining worker processes to finish */ /* Loop back, and this time wait for something to happen */
while (!IsEveryWorkerIdle(pstate)) do_wait = true;
{
if (ReapWorkerStatus(pstate, &work_status) == NO_SLOT)
ListenToWorkers(AH, pstate, true);
else if (work_status != 0)
exit_horribly(modulename,
"error processing a parallel work item\n");
} }
} }
......
...@@ -2,14 +2,11 @@ ...@@ -2,14 +2,11 @@
* *
* parallel.h * parallel.h
* *
* Parallel support header file for the pg_dump archiver * Parallel support for pg_dump and pg_restore
* *
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* The author is not responsible for loss or damages that may
* result from its use.
*
* IDENTIFICATION * IDENTIFICATION
* src/bin/pg_dump/parallel.h * src/bin/pg_dump/parallel.h
* *
...@@ -21,31 +18,53 @@ ...@@ -21,31 +18,53 @@
#include "pg_backup_archiver.h" #include "pg_backup_archiver.h"
/* Function to call in master process on completion of a worker task */
typedef void (*ParallelCompletionPtr) (ArchiveHandle *AH,
TocEntry *te,
int status,
void *callback_data);
/* Wait options for WaitForWorkers */
typedef enum
{
WFW_NO_WAIT,
WFW_GOT_STATUS,
WFW_ONE_IDLE,
WFW_ALL_IDLE
} WFW_WaitOption;
/* Worker process statuses */
typedef enum typedef enum
{ {
WRKR_TERMINATED = 0,
WRKR_IDLE, WRKR_IDLE,
WRKR_WORKING, WRKR_WORKING,
WRKR_FINISHED WRKR_TERMINATED
} T_WorkerStatus; } T_WorkerStatus;
/* Arguments needed for a worker process */ /*
typedef struct ParallelArgs * Per-parallel-worker state of parallel.c.
{ *
ArchiveHandle *AH; * Much of this is valid only in the master process (or, on Windows, should
TocEntry *te; * be touched only by the master thread). But the AH field should be touched
} ParallelArgs; * only by workers. The pipe descriptors are valid everywhere.
*/
/* State for each parallel activity slot */
typedef struct ParallelSlot typedef struct ParallelSlot
{ {
ParallelArgs *args; T_WorkerStatus workerStatus; /* see enum above */
T_WorkerStatus workerStatus;
int status; /* These fields are valid if workerStatus == WRKR_WORKING: */
TocEntry *te; /* item being worked on */
ParallelCompletionPtr callback; /* function to call on completion */
void *callback_data; /* passthru data for it */
ArchiveHandle *AH; /* Archive data worker is using */
int pipeRead; /* master's end of the pipes */ int pipeRead; /* master's end of the pipes */
int pipeWrite; int pipeWrite;
int pipeRevRead; /* child's end of the pipes */ int pipeRevRead; /* child's end of the pipes */
int pipeRevWrite; int pipeRevWrite;
/* Child process/thread identity info: */
#ifdef WIN32 #ifdef WIN32
uintptr_t hThread; uintptr_t hThread;
unsigned int threadId; unsigned int threadId;
...@@ -54,12 +73,11 @@ typedef struct ParallelSlot ...@@ -54,12 +73,11 @@ typedef struct ParallelSlot
#endif #endif
} ParallelSlot; } ParallelSlot;
#define NO_SLOT (-1) /* Overall state for parallel.c */
typedef struct ParallelState typedef struct ParallelState
{ {
int numWorkers; int numWorkers; /* allowed number of workers */
ParallelSlot *parallelSlot; ParallelSlot *parallelSlot; /* array of numWorkers slots */
} ParallelState; } ParallelState;
#ifdef WIN32 #ifdef WIN32
...@@ -69,17 +87,17 @@ extern DWORD mainThreadId; ...@@ -69,17 +87,17 @@ extern DWORD mainThreadId;
extern void init_parallel_dump_utils(void); extern void init_parallel_dump_utils(void);
extern int GetIdleWorker(ParallelState *pstate);
extern bool IsEveryWorkerIdle(ParallelState *pstate); extern bool IsEveryWorkerIdle(ParallelState *pstate);
extern void ListenToWorkers(ArchiveHandle *AH, ParallelState *pstate, bool do_wait); extern void WaitForWorkers(ArchiveHandle *AH, ParallelState *pstate,
extern int ReapWorkerStatus(ParallelState *pstate, int *status); WFW_WaitOption mode);
extern void EnsureIdleWorker(ArchiveHandle *AH, ParallelState *pstate);
extern void EnsureWorkersFinished(ArchiveHandle *AH, ParallelState *pstate);
extern ParallelState *ParallelBackupStart(ArchiveHandle *AH); extern ParallelState *ParallelBackupStart(ArchiveHandle *AH);
extern void DispatchJobForTocEntry(ArchiveHandle *AH, extern void DispatchJobForTocEntry(ArchiveHandle *AH,
ParallelState *pstate, ParallelState *pstate,
TocEntry *te, T_Action act); TocEntry *te,
T_Action act,
ParallelCompletionPtr callback,
void *callback_data);
extern void ParallelBackupEnd(ArchiveHandle *AH, ParallelState *pstate); extern void ParallelBackupEnd(ArchiveHandle *AH, ParallelState *pstate);
extern void set_archive_cancel_info(ArchiveHandle *AH, PGconn *conn); extern void set_archive_cancel_info(ArchiveHandle *AH, PGconn *conn);
......
...@@ -97,9 +97,14 @@ static void par_list_remove(TocEntry *te); ...@@ -97,9 +97,14 @@ static void par_list_remove(TocEntry *te);
static TocEntry *get_next_work_item(ArchiveHandle *AH, static TocEntry *get_next_work_item(ArchiveHandle *AH,
TocEntry *ready_list, TocEntry *ready_list,
ParallelState *pstate); ParallelState *pstate);
static void mark_work_done(ArchiveHandle *AH, TocEntry *ready_list, static void mark_dump_job_done(ArchiveHandle *AH,
int worker, int status, TocEntry *te,
ParallelState *pstate); int status,
void *callback_data);
static void mark_restore_job_done(ArchiveHandle *AH,
TocEntry *te,
int status,
void *callback_data);
static void fix_dependencies(ArchiveHandle *AH); static void fix_dependencies(ArchiveHandle *AH);
static bool has_lock_conflicts(TocEntry *te1, TocEntry *te2); static bool has_lock_conflicts(TocEntry *te1, TocEntry *te2);
static void repoint_table_dependencies(ArchiveHandle *AH); static void repoint_table_dependencies(ArchiveHandle *AH);
...@@ -2355,8 +2360,8 @@ WriteDataChunks(ArchiveHandle *AH, ParallelState *pstate) ...@@ -2355,8 +2360,8 @@ WriteDataChunks(ArchiveHandle *AH, ParallelState *pstate)
* If we are in a parallel backup, then we are always the master * If we are in a parallel backup, then we are always the master
* process. Dispatch each data-transfer job to a worker. * process. Dispatch each data-transfer job to a worker.
*/ */
EnsureIdleWorker(AH, pstate); DispatchJobForTocEntry(AH, pstate, te, ACT_DUMP,
DispatchJobForTocEntry(AH, pstate, te, ACT_DUMP); mark_dump_job_done, NULL);
} }
else else
WriteDataChunksForTocEntry(AH, te); WriteDataChunksForTocEntry(AH, te);
...@@ -2365,9 +2370,32 @@ WriteDataChunks(ArchiveHandle *AH, ParallelState *pstate) ...@@ -2365,9 +2370,32 @@ WriteDataChunks(ArchiveHandle *AH, ParallelState *pstate)
/* /*
* If parallel, wait for workers to finish. * If parallel, wait for workers to finish.
*/ */
EnsureWorkersFinished(AH, pstate); if (pstate && pstate->numWorkers > 1)
WaitForWorkers(AH, pstate, WFW_ALL_IDLE);
} }
/*
* Callback function that's invoked in the master process after a step has
* been parallel dumped.
*
* We don't need to do anything except check for worker failure.
*/
static void
mark_dump_job_done(ArchiveHandle *AH,
TocEntry *te,
int status,
void *callback_data)
{
ahlog(AH, 1, "finished item %d %s %s\n",
te->dumpId, te->desc, te->tag);
if (status != 0)
exit_horribly(modulename, "worker process failed: exit code %d\n",
status);
}
void void
WriteDataChunksForTocEntry(ArchiveHandle *AH, TocEntry *te) WriteDataChunksForTocEntry(ArchiveHandle *AH, TocEntry *te)
{ {
...@@ -2751,9 +2779,9 @@ _tocEntryRequired(TocEntry *te, teSection curSection, RestoreOptions *ropt) ...@@ -2751,9 +2779,9 @@ _tocEntryRequired(TocEntry *te, teSection curSection, RestoreOptions *ropt)
return 0; return 0;
} }
if (ropt->schemaExcludeNames.head != NULL if (ropt->schemaExcludeNames.head != NULL &&
&& te->namespace te->namespace &&
&& simple_string_list_member(&ropt->schemaExcludeNames, te->namespace)) simple_string_list_member(&ropt->schemaExcludeNames, te->namespace))
return 0; return 0;
if (ropt->selTypes) if (ropt->selTypes)
...@@ -3769,11 +3797,9 @@ static void ...@@ -3769,11 +3797,9 @@ static void
restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate, restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate,
TocEntry *pending_list) TocEntry *pending_list)
{ {
int work_status;
bool skipped_some; bool skipped_some;
TocEntry ready_list; TocEntry ready_list;
TocEntry *next_work_item; TocEntry *next_work_item;
int ret_child;
ahlog(AH, 2, "entering restore_toc_entries_parallel\n"); ahlog(AH, 2, "entering restore_toc_entries_parallel\n");
...@@ -3850,54 +3876,29 @@ restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate, ...@@ -3850,54 +3876,29 @@ restore_toc_entries_parallel(ArchiveHandle *AH, ParallelState *pstate,
par_list_remove(next_work_item); par_list_remove(next_work_item);
DispatchJobForTocEntry(AH, pstate, next_work_item, ACT_RESTORE); DispatchJobForTocEntry(AH, pstate, next_work_item, ACT_RESTORE,
mark_restore_job_done, &ready_list);
} }
else else
{ {
/* at least one child is working and we have nothing ready. */ /* at least one child is working and we have nothing ready. */
} }
for (;;) /*
{ * Before dispatching another job, check to see if anything has
int nTerm = 0; * finished. We should check every time through the loop so as to
* reduce dependencies as soon as possible. If we were unable to
/* * dispatch any job this time through, wait until some worker finishes
* In order to reduce dependencies as soon as possible and * (and, hopefully, unblocks some pending item). If we did dispatch
* especially to reap the status of workers who are working on * something, continue as soon as there's at least one idle worker.
* items that pending items depend on, we do a non-blocking check * Note that in either case, there's guaranteed to be at least one
* for ended workers first. * idle worker when we return to the top of the loop. This ensures we
* * won't block inside DispatchJobForTocEntry, which would be
* However, if we do not have any other work items currently that * undesirable: we'd rather postpone dispatching until we see what's
* workers can work on, we do not busy-loop here but instead * been unblocked by finished jobs.
* really wait for at least one worker to terminate. Hence we call */
* ListenToWorkers(..., ..., do_wait = true) in this case. WaitForWorkers(AH, pstate,
*/ next_work_item ? WFW_ONE_IDLE : WFW_GOT_STATUS);
ListenToWorkers(AH, pstate, !next_work_item);
while ((ret_child = ReapWorkerStatus(pstate, &work_status)) != NO_SLOT)
{
nTerm++;
mark_work_done(AH, &ready_list, ret_child, work_status, pstate);
}
/*
* We need to make sure that we have an idle worker before
* re-running the loop. If nTerm > 0 we already have that (quick
* check).
*/
if (nTerm > 0)
break;
/* if nobody terminated, explicitly check for an idle worker */
if (GetIdleWorker(pstate) != NO_SLOT)
break;
/*
* If we have no idle worker, read the result of one or more
* workers and loop the loop to call ReapWorkerStatus() on them.
*/
ListenToWorkers(AH, pstate, true);
}
} }
ahlog(AH, 1, "finished main parallel loop\n"); ahlog(AH, 1, "finished main parallel loop\n");
...@@ -4025,9 +4026,11 @@ get_next_work_item(ArchiveHandle *AH, TocEntry *ready_list, ...@@ -4025,9 +4026,11 @@ get_next_work_item(ArchiveHandle *AH, TocEntry *ready_list,
int count = 0; int count = 0;
for (k = 0; k < pstate->numWorkers; k++) for (k = 0; k < pstate->numWorkers; k++)
if (pstate->parallelSlot[k].args->te != NULL && {
pstate->parallelSlot[k].args->te->section == SECTION_DATA) if (pstate->parallelSlot[k].workerStatus == WRKR_WORKING &&
pstate->parallelSlot[k].te->section == SECTION_DATA)
count++; count++;
}
if (pstate->numWorkers == 0 || count * 4 < pstate->numWorkers) if (pstate->numWorkers == 0 || count * 4 < pstate->numWorkers)
pref_non_data = false; pref_non_data = false;
} }
...@@ -4044,13 +4047,13 @@ get_next_work_item(ArchiveHandle *AH, TocEntry *ready_list, ...@@ -4044,13 +4047,13 @@ get_next_work_item(ArchiveHandle *AH, TocEntry *ready_list,
* that a currently running item also needs lock on, or vice versa. If * that a currently running item also needs lock on, or vice versa. If
* so, we don't want to schedule them together. * so, we don't want to schedule them together.
*/ */
for (i = 0; i < pstate->numWorkers && !conflicts; i++) for (i = 0; i < pstate->numWorkers; i++)
{ {
TocEntry *running_te; TocEntry *running_te;
if (pstate->parallelSlot[i].workerStatus != WRKR_WORKING) if (pstate->parallelSlot[i].workerStatus != WRKR_WORKING)
continue; continue;
running_te = pstate->parallelSlot[i].args->te; running_te = pstate->parallelSlot[i].te;
if (has_lock_conflicts(te, running_te) || if (has_lock_conflicts(te, running_te) ||
has_lock_conflicts(running_te, te)) has_lock_conflicts(running_te, te))
...@@ -4091,10 +4094,8 @@ get_next_work_item(ArchiveHandle *AH, TocEntry *ready_list, ...@@ -4091,10 +4094,8 @@ get_next_work_item(ArchiveHandle *AH, TocEntry *ready_list,
* our work is finished, the master process will assign us a new work item. * our work is finished, the master process will assign us a new work item.
*/ */
int int
parallel_restore(ParallelArgs *args) parallel_restore(ArchiveHandle *AH, TocEntry *te)
{ {
ArchiveHandle *AH = args->AH;
TocEntry *te = args->te;
int status; int status;
Assert(AH->connection != NULL); Assert(AH->connection != NULL);
...@@ -4110,22 +4111,18 @@ parallel_restore(ParallelArgs *args) ...@@ -4110,22 +4111,18 @@ parallel_restore(ParallelArgs *args)
/* /*
* Housekeeping to be done after a step has been parallel restored. * Callback function that's invoked in the master process after a step has
* been parallel restored.
* *
* Clear the appropriate slot, free all the extra memory we allocated, * Update status and reduce the dependency count of any dependent items.
* update status, and reduce the dependency count of any dependent items.
*/ */
static void static void
mark_work_done(ArchiveHandle *AH, TocEntry *ready_list, mark_restore_job_done(ArchiveHandle *AH,
int worker, int status, TocEntry *te,
ParallelState *pstate) int status,
void *callback_data)
{ {
TocEntry *te = NULL; TocEntry *ready_list = (TocEntry *) callback_data;
te = pstate->parallelSlot[worker].args->te;
if (te == NULL)
exit_horribly(modulename, "could not find slot of finished worker\n");
ahlog(AH, 1, "finished item %d %s %s\n", ahlog(AH, 1, "finished item %d %s %s\n",
te->dumpId, te->desc, te->tag); te->dumpId, te->desc, te->tag);
......
...@@ -111,7 +111,6 @@ typedef z_stream *z_streamp; ...@@ -111,7 +111,6 @@ typedef z_stream *z_streamp;
typedef struct _archiveHandle ArchiveHandle; typedef struct _archiveHandle ArchiveHandle;
typedef struct _tocEntry TocEntry; typedef struct _tocEntry TocEntry;
struct ParallelArgs;
struct ParallelState; struct ParallelState;
#define READ_ERROR_EXIT(fd) \ #define READ_ERROR_EXIT(fd) \
...@@ -375,7 +374,7 @@ struct _tocEntry ...@@ -375,7 +374,7 @@ struct _tocEntry
int nLockDeps; /* number of such dependencies */ int nLockDeps; /* number of such dependencies */
}; };
extern int parallel_restore(struct ParallelArgs *args); extern int parallel_restore(ArchiveHandle *AH, TocEntry *te);
extern void on_exit_close_archive(Archive *AHX); extern void on_exit_close_archive(Archive *AHX);
extern void warn_or_exit_horribly(ArchiveHandle *AH, const char *modulename, const char *fmt,...) pg_attribute_printf(3, 4); extern void warn_or_exit_horribly(ArchiveHandle *AH, const char *modulename, const char *fmt,...) pg_attribute_printf(3, 4);
......
...@@ -820,13 +820,9 @@ _WorkerJobRestoreCustom(ArchiveHandle *AH, TocEntry *te) ...@@ -820,13 +820,9 @@ _WorkerJobRestoreCustom(ArchiveHandle *AH, TocEntry *te)
*/ */
const int buflen = 64; const int buflen = 64;
char *buf = (char *) pg_malloc(buflen); char *buf = (char *) pg_malloc(buflen);
ParallelArgs pargs;
int status; int status;
pargs.AH = AH; status = parallel_restore(AH, te);
pargs.te = te;
status = parallel_restore(&pargs);
snprintf(buf, buflen, "OK RESTORE %d %d %d", te->dumpId, status, snprintf(buf, buflen, "OK RESTORE %d %d %d", te->dumpId, status,
status == WORKER_IGNORED_ERRORS ? AH->public.n_errors : 0); status == WORKER_IGNORED_ERRORS ? AH->public.n_errors : 0);
......
...@@ -826,13 +826,9 @@ _WorkerJobRestoreDirectory(ArchiveHandle *AH, TocEntry *te) ...@@ -826,13 +826,9 @@ _WorkerJobRestoreDirectory(ArchiveHandle *AH, TocEntry *te)
*/ */
const int buflen = 64; const int buflen = 64;
char *buf = (char *) pg_malloc(buflen); char *buf = (char *) pg_malloc(buflen);
ParallelArgs pargs;
int status; int status;
pargs.AH = AH; status = parallel_restore(AH, te);
pargs.te = te;
status = parallel_restore(&pargs);
snprintf(buf, buflen, "OK RESTORE %d %d %d", te->dumpId, status, snprintf(buf, buflen, "OK RESTORE %d %d %d", te->dumpId, status,
status == WORKER_IGNORED_ERRORS ? AH->public.n_errors : 0); status == WORKER_IGNORED_ERRORS ? AH->public.n_errors : 0);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment