Commit 6e71dd7c authored by Robert Haas's avatar Robert Haas

Modify tqueue infrastructure to support transient record types.

Commit 4a4e6893, which introduced this
mechanism, failed to account for the fact that the RECORD pseudo-type
uses transient typmods that are only meaningful within a single
backend.  Transferring such tuples without modification between two
cooperating backends does not work.  This commit installs a system
for passing the tuple descriptors over the same shm_mq being used to
send the tuples themselves.  The two sides might not assign the same
transient typmod to any given tuple descriptor, so we must also
substitute the appropriate receiver-side typmod for the one used by
the sender.  That adds some CPU overhead, but still seems better than
being unable to pass records between cooperating parallel processes.

Along the way, move the logic for handling multiple tuple queues from
tqueue.c to nodeGather.c; tqueue.c now provides a TupleQueueReader,
which reads from a single queue, rather than a TupleQueueFunnel, which
potentially reads from multiple queues.  This change was suggested
previously as a way to make sure that nodeGather.c rather than tqueue.c
had policy control over the order in which to read from queues, but
it wasn't clear to me until now how good an idea it was.  typmod
mapping needs to be performed separately for each queue, and it is
much simpler if the tqueue.c code handles that and leaves multiplexing
multiple queues to higher layers of the stack.
parent cbb82e37
...@@ -36,11 +36,13 @@ ...@@ -36,11 +36,13 @@
#include "executor/nodeGather.h" #include "executor/nodeGather.h"
#include "executor/nodeSubplan.h" #include "executor/nodeSubplan.h"
#include "executor/tqueue.h" #include "executor/tqueue.h"
#include "miscadmin.h"
#include "utils/memutils.h" #include "utils/memutils.h"
#include "utils/rel.h" #include "utils/rel.h"
static TupleTableSlot *gather_getnext(GatherState *gatherstate); static TupleTableSlot *gather_getnext(GatherState *gatherstate);
static HeapTuple gather_readnext(GatherState *gatherstate);
static void ExecShutdownGatherWorkers(GatherState *node); static void ExecShutdownGatherWorkers(GatherState *node);
...@@ -125,6 +127,7 @@ ExecInitGather(Gather *node, EState *estate, int eflags) ...@@ -125,6 +127,7 @@ ExecInitGather(Gather *node, EState *estate, int eflags)
TupleTableSlot * TupleTableSlot *
ExecGather(GatherState *node) ExecGather(GatherState *node)
{ {
TupleTableSlot *fslot = node->funnel_slot;
int i; int i;
TupleTableSlot *slot; TupleTableSlot *slot;
TupleTableSlot *resultSlot; TupleTableSlot *resultSlot;
...@@ -148,6 +151,7 @@ ExecGather(GatherState *node) ...@@ -148,6 +151,7 @@ ExecGather(GatherState *node)
*/ */
if (gather->num_workers > 0 && IsInParallelMode()) if (gather->num_workers > 0 && IsInParallelMode())
{ {
ParallelContext *pcxt;
bool got_any_worker = false; bool got_any_worker = false;
/* Initialize the workers required to execute Gather node. */ /* Initialize the workers required to execute Gather node. */
...@@ -160,18 +164,26 @@ ExecGather(GatherState *node) ...@@ -160,18 +164,26 @@ ExecGather(GatherState *node)
* Register backend workers. We might not get as many as we * Register backend workers. We might not get as many as we
* requested, or indeed any at all. * requested, or indeed any at all.
*/ */
LaunchParallelWorkers(node->pei->pcxt); pcxt = node->pei->pcxt;
LaunchParallelWorkers(pcxt);
/* Set up a tuple queue to collect the results. */ /* Set up tuple queue readers to read the results. */
node->funnel = CreateTupleQueueFunnel(); if (pcxt->nworkers > 0)
for (i = 0; i < node->pei->pcxt->nworkers; ++i)
{ {
if (node->pei->pcxt->worker[i].bgwhandle) node->nreaders = 0;
node->reader =
palloc(pcxt->nworkers * sizeof(TupleQueueReader *));
for (i = 0; i < pcxt->nworkers; ++i)
{ {
if (pcxt->worker[i].bgwhandle == NULL)
continue;
shm_mq_set_handle(node->pei->tqueue[i], shm_mq_set_handle(node->pei->tqueue[i],
node->pei->pcxt->worker[i].bgwhandle); pcxt->worker[i].bgwhandle);
RegisterTupleQueueOnFunnel(node->funnel, node->reader[node->nreaders++] =
node->pei->tqueue[i]); CreateTupleQueueReader(node->pei->tqueue[i],
fslot->tts_tupleDescriptor);
got_any_worker = true; got_any_worker = true;
} }
} }
...@@ -182,7 +194,7 @@ ExecGather(GatherState *node) ...@@ -182,7 +194,7 @@ ExecGather(GatherState *node)
} }
/* Run plan locally if no workers or not single-copy. */ /* Run plan locally if no workers or not single-copy. */
node->need_to_scan_locally = (node->funnel == NULL) node->need_to_scan_locally = (node->reader == NULL)
|| !gather->single_copy; || !gather->single_copy;
node->initialized = true; node->initialized = true;
} }
...@@ -254,13 +266,9 @@ ExecEndGather(GatherState *node) ...@@ -254,13 +266,9 @@ ExecEndGather(GatherState *node)
} }
/* /*
* gather_getnext * Read the next tuple. We might fetch a tuple from one of the tuple queues
* * using gather_readnext, or if no tuple queue contains a tuple and the
* Get the next tuple from shared memory queue. This function * single_copy flag is not set, we might generate one locally instead.
* is responsible for fetching tuples from all the queues associated
* with worker backends used in Gather node execution and if there is
* no data available from queues or no worker is available, it does
* fetch the data from local node.
*/ */
static TupleTableSlot * static TupleTableSlot *
gather_getnext(GatherState *gatherstate) gather_getnext(GatherState *gatherstate)
...@@ -270,18 +278,11 @@ gather_getnext(GatherState *gatherstate) ...@@ -270,18 +278,11 @@ gather_getnext(GatherState *gatherstate)
TupleTableSlot *fslot = gatherstate->funnel_slot; TupleTableSlot *fslot = gatherstate->funnel_slot;
HeapTuple tup; HeapTuple tup;
while (gatherstate->funnel != NULL || gatherstate->need_to_scan_locally) while (gatherstate->reader != NULL || gatherstate->need_to_scan_locally)
{ {
if (gatherstate->funnel != NULL) if (gatherstate->reader != NULL)
{ {
bool done = false; tup = gather_readnext(gatherstate);
/* wait only if local scan is done */
tup = TupleQueueFunnelNext(gatherstate->funnel,
gatherstate->need_to_scan_locally,
&done);
if (done)
ExecShutdownGatherWorkers(gatherstate);
if (HeapTupleIsValid(tup)) if (HeapTupleIsValid(tup))
{ {
...@@ -309,6 +310,80 @@ gather_getnext(GatherState *gatherstate) ...@@ -309,6 +310,80 @@ gather_getnext(GatherState *gatherstate)
return ExecClearTuple(fslot); return ExecClearTuple(fslot);
} }
/*
 * Attempt to read a tuple from one of our parallel workers.
 *
 * Polls the tuple queue readers in round-robin order, starting from
 * gatherstate->nextreader.  Returns the first tuple found.  Returns NULL
 * in two cases: (1) every reader has been exhausted and destroyed, in
 * which case all worker state is shut down via ExecShutdownGather; or
 * (2) no tuple is currently available but need_to_scan_locally is set,
 * so the caller should produce a tuple from the local copy of the plan
 * instead.  If no tuple is available and we cannot scan locally, we
 * sleep on our process latch until woken.
 */
static HeapTuple
gather_readnext(GatherState *gatherstate)
{
	/*
	 * Remember where this polling sweep started; if we advance all the way
	 * back around to waitpos without obtaining a tuple, we have visited
	 * every reader once and it is time to either scan locally or block.
	 */
	int			waitpos = gatherstate->nextreader;

	for (;;)
	{
		TupleQueueReader *reader;
		HeapTuple	tup;
		bool		readerdone;

		/* Make sure we've read all messages from workers. */
		HandleParallelMessages();

		/* Attempt to read a tuple, but don't block if none is available. */
		reader = gatherstate->reader[gatherstate->nextreader];
		tup = TupleQueueReaderNext(reader, true, &readerdone);

		/*
		 * If this reader is done, remove it.  If all readers are done,
		 * clean up remaining worker state.
		 */
		if (readerdone)
		{
			DestroyTupleQueueReader(reader);
			--gatherstate->nreaders;
			if (gatherstate->nreaders == 0)
			{
				/* Last queue exhausted: tear down all parallel state. */
				ExecShutdownGather(gatherstate);
				return NULL;
			}
			else
			{
				/*
				 * Compact the reader array over the destroyed entry so the
				 * live readers stay contiguous in [0, nreaders).
				 */
				memmove(&gatherstate->reader[gatherstate->nextreader],
						&gatherstate->reader[gatherstate->nextreader + 1],
						sizeof(TupleQueueReader *)
						* (gatherstate->nreaders - gatherstate->nextreader));
				/* Wrap nextreader if it now points past the last reader. */
				if (gatherstate->nextreader >= gatherstate->nreaders)
					gatherstate->nextreader = 0;
				/*
				 * The removal shifted positions above the deleted slot down
				 * by one; adjust waitpos so the "full sweep" test below
				 * still refers to the same reader.
				 */
				if (gatherstate->nextreader < waitpos)
					--waitpos;
			}
			/* Retry immediately with the reader now in this slot. */
			continue;
		}

		/* Advance nextreader pointer in round-robin fashion. */
		gatherstate->nextreader =
			(gatherstate->nextreader + 1) % gatherstate->nreaders;

		/* If we got a tuple, return it. */
		if (tup)
			return tup;

		/* Have we visited every TupleQueueReader? */
		if (gatherstate->nextreader == waitpos)
		{
			/*
			 * If (still) running plan locally, return NULL so caller can
			 * generate another tuple from the local copy of the plan.
			 */
			if (gatherstate->need_to_scan_locally)
				return NULL;

			/*
			 * Nothing to do except wait for developments.  NOTE(review):
			 * this relies on workers setting our latch when they write to
			 * a queue — confirm against the shm_mq sender side.  The latch
			 * is reset only after CHECK_FOR_INTERRUPTS so a set latch is
			 * never lost before we re-poll the queues.
			 */
			WaitLatch(MyLatch, WL_LATCH_SET, 0);
			CHECK_FOR_INTERRUPTS();
			ResetLatch(MyLatch);
		}
	}
}
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
* ExecShutdownGatherWorkers * ExecShutdownGatherWorkers
* *
...@@ -320,11 +395,14 @@ gather_getnext(GatherState *gatherstate) ...@@ -320,11 +395,14 @@ gather_getnext(GatherState *gatherstate)
void void
ExecShutdownGatherWorkers(GatherState *node) ExecShutdownGatherWorkers(GatherState *node)
{ {
/* Shut down tuple queue funnel before shutting down workers. */ /* Shut down tuple queue readers before shutting down workers. */
if (node->funnel != NULL) if (node->reader != NULL)
{ {
DestroyTupleQueueFunnel(node->funnel); int i;
node->funnel = NULL;
for (i = 0; i < node->nreaders; ++i)
DestroyTupleQueueReader(node->reader[i]);
node->reader = NULL;
} }
/* Now shut down the workers. */ /* Now shut down the workers. */
......
This diff is collapsed.
...@@ -21,11 +21,11 @@ ...@@ -21,11 +21,11 @@
extern DestReceiver *CreateTupleQueueDestReceiver(shm_mq_handle *handle); extern DestReceiver *CreateTupleQueueDestReceiver(shm_mq_handle *handle);
/* Use these to receive tuples from a shm_mq. */ /* Use these to receive tuples from a shm_mq. */
typedef struct TupleQueueFunnel TupleQueueFunnel; typedef struct TupleQueueReader TupleQueueReader;
extern TupleQueueFunnel *CreateTupleQueueFunnel(void); extern TupleQueueReader *CreateTupleQueueReader(shm_mq_handle *handle,
extern void DestroyTupleQueueFunnel(TupleQueueFunnel *funnel); TupleDesc tupledesc);
extern void RegisterTupleQueueOnFunnel(TupleQueueFunnel *, shm_mq_handle *); extern void DestroyTupleQueueReader(TupleQueueReader *funnel);
extern HeapTuple TupleQueueFunnelNext(TupleQueueFunnel *, bool nowait, extern HeapTuple TupleQueueReaderNext(TupleQueueReader *,
bool *done); bool nowait, bool *done);
#endif /* TQUEUE_H */ #endif /* TQUEUE_H */
...@@ -1963,7 +1963,9 @@ typedef struct GatherState ...@@ -1963,7 +1963,9 @@ typedef struct GatherState
PlanState ps; /* its first field is NodeTag */ PlanState ps; /* its first field is NodeTag */
bool initialized; bool initialized;
struct ParallelExecutorInfo *pei; struct ParallelExecutorInfo *pei;
struct TupleQueueFunnel *funnel; int nreaders;
int nextreader;
struct TupleQueueReader **reader;
TupleTableSlot *funnel_slot; TupleTableSlot *funnel_slot;
bool need_to_scan_locally; bool need_to_scan_locally;
} GatherState; } GatherState;
......
...@@ -2018,7 +2018,7 @@ TupleHashEntry ...@@ -2018,7 +2018,7 @@ TupleHashEntry
TupleHashEntryData TupleHashEntryData
TupleHashIterator TupleHashIterator
TupleHashTable TupleHashTable
TupleQueueFunnel TupleQueueReader
TupleTableSlot TupleTableSlot
Tuplesortstate Tuplesortstate
Tuplestorestate Tuplestorestate
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment