Commit b676ac44 authored by Tomas Vondra

Optimize creation of slots for FDW bulk inserts

Commit b663a413 introduced bulk inserts for FDW, but the handling of
tuple slots turned out to be problematic for two reasons. Firstly, the
slots were re-created for each individual batch. Secondly, all slots
referenced the same tuple descriptor - with reasonably small batches
this is not an issue, but with large batches this triggers O(N^2)
behavior in the resource owner code.

These two issues work against each other - to reduce the number of times
a slot has to be created/dropped, larger batches are needed. However,
the larger the batch, the more expensive the resource owner bookkeeping
gets. For practical batch sizes (100 - 1000) this is not a big problem,
as the benefits (latency savings) greatly exceed the resource owner
costs. But for extremely large batches it might be much worse, possibly
even losing to non-batching mode.

Fixed by initializing tuple slots only once (and reusing them across
batches) and by using a new tuple descriptor copy for each slot.

Discussion: https://postgr.es/m/ebbbcc7d-4286-8c28-0272-61b4753af761%40enterprisedb.com
parent 96540f80
...@@ -703,16 +703,31 @@ ExecInsert(ModifyTableState *mtstate, ...@@ -703,16 +703,31 @@ ExecInsert(ModifyTableState *mtstate,
resultRelInfo->ri_BatchSize); resultRelInfo->ri_BatchSize);
} }
resultRelInfo->ri_Slots[resultRelInfo->ri_NumSlots] = /*
MakeSingleTupleTableSlot(slot->tts_tupleDescriptor, * Initialize the batch slots. We don't know how many slots will be
slot->tts_ops); * needed, so we initialize them as the batch grows, and we keep
ExecCopySlot(resultRelInfo->ri_Slots[resultRelInfo->ri_NumSlots], * them across batches. To mitigate an inefficiency in how resource
slot); * owner handles objects with many references (as with many slots
resultRelInfo->ri_PlanSlots[resultRelInfo->ri_NumSlots] = * all referencing the same tuple descriptor) we copy the tuple
MakeSingleTupleTableSlot(planSlot->tts_tupleDescriptor, * descriptor for each slot.
planSlot->tts_ops); */
ExecCopySlot(resultRelInfo->ri_PlanSlots[resultRelInfo->ri_NumSlots], if (resultRelInfo->ri_NumSlots >= resultRelInfo->ri_NumSlotsInitialized)
planSlot); {
TupleDesc tdesc = CreateTupleDescCopy(slot->tts_tupleDescriptor);
resultRelInfo->ri_Slots[resultRelInfo->ri_NumSlots] =
MakeSingleTupleTableSlot(tdesc, slot->tts_ops);
ExecCopySlot(resultRelInfo->ri_Slots[resultRelInfo->ri_NumSlots],
slot);
resultRelInfo->ri_PlanSlots[resultRelInfo->ri_NumSlots] =
MakeSingleTupleTableSlot(tdesc, planSlot->tts_ops);
ExecCopySlot(resultRelInfo->ri_PlanSlots[resultRelInfo->ri_NumSlots],
planSlot);
/* remember how many batch slots we initialized */
resultRelInfo->ri_NumSlotsInitialized++;
}
resultRelInfo->ri_NumSlots++; resultRelInfo->ri_NumSlots++;
...@@ -1034,12 +1049,6 @@ ExecBatchInsert(ModifyTableState *mtstate, ...@@ -1034,12 +1049,6 @@ ExecBatchInsert(ModifyTableState *mtstate,
if (canSetTag && numInserted > 0) if (canSetTag && numInserted > 0)
estate->es_processed += numInserted; estate->es_processed += numInserted;
for (i = 0; i < numSlots; i++)
{
ExecDropSingleTupleTableSlot(slots[i]);
ExecDropSingleTupleTableSlot(planSlots[i]);
}
} }
/* ---------------------------------------------------------------- /* ----------------------------------------------------------------
...@@ -3162,6 +3171,7 @@ ExecEndModifyTable(ModifyTableState *node) ...@@ -3162,6 +3171,7 @@ ExecEndModifyTable(ModifyTableState *node)
*/ */
for (i = 0; i < node->mt_nrels; i++) for (i = 0; i < node->mt_nrels; i++)
{ {
int j;
ResultRelInfo *resultRelInfo = node->resultRelInfo + i; ResultRelInfo *resultRelInfo = node->resultRelInfo + i;
if (!resultRelInfo->ri_usesFdwDirectModify && if (!resultRelInfo->ri_usesFdwDirectModify &&
...@@ -3169,6 +3179,16 @@ ExecEndModifyTable(ModifyTableState *node) ...@@ -3169,6 +3179,16 @@ ExecEndModifyTable(ModifyTableState *node)
resultRelInfo->ri_FdwRoutine->EndForeignModify != NULL) resultRelInfo->ri_FdwRoutine->EndForeignModify != NULL)
resultRelInfo->ri_FdwRoutine->EndForeignModify(node->ps.state, resultRelInfo->ri_FdwRoutine->EndForeignModify(node->ps.state,
resultRelInfo); resultRelInfo);
/*
* Cleanup the initialized batch slots. This only matters for FDWs with
* batching, but the other cases will have ri_NumSlotsInitialized == 0.
*/
for (j = 0; j < resultRelInfo->ri_NumSlotsInitialized; j++)
{
ExecDropSingleTupleTableSlot(resultRelInfo->ri_Slots[j]);
ExecDropSingleTupleTableSlot(resultRelInfo->ri_PlanSlots[j]);
}
} }
/* /*
......
...@@ -462,6 +462,7 @@ typedef struct ResultRelInfo ...@@ -462,6 +462,7 @@ typedef struct ResultRelInfo
/* batch insert stuff */ /* batch insert stuff */
int ri_NumSlots; /* number of slots in the array */ int ri_NumSlots; /* number of slots in the array */
int ri_NumSlotsInitialized; /* number of initialized slots */
int ri_BatchSize; /* max slots inserted in a single batch */ int ri_BatchSize; /* max slots inserted in a single batch */
TupleTableSlot **ri_Slots; /* input tuples for batch insert */ TupleTableSlot **ri_Slots; /* input tuples for batch insert */
TupleTableSlot **ri_PlanSlots; TupleTableSlot **ri_PlanSlots;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment