Commit 2742c450 authored by Andres Freund

expression eval: Reduce number of steps for agg transition invocations.

Do so by combining the various steps that are part of aggregate
transition function invocation into one larger step. As some of the
current steps are only necessary for some aggregates, have one variant
of the aggregate transition step for each possible combination.

To avoid further manual copies of code in the different transition
step implementations, move most of the code into helper functions
marked as "always inline".

The benefit of this change is an increase in performance when
aggregating lots of rows. This comes in part due to the reduced number
of indirect jumps due to the reduced number of steps, and in part by
reducing redundant setup code across steps. This mainly benefits
interpreted execution, but the code generated by JIT is also improved
a bit.

As a nice side-effect it also ends up making the code a bit simpler.

A small additional optimization is removing the need to set
aggstate->curaggcontext before calling ExecAggInitGroup, choosing to
instead pass curaggcontext as an argument. It was, in contrast to
other aggregate related functions, only needed to fetch a memory
context to copy the transition value into.

Author: Andres Freund
Discussion:
   https://postgr.es/m/20191023163849.sosqbfs5yenocez3@alap3.anarazel.de
   https://postgr.es/m/5c371df7cee903e8cd4c685f90c6c72086d3a2dc.camel@j-davis.com
parent 7d672b76
......@@ -3229,8 +3229,6 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
FunctionCallInfo fcinfo, AggStatePerTrans pertrans,
int transno, int setno, int setoff, bool ishash)
{
int adjust_init_jumpnull = -1;
int adjust_strict_jumpnull = -1;
ExprContext *aggcontext;
if (ishash)
......@@ -3239,52 +3237,61 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
aggcontext = aggstate->aggcontexts[setno];
/*
* Determine appropriate transition implementation.
*
* For non-ordered aggregates:
*
* If the initial value for the transition state doesn't exist in the
* pg_aggregate table then we will let the first non-NULL value returned
* from the outer procNode become the initial value. (This is useful for
* aggregates like max() and min().) The noTransValue flag signals that we
* still need to do this.
* need to do so. If true, generate a
* EEOP_AGG_PLAIN_TRANS_INIT_STRICT_{BYVAL,BYREF} step. This step also needs to
* do the work described next:
*
* If the function is strict, but does have an initial value, choose
* EEOP_AGG_PLAIN_TRANS_STRICT_{BYVAL,BYREF}, which skips the transition
* function if the transition value has become NULL (because a previous
* transition function returned NULL). This step also needs to do the work
* described next:
*
* Otherwise we call EEOP_AGG_PLAIN_TRANS_{BYVAL,BYREF}, which does not have to
* perform either of the above checks.
*
* Having steps with overlapping responsibilities is not nice, but
* aggregations are very performance sensitive, making this worthwhile.
*
* For ordered aggregates:
*
* Only need to choose between the faster path for a single ordered
* column, and the one between multiple columns. Checking strictness etc
* is done when finalizing the aggregate. See
* process_ordered_aggregate_{single, multi} and
* advance_transition_function.
*/
if (pertrans->numSortCols == 0 &&
fcinfo->flinfo->fn_strict &&
pertrans->initValueIsNull)
if (pertrans->numSortCols == 0)
{
scratch->opcode = EEOP_AGG_INIT_TRANS;
scratch->d.agg_init_trans.pertrans = pertrans;
scratch->d.agg_init_trans.setno = setno;
scratch->d.agg_init_trans.setoff = setoff;
scratch->d.agg_init_trans.transno = transno;
scratch->d.agg_init_trans.aggcontext = aggcontext;
scratch->d.agg_init_trans.jumpnull = -1; /* adjust later */
ExprEvalPushStep(state, scratch);
/* see comment about jumping out below */
adjust_init_jumpnull = state->steps_len - 1;
if (pertrans->transtypeByVal)
{
if (fcinfo->flinfo->fn_strict &&
pertrans->initValueIsNull)
scratch->opcode = EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYVAL;
else if (fcinfo->flinfo->fn_strict)
scratch->opcode = EEOP_AGG_PLAIN_TRANS_STRICT_BYVAL;
else
scratch->opcode = EEOP_AGG_PLAIN_TRANS_BYVAL;
}
if (pertrans->numSortCols == 0 &&
fcinfo->flinfo->fn_strict)
else
{
scratch->opcode = EEOP_AGG_STRICT_TRANS_CHECK;
scratch->d.agg_strict_trans_check.setno = setno;
scratch->d.agg_strict_trans_check.setoff = setoff;
scratch->d.agg_strict_trans_check.transno = transno;
scratch->d.agg_strict_trans_check.jumpnull = -1; /* adjust later */
ExprEvalPushStep(state, scratch);
/*
* Note, we don't push into adjust_bailout here - those jump to the
* end of all transition value computations. Here a single transition
* value is NULL, so just skip processing the individual value.
*/
adjust_strict_jumpnull = state->steps_len - 1;
if (fcinfo->flinfo->fn_strict &&
pertrans->initValueIsNull)
scratch->opcode = EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF;
else if (fcinfo->flinfo->fn_strict)
scratch->opcode = EEOP_AGG_PLAIN_TRANS_STRICT_BYREF;
else
scratch->opcode = EEOP_AGG_PLAIN_TRANS_BYREF;
}
}
/* invoke appropriate transition implementation */
if (pertrans->numSortCols == 0 && pertrans->transtypeByVal)
scratch->opcode = EEOP_AGG_PLAIN_TRANS_BYVAL;
else if (pertrans->numSortCols == 0)
scratch->opcode = EEOP_AGG_PLAIN_TRANS;
else if (pertrans->numInputs == 1)
scratch->opcode = EEOP_AGG_ORDERED_TRANS_DATUM;
else
......@@ -3296,22 +3303,6 @@ ExecBuildAggTransCall(ExprState *state, AggState *aggstate,
scratch->d.agg_trans.transno = transno;
scratch->d.agg_trans.aggcontext = aggcontext;
ExprEvalPushStep(state, scratch);
/* adjust jumps so they jump till after transition invocation */
if (adjust_init_jumpnull != -1)
{
ExprEvalStep *as = &state->steps[adjust_init_jumpnull];
Assert(as->d.agg_init_trans.jumpnull == -1);
as->d.agg_init_trans.jumpnull = state->steps_len;
}
if (adjust_strict_jumpnull != -1)
{
ExprEvalStep *as = &state->steps[adjust_strict_jumpnull];
Assert(as->d.agg_strict_trans_check.jumpnull == -1);
as->d.agg_strict_trans_check.jumpnull = state->steps_len;
}
}
/*
......
This diff is collapsed.
......@@ -304,7 +304,10 @@ static int find_compatible_pertrans(AggState *aggstate, Aggref *newagg,
static void
select_current_set(AggState *aggstate, int setno, bool is_hash)
{
/* when changing this, also adapt ExecInterpExpr() and friends */
/*
* When changing this, also adapt ExecAggPlainTransByVal() and
* ExecAggPlainTransByRef().
*/
if (is_hash)
aggstate->curaggcontext = aggstate->hashcontext;
else
......
......@@ -2046,25 +2046,47 @@ llvm_compile_expr(ExprState *state)
break;
}
case EEOP_AGG_INIT_TRANS:
case EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYVAL:
case EEOP_AGG_PLAIN_TRANS_STRICT_BYVAL:
case EEOP_AGG_PLAIN_TRANS_BYVAL:
case EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF:
case EEOP_AGG_PLAIN_TRANS_STRICT_BYREF:
case EEOP_AGG_PLAIN_TRANS_BYREF:
{
AggState *aggstate;
AggStatePerTrans pertrans;
FunctionCallInfo fcinfo;
LLVMValueRef v_aggstatep;
LLVMValueRef v_pertransp;
LLVMValueRef v_fcinfo;
LLVMValueRef v_fcinfo_isnull;
LLVMValueRef v_transvaluep;
LLVMValueRef v_transnullp;
LLVMValueRef v_setoff;
LLVMValueRef v_transno;
LLVMValueRef v_aggcontext;
LLVMValueRef v_allpergroupsp;
LLVMValueRef v_current_setp;
LLVMValueRef v_current_pertransp;
LLVMValueRef v_curaggcontext;
LLVMValueRef v_pertransp;
LLVMValueRef v_pergroupp;
LLVMValueRef v_setoff,
v_transno;
LLVMValueRef v_retval;
LLVMValueRef v_notransvalue;
LLVMValueRef v_tmpcontext;
LLVMValueRef v_oldcontext;
LLVMBasicBlockRef b_init;
aggstate = castNode(AggState, state->parent);
pertrans = op->d.agg_trans.pertrans;
pertrans = op->d.agg_init_trans.pertrans;
fcinfo = pertrans->transfn_fcinfo;
v_aggstatep =
LLVMBuildBitCast(b, v_parent, l_ptr(StructAggState), "");
......@@ -2073,20 +2095,28 @@ llvm_compile_expr(ExprState *state)
/*
* pergroup = &aggstate->all_pergroups
* [op->d.agg_init_trans_check.setoff]
* [op->d.agg_strict_trans_check.setoff]
* [op->d.agg_init_trans_check.transno];
*/
v_allpergroupsp =
l_load_struct_gep(b, v_aggstatep,
FIELDNO_AGGSTATE_ALL_PERGROUPS,
"aggstate.all_pergroups");
v_setoff = l_int32_const(op->d.agg_init_trans.setoff);
v_transno = l_int32_const(op->d.agg_init_trans.transno);
v_setoff = l_int32_const(op->d.agg_trans.setoff);
v_transno = l_int32_const(op->d.agg_trans.transno);
v_pergroupp =
LLVMBuildGEP(b,
l_load_gep1(b, v_allpergroupsp, v_setoff, ""),
&v_transno, 1, "");
if (opcode == EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYVAL ||
opcode == EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF)
{
LLVMValueRef v_notransvalue;
LLVMBasicBlockRef b_init;
LLVMBasicBlockRef b_no_init;
v_notransvalue =
l_load_struct_gep(b, v_pergroupp,
FIELDNO_AGGSTATEPERGROUPDATA_NOTRANSVALUE,
......@@ -2094,88 +2124,51 @@ llvm_compile_expr(ExprState *state)
b_init = l_bb_before_v(opblocks[opno + 1],
"op.%d.inittrans", opno);
b_no_init = l_bb_before_v(opblocks[opno + 1],
"op.%d.no_inittrans", opno);
LLVMBuildCondBr(b,
LLVMBuildICmp(b, LLVMIntEQ, v_notransvalue,
l_sbool_const(1), ""),
b_init,
opblocks[opno + 1]);
LLVMPositionBuilderAtEnd(b, b_init);
b_no_init);
/* block to init the transition value if necessary */
{
LLVMValueRef params[3];
LLVMValueRef v_curaggcontext;
LLVMValueRef v_current_set;
LLVMValueRef v_aggcontext;
LLVMValueRef params[4];
v_aggcontext = l_ptr_const(op->d.agg_init_trans.aggcontext,
l_ptr(StructExprContext));
v_current_set =
LLVMBuildStructGEP(b,
v_aggstatep,
FIELDNO_AGGSTATE_CURRENT_SET,
"aggstate.current_set");
v_curaggcontext =
LLVMBuildStructGEP(b,
v_aggstatep,
FIELDNO_AGGSTATE_CURAGGCONTEXT,
"aggstate.curaggcontext");
LLVMPositionBuilderAtEnd(b, b_init);
LLVMBuildStore(b, l_int32_const(op->d.agg_init_trans.setno),
v_current_set);
LLVMBuildStore(b, v_aggcontext,
v_curaggcontext);
v_aggcontext = l_ptr_const(op->d.agg_trans.aggcontext,
l_ptr(StructExprContext));
params[0] = v_aggstatep;
params[1] = v_pertransp;
params[2] = v_pergroupp;
params[3] = v_aggcontext;
LLVMBuildCall(b,
llvm_pg_func(mod, "ExecAggInitGroup"),
params, lengthof(params),
"");
LLVMBuildBr(b, opblocks[opno + 1]);
}
LLVMBuildBr(b, opblocks[op->d.agg_init_trans.jumpnull]);
break;
LLVMPositionBuilderAtEnd(b, b_no_init);
}
case EEOP_AGG_STRICT_TRANS_CHECK:
if (opcode == EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYVAL ||
opcode == EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF ||
opcode == EEOP_AGG_PLAIN_TRANS_STRICT_BYVAL ||
opcode == EEOP_AGG_PLAIN_TRANS_STRICT_BYREF)
{
LLVMValueRef v_setoff,
v_transno;
LLVMValueRef v_aggstatep;
LLVMValueRef v_allpergroupsp;
LLVMValueRef v_transnull;
LLVMValueRef v_pergroupp;
int jumpnull = op->d.agg_strict_trans_check.jumpnull;
v_aggstatep =
LLVMBuildBitCast(b, v_parent, l_ptr(StructAggState), "");
/*
* pergroup = &aggstate->all_pergroups
* [op->d.agg_strict_trans_check.setoff]
* [op->d.agg_init_trans_check.transno];
*/
v_allpergroupsp =
l_load_struct_gep(b, v_aggstatep,
FIELDNO_AGGSTATE_ALL_PERGROUPS,
"aggstate.all_pergroups");
v_setoff =
l_int32_const(op->d.agg_strict_trans_check.setoff);
v_transno =
l_int32_const(op->d.agg_strict_trans_check.transno);
v_pergroupp =
LLVMBuildGEP(b,
l_load_gep1(b, v_allpergroupsp, v_setoff, ""),
&v_transno, 1, "");
LLVMBasicBlockRef b_strictpass;
b_strictpass = l_bb_before_v(opblocks[opno + 1],
"op.%d.strictpass", opno);
v_transnull =
l_load_struct_gep(b, v_pergroupp,
FIELDNO_AGGSTATEPERGROUPDATA_TRANSVALUEISNULL,
......@@ -2184,70 +2177,12 @@ llvm_compile_expr(ExprState *state)
LLVMBuildCondBr(b,
LLVMBuildICmp(b, LLVMIntEQ, v_transnull,
l_sbool_const(1), ""),
opblocks[jumpnull],
opblocks[opno + 1]);
opblocks[opno + 1],
b_strictpass);
break;
LLVMPositionBuilderAtEnd(b, b_strictpass);
}
case EEOP_AGG_PLAIN_TRANS_BYVAL:
case EEOP_AGG_PLAIN_TRANS:
{
AggState *aggstate;
AggStatePerTrans pertrans;
FunctionCallInfo fcinfo;
LLVMValueRef v_aggstatep;
LLVMValueRef v_fcinfo;
LLVMValueRef v_fcinfo_isnull;
LLVMValueRef v_transvaluep;
LLVMValueRef v_transnullp;
LLVMValueRef v_setoff;
LLVMValueRef v_transno;
LLVMValueRef v_aggcontext;
LLVMValueRef v_allpergroupsp;
LLVMValueRef v_current_setp;
LLVMValueRef v_current_pertransp;
LLVMValueRef v_curaggcontext;
LLVMValueRef v_pertransp;
LLVMValueRef v_pergroupp;
LLVMValueRef v_retval;
LLVMValueRef v_tmpcontext;
LLVMValueRef v_oldcontext;
aggstate = castNode(AggState, state->parent);
pertrans = op->d.agg_trans.pertrans;
fcinfo = pertrans->transfn_fcinfo;
v_aggstatep =
LLVMBuildBitCast(b, v_parent, l_ptr(StructAggState), "");
v_pertransp = l_ptr_const(pertrans,
l_ptr(StructAggStatePerTransData));
/*
* pergroup = &aggstate->all_pergroups
* [op->d.agg_strict_trans_check.setoff]
* [op->d.agg_init_trans_check.transno];
*/
v_allpergroupsp =
l_load_struct_gep(b, v_aggstatep,
FIELDNO_AGGSTATE_ALL_PERGROUPS,
"aggstate.all_pergroups");
v_setoff = l_int32_const(op->d.agg_trans.setoff);
v_transno = l_int32_const(op->d.agg_trans.transno);
v_pergroupp =
LLVMBuildGEP(b,
l_load_gep1(b, v_allpergroupsp, v_setoff, ""),
&v_transno, 1, "");
v_fcinfo = l_ptr_const(fcinfo,
l_ptr(StructFunctionCallInfoData));
......@@ -2312,7 +2247,9 @@ llvm_compile_expr(ExprState *state)
* child of the aggcontext, assume we can adopt that value
* without copying it.
*/
if (opcode == EEOP_AGG_PLAIN_TRANS)
if (opcode == EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF ||
opcode == EEOP_AGG_PLAIN_TRANS_STRICT_BYREF ||
opcode == EEOP_AGG_PLAIN_TRANS_BYREF)
{
LLVMBasicBlockRef b_call;
LLVMBasicBlockRef b_nocall;
......
......@@ -225,10 +225,12 @@ typedef enum ExprEvalOp
EEOP_AGG_DESERIALIZE,
EEOP_AGG_STRICT_INPUT_CHECK_ARGS,
EEOP_AGG_STRICT_INPUT_CHECK_NULLS,
EEOP_AGG_INIT_TRANS,
EEOP_AGG_STRICT_TRANS_CHECK,
EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYVAL,
EEOP_AGG_PLAIN_TRANS_STRICT_BYVAL,
EEOP_AGG_PLAIN_TRANS_BYVAL,
EEOP_AGG_PLAIN_TRANS,
EEOP_AGG_PLAIN_TRANS_INIT_STRICT_BYREF,
EEOP_AGG_PLAIN_TRANS_STRICT_BYREF,
EEOP_AGG_PLAIN_TRANS_BYREF,
EEOP_AGG_ORDERED_TRANS_DATUM,
EEOP_AGG_ORDERED_TRANS_TUPLE,
......@@ -620,27 +622,8 @@ typedef struct ExprEvalStep
int jumpnull;
} agg_strict_input_check;
/* for EEOP_AGG_INIT_TRANS */
struct
{
AggStatePerTrans pertrans;
ExprContext *aggcontext;
int setno;
int transno;
int setoff;
int jumpnull;
} agg_init_trans;
/* for EEOP_AGG_STRICT_TRANS_CHECK */
struct
{
int setno;
int transno;
int setoff;
int jumpnull;
} agg_strict_trans_check;
/* for EEOP_AGG_{PLAIN,ORDERED}_TRANS* */
/* for EEOP_AGG_PLAIN_TRANS_[INIT_][STRICT_]{BYVAL,BYREF} */
/* for EEOP_AGG_ORDERED_TRANS_{DATUM,TUPLE} */
struct
{
AggStatePerTrans pertrans;
......@@ -750,7 +733,8 @@ extern void ExecEvalWholeRowVar(ExprState *state, ExprEvalStep *op,
extern void ExecEvalSysVar(ExprState *state, ExprEvalStep *op,
ExprContext *econtext, TupleTableSlot *slot);
extern void ExecAggInitGroup(AggState *aggstate, AggStatePerTrans pertrans, AggStatePerGroup pergroup);
extern void ExecAggInitGroup(AggState *aggstate, AggStatePerTrans pertrans, AggStatePerGroup pergroup,
ExprContext *aggcontext);
extern Datum ExecAggTransReparent(AggState *aggstate, AggStatePerTrans pertrans,
Datum newValue, bool newValueIsNull,
Datum oldValue, bool oldValueIsNull);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment