Commit 3d376fce authored by Tom Lane

Change the parser to translate "foo [NOT] IN (expression-list)" to
ScalarArrayOpExpr when possible, that is, whenever there is an array type
for the values of the expression list.  This completes the project I've
been working on to improve the speed of index searches with long IN lists,
as per discussion back in mid-October.

I did not force initdb, but until you do one you will see failures in the
"rules" regression test, because some of the standard system views use IN
and their compiled formats have changed.
parent 8a9acd3c
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.263 2005/11/26 22:14:56 tgl Exp $
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.264 2005/11/28 04:35:30 tgl Exp $
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
......@@ -1597,6 +1597,10 @@ _outAExpr(StringInfo str, A_Expr *node)
appendStringInfo(str, " OF ");
WRITE_NODE_FIELD(name);
break;
case AEXPR_IN:
appendStringInfo(str, " IN ");
WRITE_NODE_FIELD(name);
break;
default:
appendStringInfo(str, " ??");
break;
......@@ -1658,6 +1662,7 @@ _outAConst(StringInfo str, A_Const *node)
{
WRITE_NODE_TYPE("A_CONST");
appendStringInfo(str, " :val ");
_outValue(str, &(node->val));
WRITE_NODE_FIELD(typename);
}
......
......@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.515 2005/11/22 15:24:17 adunstan Exp $
* $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.516 2005/11/28 04:35:31 tgl Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
......@@ -6721,7 +6721,7 @@ a_expr: c_expr { $$ = $1; }
}
| a_expr IS NOT OF '(' type_list ')' %prec IS
{
$$ = (Node *) makeSimpleA_Expr(AEXPR_OF, "!=", $1, (Node *) $6);
$$ = (Node *) makeSimpleA_Expr(AEXPR_OF, "<>", $1, (Node *) $6);
}
| a_expr BETWEEN opt_asymmetric b_expr AND b_expr %prec BETWEEN
{
......@@ -6760,29 +6760,20 @@ a_expr: c_expr { $$ = $1; }
/* in_expr returns a SubLink or a list of a_exprs */
if (IsA($3, SubLink))
{
SubLink *n = (SubLink *)$3;
n->subLinkType = ANY_SUBLINK;
if (IsA($1, RowExpr))
n->lefthand = ((RowExpr *) $1)->args;
else
n->lefthand = list_make1($1);
n->operName = list_make1(makeString("="));
$$ = (Node *)n;
/* generate foo = ANY (subquery) */
SubLink *n = (SubLink *) $3;
n->subLinkType = ANY_SUBLINK;
if (IsA($1, RowExpr))
n->lefthand = ((RowExpr *) $1)->args;
else
n->lefthand = list_make1($1);
n->operName = list_make1(makeString("="));
$$ = (Node *)n;
}
else
{
Node *n = NULL;
ListCell *l;
foreach(l, (List *) $3)
{
Node *cmp;
cmp = (Node *) makeSimpleA_Expr(AEXPR_OP, "=", $1, lfirst(l));
if (n == NULL)
n = cmp;
else
n = (Node *) makeA_Expr(AEXPR_OR, NIL, n, cmp);
}
$$ = n;
/* generate scalar IN expression */
$$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "=", $1, $3);
}
}
| a_expr NOT IN_P in_expr
......@@ -6790,8 +6781,9 @@ a_expr: c_expr { $$ = $1; }
/* in_expr returns a SubLink or a list of a_exprs */
if (IsA($4, SubLink))
{
/* Make an IN node */
SubLink *n = (SubLink *)$4;
/* generate NOT (foo = ANY (subquery)) */
/* Make an = ANY node */
SubLink *n = (SubLink *) $4;
n->subLinkType = ANY_SUBLINK;
if (IsA($1, RowExpr))
n->lefthand = ((RowExpr *) $1)->args;
......@@ -6803,18 +6795,8 @@ a_expr: c_expr { $$ = $1; }
}
else
{
Node *n = NULL;
ListCell *l;
foreach(l, (List *) $4)
{
Node *cmp;
cmp = (Node *) makeSimpleA_Expr(AEXPR_OP, "<>", $1, lfirst(l));
if (n == NULL)
n = cmp;
else
n = (Node *) makeA_Expr(AEXPR_AND, NIL, n, cmp);
}
$$ = n;
/* generate scalar NOT IN expression */
$$ = (Node *) makeSimpleA_Expr(AEXPR_IN, "<>", $1, $4);
}
}
| a_expr subquery_Op sub_type select_with_parens %prec Op
......@@ -6904,7 +6886,7 @@ b_expr: c_expr
}
| b_expr IS NOT OF '(' type_list ')' %prec IS
{
$$ = (Node *) makeSimpleA_Expr(AEXPR_OF, "!=", $1, (Node *) $6);
$$ = (Node *) makeSimpleA_Expr(AEXPR_OF, "<>", $1, (Node *) $6);
}
;
......
......@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/parse_expr.c,v 1.187 2005/11/22 18:17:16 momjian Exp $
* $PostgreSQL: pgsql/src/backend/parser/parse_expr.c,v 1.188 2005/11/28 04:35:31 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -47,6 +47,7 @@ static Node *transformAExprOpAll(ParseState *pstate, A_Expr *a);
static Node *transformAExprDistinct(ParseState *pstate, A_Expr *a);
static Node *transformAExprNullIf(ParseState *pstate, A_Expr *a);
static Node *transformAExprOf(ParseState *pstate, A_Expr *a);
static Node *transformAExprIn(ParseState *pstate, A_Expr *a);
static Node *transformFuncCall(ParseState *pstate, FuncCall *fn);
static Node *transformCaseExpr(ParseState *pstate, CaseExpr *c);
static Node *transformSubLink(ParseState *pstate, SubLink *sublink);
......@@ -64,9 +65,9 @@ static Node *transformIndirection(ParseState *pstate, Node *basenode,
static Node *typecast_expression(ParseState *pstate, Node *expr,
TypeName *typename);
static Node *make_row_op(ParseState *pstate, List *opname,
Node *ltree, Node *rtree);
RowExpr *lrow, RowExpr *rrow);
static Node *make_row_distinct_op(ParseState *pstate, List *opname,
Node *ltree, Node *rtree);
RowExpr *lrow, RowExpr *rrow);
static Expr *make_distinct_op(ParseState *pstate, List *opname,
Node *ltree, Node *rtree);
......@@ -180,6 +181,9 @@ transformExpr(ParseState *pstate, Node *expr)
case AEXPR_OF:
result = transformAExprOf(pstate, a);
break;
case AEXPR_IN:
result = transformAExprIn(pstate, a);
break;
default:
elog(ERROR, "unrecognized A_Expr kind: %d", a->kind);
}
......@@ -603,7 +607,15 @@ transformAExprOp(ParseState *pstate, A_Expr *a)
rexpr && IsA(rexpr, RowExpr))
{
/* "row op row" */
result = make_row_op(pstate, a->name, lexpr, rexpr);
lexpr = transformExpr(pstate, lexpr);
rexpr = transformExpr(pstate, rexpr);
Assert(IsA(lexpr, RowExpr));
Assert(IsA(rexpr, RowExpr));
result = make_row_op(pstate,
a->name,
(RowExpr *) lexpr,
(RowExpr *) rexpr);
}
else
{
......@@ -686,22 +698,20 @@ transformAExprOpAll(ParseState *pstate, A_Expr *a)
static Node *
transformAExprDistinct(ParseState *pstate, A_Expr *a)
{
Node *lexpr = a->lexpr;
Node *rexpr = a->rexpr;
Node *lexpr = transformExpr(pstate, a->lexpr);
Node *rexpr = transformExpr(pstate, a->rexpr);
if (lexpr && IsA(lexpr, RowExpr) &&
rexpr && IsA(rexpr, RowExpr))
{
/* "row op row" */
return make_row_distinct_op(pstate, a->name,
lexpr, rexpr);
(RowExpr *) lexpr,
(RowExpr *) rexpr);
}
else
{
/* Ordinary scalar operator */
lexpr = transformExpr(pstate, lexpr);
rexpr = transformExpr(pstate, rexpr);
return (Node *) make_distinct_op(pstate,
a->name,
lexpr,
......@@ -737,15 +747,14 @@ static Node *
transformAExprOf(ParseState *pstate, A_Expr *a)
{
/*
* Checking an expression for match to type. Will result in a boolean
* constant node.
* Checking an expression for match to a list of type names.
* Will result in a boolean constant node.
*/
Node *lexpr = transformExpr(pstate, a->lexpr);
ListCell *telem;
A_Const *n;
Oid ltype,
rtype;
bool matched = false;
Node *lexpr = transformExpr(pstate, a->lexpr);
ltype = exprType(lexpr);
foreach(telem, (List *) a->rexpr)
......@@ -757,18 +766,145 @@ transformAExprOf(ParseState *pstate, A_Expr *a)
}
/*
* Expect two forms: equals or not equals. Flip the sense of the result
* We have two forms: equals or not equals. Flip the sense of the result
* for not equals.
*/
if (strcmp(strVal(linitial(a->name)), "!=") == 0)
if (strcmp(strVal(linitial(a->name)), "<>") == 0)
matched = (!matched);
n = makeNode(A_Const);
n->val.type = T_String;
n->val.val.str = (matched ? "t" : "f");
n->typename = SystemTypeName("bool");
return makeBoolConst(matched, false);
}
/*
 * transformAExprIn
 *		Transform an AEXPR_IN node: "foo [NOT] IN (expression-list)".
 *
 * a->lexpr is the lefthand expression, a->rexpr is the List of righthand
 * expressions, and a->name is "=" for IN or "<>" for NOT IN.
 *
 * When no input is a RowExpr, there is more than one righthand expression,
 * and the common type selected for all the inputs has an array type, the
 * result is built by make_scalar_array_op() over an ArrayExpr holding the
 * coerced righthand expressions.  Otherwise the result is a tree of
 * per-item comparisons, each using its own copy of the lefthand
 * expression, combined with OR (for "=") or AND (for "<>").
 *
 * Reports an error if row and non-row expressions are mixed.
 */
static Node *
transformAExprIn(ParseState *pstate, A_Expr *a)
{
	Node	   *lexpr;
	List	   *rexprs;
	List	   *typeids;
	bool		useOr;
	bool		haveRowExpr;
	Node	   *result;
	ListCell   *l;

	/*
	 * If the operator is <>, combine with AND not OR.
	 */
	if (strcmp(strVal(linitial(a->name)), "<>") == 0)
		useOr = false;
	else
		useOr = true;

	/*
	 * We try to generate a ScalarArrayOpExpr from IN/NOT IN, but this is
	 * only possible if the inputs are all scalars (no RowExprs) and there
	 * is a suitable array type available.  If not, we fall back to a
	 * boolean condition tree with multiple copies of the lefthand expression.
	 *
	 * First step: transform all the inputs, and detect whether any are
	 * RowExprs.
	 */
	lexpr = transformExpr(pstate, a->lexpr);
	haveRowExpr = (lexpr && IsA(lexpr, RowExpr));
	/* The LHS type goes first so that it is preferred by type resolution */
	typeids = list_make1_oid(exprType(lexpr));
	rexprs = NIL;
	foreach(l, (List *) a->rexpr)
	{
		Node	   *rexpr = transformExpr(pstate, lfirst(l));

		haveRowExpr |= (rexpr && IsA(rexpr, RowExpr));
		rexprs = lappend(rexprs, rexpr);
		typeids = lappend_oid(typeids, exprType(rexpr));
	}

	/*
	 * If not forced by presence of RowExpr, try to resolve a common
	 * scalar type for all the expressions, and see if it has an array type.
	 * (But if there's only one righthand expression, we may as well just
	 * fall through and generate a simple = comparison.)
	 */
	if (!haveRowExpr && list_length(rexprs) != 1)
	{
		Oid			scalar_type;
		Oid			array_type;

		/*
		 * Select a common type for the array elements.  Note that since
		 * the LHS' type is first in the list, it will be preferred when
		 * there is doubt (eg, when all the RHS items are unknown literals).
		 */
		scalar_type = select_common_type(typeids, "IN");

		/* Do we have an array type to use? */
		array_type = get_array_type(scalar_type);
		if (array_type != InvalidOid)
		{
			/*
			 * OK: coerce all the right-hand inputs to the common type
			 * and build an ArrayExpr for them.
			 */
			List	   *aexprs;
			ArrayExpr  *newa;

			aexprs = NIL;
			foreach(l, rexprs)
			{
				Node	   *rexpr = (Node *) lfirst(l);

				rexpr = coerce_to_common_type(pstate, rexpr,
											  scalar_type,
											  "IN");
				aexprs = lappend(aexprs, rexpr);
			}
			newa = makeNode(ArrayExpr);
			newa->array_typeid = array_type;
			newa->element_typeid = scalar_type;
			newa->elements = aexprs;
			newa->multidims = false;

			return (Node *) make_scalar_array_op(pstate,
												 a->name,
												 useOr,
												 lexpr,
												 (Node *) newa);
		}
	}

	/*
	 * Must do it the hard way, ie, with a boolean expression tree.
	 */
	result = NULL;
	foreach(l, rexprs)
	{
		Node	   *rexpr = (Node *) lfirst(l);
		Node	   *cmp;

		if (haveRowExpr)
		{
			/* Row comparison requires a RowExpr on both sides */
			if (!IsA(lexpr, RowExpr) ||
				!IsA(rexpr, RowExpr))
				ereport(ERROR,
						(errcode(ERRCODE_SYNTAX_ERROR),
						 errmsg("arguments of row IN must all be row expressions")));
			cmp = make_row_op(pstate,
							  a->name,
							  (RowExpr *) copyObject(lexpr),
							  (RowExpr *) rexpr);
		}
		else
			cmp = (Node *) make_op(pstate,
								   a->name,
								   copyObject(lexpr),
								   rexpr);

		cmp = coerce_to_boolean(pstate, cmp, "IN");

		/* Fold this comparison into the OR/AND chain built so far */
		if (result == NULL)
			result = cmp;
		else
			result = (Node *) makeBoolExpr(useOr ? OR_EXPR : AND_EXPR,
										   list_make2(result, cmp));
	}

	return result;
}
static Node *
......@@ -1818,32 +1954,25 @@ typecast_expression(ParseState *pstate, Node *expr, TypeName *typename)
/*
* Transform a "row op row" construct
*
* The input RowExprs are already transformed
*/
static Node *
make_row_op(ParseState *pstate, List *opname, Node *ltree, Node *rtree)
make_row_op(ParseState *pstate, List *opname,
RowExpr *lrow, RowExpr *rrow)
{
Node *result = NULL;
RowExpr *lrow,
*rrow;
List *largs,
*rargs;
List *largs = lrow->args;
List *rargs = rrow->args;
ListCell *l,
*r;
char *oprname;
BoolExprType boolop;
/* Inputs are untransformed RowExprs */
lrow = (RowExpr *) transformExpr(pstate, ltree);
rrow = (RowExpr *) transformExpr(pstate, rtree);
Assert(IsA(lrow, RowExpr));
Assert(IsA(rrow, RowExpr));
largs = lrow->args;
rargs = rrow->args;
if (list_length(largs) != list_length(rargs))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("unequal number of entries in row expression")));
errmsg("unequal number of entries in row expressions")));
/*
* XXX it's really wrong to generate a simple AND combination for < <= >
......@@ -1898,31 +2027,23 @@ make_row_op(ParseState *pstate, List *opname, Node *ltree, Node *rtree)
/*
* Transform a "row IS DISTINCT FROM row" construct
*
* The input RowExprs are already transformed
*/
static Node *
make_row_distinct_op(ParseState *pstate, List *opname,
Node *ltree, Node *rtree)
RowExpr *lrow, RowExpr *rrow)
{
Node *result = NULL;
RowExpr *lrow,
*rrow;
List *largs,
*rargs;
List *largs = lrow->args;
List *rargs = rrow->args;
ListCell *l,
*r;
/* Inputs are untransformed RowExprs */
lrow = (RowExpr *) transformExpr(pstate, ltree);
rrow = (RowExpr *) transformExpr(pstate, rtree);
Assert(IsA(lrow, RowExpr));
Assert(IsA(rrow, RowExpr));
largs = lrow->args;
rargs = rrow->args;
if (list_length(largs) != list_length(rargs))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("unequal number of entries in row expression")));
errmsg("unequal number of entries in row expressions")));
forboth(l, largs, r, rargs)
{
......
......@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.296 2005/11/22 18:17:31 momjian Exp $
* $PostgreSQL: pgsql/src/include/nodes/parsenodes.h,v 1.297 2005/11/28 04:35:32 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -194,7 +194,8 @@ typedef enum A_Expr_Kind
AEXPR_OP_ALL, /* scalar op ALL (array) */
AEXPR_DISTINCT, /* IS DISTINCT FROM - name must be "=" */
AEXPR_NULLIF, /* NULLIF - name must be "=" */
AEXPR_OF /* IS (not) OF - name must be "=" or "!=" */
AEXPR_OF, /* IS [NOT] OF - name must be "=" or "<>" */
AEXPR_IN /* [NOT] IN - name must be "=" or "<>" */
} A_Expr_Kind;
typedef struct A_Expr
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment