Commit ff673f55 authored by Tom Lane's avatar Tom Lane

Fix convert_IN_to_join to properly handle the case where the subselect's

output is not of the same type that's needed for the IN comparison (ie,
where the parser inserted an implicit coercion above the subselect result).
We should record the coerced expression, not just a raw Var referencing
the subselect output, as the quantity that needs to be unique-ified if
we choose to implement the IN as Unique followed by a plain join.

As of 8.3 this error was causing crashes, as seen in bug #4113 from Javier
Hernandez, because the executor was being told to hash or sort the raw
subselect output column using operators appropriate to the coerced type.

In prior versions there was no crash because the executor chose the
hash or sort operators for itself based on the column type it saw.
However, that's still not really right, because what's unique for one data
type might not be unique for another.  In corner cases we could get multiple
outputs of a row that should appear only once, as demonstrated by the
regression test case included in this commit.

However, this patch doesn't apply cleanly to 8.2 or before, and the code
involved has shifted enough over time that I'm hesitant to try to back-patch.
Given the lack of complaints from the field about such corner cases, I think
the bug may not be important enough to risk breaking other things with a
back-patch.
parent a31b03ba
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.129 2008/01/17 20:35:27 tgl Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/plan/subselect.c,v 1.130 2008/04/21 20:54:15 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -725,10 +725,13 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink) ...@@ -725,10 +725,13 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
Query *parse = root->parse; Query *parse = root->parse;
Query *subselect = (Query *) sublink->subselect; Query *subselect = (Query *) sublink->subselect;
List *in_operators; List *in_operators;
List *left_exprs;
List *right_exprs;
Relids left_varnos; Relids left_varnos;
int rtindex; int rtindex;
RangeTblEntry *rte; RangeTblEntry *rte;
RangeTblRef *rtr; RangeTblRef *rtr;
List *subquery_vars;
InClauseInfo *ininfo; InClauseInfo *ininfo;
Node *result; Node *result;
...@@ -744,28 +747,37 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink) ...@@ -744,28 +747,37 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
return NULL; return NULL;
if (sublink->testexpr && IsA(sublink->testexpr, OpExpr)) if (sublink->testexpr && IsA(sublink->testexpr, OpExpr))
{ {
Oid opno = ((OpExpr *) sublink->testexpr)->opno; OpExpr *op = (OpExpr *) sublink->testexpr;
Oid opno = op->opno;
List *opfamilies; List *opfamilies;
List *opstrats; List *opstrats;
if (list_length(op->args) != 2)
return NULL; /* not binary operator? */
get_op_btree_interpretation(opno, &opfamilies, &opstrats); get_op_btree_interpretation(opno, &opfamilies, &opstrats);
if (!list_member_int(opstrats, ROWCOMPARE_EQ)) if (!list_member_int(opstrats, ROWCOMPARE_EQ))
return NULL; return NULL;
in_operators = list_make1_oid(opno); in_operators = list_make1_oid(opno);
left_exprs = list_make1(linitial(op->args));
right_exprs = list_make1(lsecond(op->args));
} }
else if (and_clause(sublink->testexpr)) else if (and_clause(sublink->testexpr))
{ {
ListCell *lc; ListCell *lc;
/* OK, but we need to extract the per-column operator OIDs */ /* OK, but we need to extract the per-column info */
in_operators = NIL; in_operators = left_exprs = right_exprs = NIL;
foreach(lc, ((BoolExpr *) sublink->testexpr)->args) foreach(lc, ((BoolExpr *) sublink->testexpr)->args)
{ {
OpExpr *op = (OpExpr *) lfirst(lc); OpExpr *op = (OpExpr *) lfirst(lc);
if (!IsA(op, OpExpr)) /* probably shouldn't happen */ if (!IsA(op, OpExpr)) /* probably shouldn't happen */
return NULL; return NULL;
if (list_length(op->args) != 2)
return NULL; /* not binary operator? */
in_operators = lappend_oid(in_operators, op->opno); in_operators = lappend_oid(in_operators, op->opno);
left_exprs = lappend(left_exprs, linitial(op->args));
right_exprs = lappend(right_exprs, lsecond(op->args));
} }
} }
else else
...@@ -782,10 +794,13 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink) ...@@ -782,10 +794,13 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
* The left-hand expressions must contain some Vars of the current query, * The left-hand expressions must contain some Vars of the current query,
* else it's not gonna be a join. * else it's not gonna be a join.
*/ */
left_varnos = pull_varnos(sublink->testexpr); left_varnos = pull_varnos((Node *) left_exprs);
if (bms_is_empty(left_varnos)) if (bms_is_empty(left_varnos))
return NULL; return NULL;
/* ... and the right-hand expressions better not contain Vars at all */
Assert(!contain_var_clause((Node *) right_exprs));
/* /*
* The combining operators and left-hand expressions mustn't be volatile. * The combining operators and left-hand expressions mustn't be volatile.
*/ */
...@@ -810,6 +825,20 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink) ...@@ -810,6 +825,20 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
rtr->rtindex = rtindex; rtr->rtindex = rtindex;
parse->jointree->fromlist = lappend(parse->jointree->fromlist, rtr); parse->jointree->fromlist = lappend(parse->jointree->fromlist, rtr);
/*
* Build a list of Vars representing the subselect outputs.
*/
subquery_vars = generate_subquery_vars(root,
subselect->targetList,
rtindex);
/*
* Build the result qual expression, replacing Params with these Vars.
*/
result = convert_testexpr(root,
sublink->testexpr,
subquery_vars);
/* /*
* Now build the InClauseInfo node. * Now build the InClauseInfo node.
*/ */
...@@ -819,24 +848,20 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink) ...@@ -819,24 +848,20 @@ convert_IN_to_join(PlannerInfo *root, SubLink *sublink)
ininfo->in_operators = in_operators; ininfo->in_operators = in_operators;
/* /*
* ininfo->sub_targetlist is filled with a list of Vars representing the * ininfo->sub_targetlist must be filled with a list of expressions that
* subselect outputs. * would need to be unique-ified if we try to implement the IN using a
* regular join to unique-ified subquery output. This is most easily done
* by applying convert_testexpr to just the RHS inputs of the testexpr
* operators. That handles cases like type coercions of the subquery
* outputs, clauses dropped due to const-simplification, etc.
*/ */
ininfo->sub_targetlist = generate_subquery_vars(root, ininfo->sub_targetlist = (List *) convert_testexpr(root,
subselect->targetList, (Node *) right_exprs,
rtindex); subquery_vars);
Assert(list_length(in_operators) == list_length(ininfo->sub_targetlist));
/* Add the completed node to the query's list */ /* Add the completed node to the query's list */
root->in_info_list = lappend(root->in_info_list, ininfo); root->in_info_list = lappend(root->in_info_list, ininfo);
/*
* Build the result qual expression.
*/
result = convert_testexpr(root,
sublink->testexpr,
ininfo->sub_targetlist);
return result; return result;
} }
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.142 2008/01/01 19:45:50 momjian Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.143 2008/04/21 20:54:15 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -898,7 +898,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath) ...@@ -898,7 +898,7 @@ create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
/* /*
* translate_sub_tlist - get subquery column numbers represented by tlist * translate_sub_tlist - get subquery column numbers represented by tlist
* *
* The given targetlist should contain only Vars referencing the given relid. * The given targetlist usually contains only Vars referencing the given relid.
* Extract their varattnos (ie, the column numbers of the subquery) and return * Extract their varattnos (ie, the column numbers of the subquery) and return
* as an integer List. * as an integer List.
* *
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.155 2008/03/24 21:53:04 tgl Exp $ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.156 2008/04/21 20:54:15 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
...@@ -1107,8 +1107,10 @@ typedef struct OuterJoinInfo ...@@ -1107,8 +1107,10 @@ typedef struct OuterJoinInfo
* We record information about each such IN clause in an InClauseInfo struct. * We record information about each such IN clause in an InClauseInfo struct.
* These structs are kept in the PlannerInfo node's in_info_list. * These structs are kept in the PlannerInfo node's in_info_list.
* *
* Note: sub_targetlist is just a list of Vars or expressions; it does not * Note: sub_targetlist is a bit misnamed; it is a list of the expressions
* contain TargetEntry nodes. * on the RHS of the IN's join clauses. (This normally starts out as a list
* of Vars referencing the subquery outputs, but can get mutated if the
* subquery is flattened into the main query.)
*/ */
typedef struct InClauseInfo typedef struct InClauseInfo
...@@ -1116,8 +1118,8 @@ typedef struct InClauseInfo ...@@ -1116,8 +1118,8 @@ typedef struct InClauseInfo
NodeTag type; NodeTag type;
Relids lefthand; /* base relids in lefthand expressions */ Relids lefthand; /* base relids in lefthand expressions */
Relids righthand; /* base relids coming from the subselect */ Relids righthand; /* base relids coming from the subselect */
List *sub_targetlist; /* targetlist of original RHS subquery */ List *sub_targetlist; /* RHS expressions of the IN's comparisons */
List *in_operators; /* OIDs of the IN's equality operator(s) */ List *in_operators; /* OIDs of the IN's equality operators */
} InClauseInfo; } InClauseInfo;
/* /*
......
...@@ -408,3 +408,34 @@ select * from ( ...@@ -408,3 +408,34 @@ select * from (
0 0
(1 row) (1 row)
--
-- Test that an IN implemented using a UniquePath does unique-ification
-- with the right semantics, as per bug #4113. (Unfortunately we have
-- no simple way to ensure that this test case actually chooses that type
-- of plan, but it does in releases 7.4-8.3. Note that an ordering difference
-- here might mean that some other plan type is being used, rendering the test
-- pointless.)
--
create temp table numeric_table (num_col numeric);
insert into numeric_table values (1), (1.000000000000000000001), (2), (3);
create temp table float_table (float_col float8);
insert into float_table values (1), (2), (3);
select * from float_table
where float_col in (select num_col from numeric_table);
float_col
-----------
1
2
3
(3 rows)
select * from numeric_table
where num_col in (select float_col from float_table);
num_col
-------------------------
1
1.000000000000000000001
2
3
(4 rows)
...@@ -251,3 +251,24 @@ select * from ( ...@@ -251,3 +251,24 @@ select * from (
select min(unique1) from tenk1 as a select min(unique1) from tenk1 as a
where not exists (select 1 from tenk1 as b where b.unique2 = 10000) where not exists (select 1 from tenk1 as b where b.unique2 = 10000)
) ss; ) ss;
--
-- Test that an IN implemented using a UniquePath does unique-ification
-- with the right semantics, as per bug #4113. (Unfortunately we have
-- no simple way to ensure that this test case actually chooses that type
-- of plan, but it does in releases 7.4-8.3. Note that an ordering difference
-- here might mean that some other plan type is being used, rendering the test
-- pointless.)
--
create temp table numeric_table (num_col numeric);
insert into numeric_table values (1), (1.000000000000000000001), (2), (3);
create temp table float_table (float_col float8);
insert into float_table values (1), (2), (3);
select * from float_table
where float_col in (select num_col from numeric_table);
select * from numeric_table
where num_col in (select float_col from float_table);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment