Commit b30fb56b authored by Robert Haas's avatar Robert Haas

postgres_fdw: Push down FULL JOINs with restriction clauses.

The previous deparsing logic wasn't smart enough to produce subqueries
when deparsing; make it smart enough to do that.  However, we only do
it that way when necessary, because it generates more complicated SQL
which will be harder for any humans reading the queries to understand.

Etsuro Fujita, reviewed by Ashutosh Bapat

Discussion: http://postgr.es/m/c449261a-b033-dc02-9254-2fe5b7044795@lab.ntt.co.jp
parent a3eac988
This diff is collapsed.
......@@ -669,6 +669,13 @@ postgresGetForeignRelSize(PlannerInfo *root,
if (*refname && strcmp(refname, relname) != 0)
appendStringInfo(fpinfo->relation_name, " %s",
quote_identifier(rte->eref->aliasname));
/* No outer and inner relations. */
fpinfo->make_outerrel_subquery = false;
fpinfo->make_innerrel_subquery = false;
fpinfo->lower_subquery_rels = NULL;
/* Set the relation index. */
fpinfo->relation_index = baserel->relid;
}
/*
......@@ -1238,7 +1245,7 @@ postgresGetForeignPlan(PlannerInfo *root,
initStringInfo(&sql);
deparseSelectStmtForRel(&sql, root, foreignrel, fdw_scan_tlist,
remote_conds, best_path->path.pathkeys,
&retrieved_attrs, &params_list);
false, &retrieved_attrs, &params_list);
/*
* Build the fdw_private list that will be available to the executor.
......@@ -2550,8 +2557,8 @@ estimate_path_cost_size(PlannerInfo *root,
initStringInfo(&sql);
appendStringInfoString(&sql, "EXPLAIN ");
deparseSelectStmtForRel(&sql, root, foreignrel, fdw_scan_tlist,
remote_conds, pathkeys, &retrieved_attrs,
NULL);
remote_conds, pathkeys, false,
&retrieved_attrs, NULL);
/* Get the remote estimate */
conn = GetConnection(fpinfo->user, false);
......@@ -4149,11 +4156,23 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
fpinfo->innerrel = innerrel;
fpinfo->jointype = jointype;
/*
* By default, both the input relations are not required to be deparsed
* as subqueries, but there might be some relations covered by the input
* relations that are required to be deparsed as subqueries, so save the
* relids of those relations for later use by the deparser.
*/
fpinfo->make_outerrel_subquery = false;
fpinfo->make_innerrel_subquery = false;
Assert(bms_is_subset(fpinfo_o->lower_subquery_rels, outerrel->relids));
Assert(bms_is_subset(fpinfo_i->lower_subquery_rels, innerrel->relids));
fpinfo->lower_subquery_rels = bms_union(fpinfo_o->lower_subquery_rels,
fpinfo_i->lower_subquery_rels);
/*
* Pull the other remote conditions from the joining relations into join
* clauses or other remote clauses (remote_conds) of this relation
* wherever possible. This avoids building subqueries at every join step,
* which is not currently supported by the deparser logic.
* wherever possible. This avoids building subqueries at every join step.
*
* For an inner join, clauses from both the relations are added to the
* other remote clauses. For LEFT and RIGHT OUTER join, the clauses from
......@@ -4164,8 +4183,7 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
*
* For a FULL OUTER JOIN, the other clauses from either relation can not
* be added to the joinclauses or remote_conds, since each relation acts
* as an outer relation for the other. Consider such full outer join as
* unshippable because of the reasons mentioned above in this comment.
* as an outer relation for the other.
*
* The joining sides can not have local conditions, thus no need to test
* shippability of the clauses being pulled up.
......@@ -4194,8 +4212,29 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
break;
case JOIN_FULL:
if (fpinfo_i->remote_conds || fpinfo_o->remote_conds)
return false;
/*
* In this case, if any of the input relations has conditions,
* we need to deparse that relation as a subquery so that the
* conditions can be evaluated before the join. Remember it in
* the fpinfo of this relation so that the deparser can take
* appropriate action. Also, save the relids of base relations
* covered by that relation for later use by the deparser.
*/
if (fpinfo_o->remote_conds)
{
fpinfo->make_outerrel_subquery = true;
fpinfo->lower_subquery_rels =
bms_add_members(fpinfo->lower_subquery_rels,
outerrel->relids);
}
if (fpinfo_i->remote_conds)
{
fpinfo->make_innerrel_subquery = true;
fpinfo->lower_subquery_rels =
bms_add_members(fpinfo->lower_subquery_rels,
innerrel->relids);
}
break;
default:
......@@ -4276,6 +4315,16 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
get_jointype_name(fpinfo->jointype),
fpinfo_i->relation_name->data);
/*
* Set the relation index. This is defined as the position of this
* joinrel in the join_rel_list list plus the length of the rtable list.
* Note that since this joinrel is at the end of the join_rel_list list
* when we are called, we can get the position by list_length.
*/
Assert(fpinfo->relation_index == 0); /* shouldn't be set yet */
fpinfo->relation_index =
list_length(root->parse->rtable) + list_length(root->join_rel_list);
return true;
}
......
......@@ -95,6 +95,20 @@ typedef struct PgFdwRelationInfo
/* Grouping information */
List *grouped_tlist;
/* Subquery information */
bool make_outerrel_subquery; /* do we deparse outerrel as a
* subquery? */
bool make_innerrel_subquery; /* do we deparse innerrel as a
* subquery? */
Relids lower_subquery_rels; /* all relids appearing in lower
* subqueries */
/*
* Index of the relation. It is used to create an alias to a subquery
* representing the relation.
*/
int relation_index;
} PgFdwRelationInfo;
/* in postgres_fdw.c */
......@@ -161,7 +175,7 @@ extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
extern List *build_tlist_to_deparse(RelOptInfo *foreignrel);
extern void deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root,
RelOptInfo *foreignrel, List *tlist,
List *remote_conds, List *pathkeys,
List *remote_conds, List *pathkeys, bool is_subquery,
List **retrieved_attrs, List **params_list);
/* in shippable.c */
......
......@@ -391,9 +391,26 @@ EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10;
SELECT t1.c1, t2.c1 FROM ft4 t1 FULL JOIN ft5 t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1 OFFSET 45 LIMIT 10;
-- full outer join with restrictions on the joining relations
-- a. the joining relations are both base relations
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1;
SELECT t1.c1, t2.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t2 ON (t1.c1 = t2.c1) ORDER BY t1.c1, t2.c1;
EXPLAIN (VERBOSE, COSTS OFF)
SELECT 1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t2 ON (TRUE) OFFSET 10 LIMIT 10;
SELECT 1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t2 ON (TRUE) OFFSET 10 LIMIT 10;
-- b. one of the joining relations is a base relation and the other is a join
-- relation
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT t2.c1, t3.c1 FROM ft4 t2 LEFT JOIN ft5 t3 ON (t2.c1 = t3.c1) WHERE (t2.c1 between 50 and 60)) ss(a, b) ON (t1.c1 = ss.a) ORDER BY t1.c1, ss.a, ss.b;
SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT t2.c1, t3.c1 FROM ft4 t2 LEFT JOIN ft5 t3 ON (t2.c1 = t3.c1) WHERE (t2.c1 between 50 and 60)) ss(a, b) ON (t1.c1 = ss.a) ORDER BY t1.c1, ss.a, ss.b;
-- c. test deparsing the remote query as nested subqueries
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (t1.c1 = ss.a) ORDER BY t1.c1, ss.a, ss.b;
SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t1 FULL JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (t1.c1 = ss.a) ORDER BY t1.c1, ss.a, ss.b;
-- d. test deparsing rowmarked relations as subqueries
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM "S 1"."T 3" WHERE c1 = 50) t1 INNER JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (TRUE) ORDER BY t1.c1, ss.a, ss.b FOR UPDATE OF t1;
SELECT t1.c1, ss.a, ss.b FROM (SELECT c1 FROM "S 1"."T 3" WHERE c1 = 50) t1 INNER JOIN (SELECT t2.c1, t3.c1 FROM (SELECT c1 FROM ft4 WHERE c1 between 50 and 60) t2 FULL JOIN (SELECT c1 FROM ft5 WHERE c1 between 50 and 60) t3 ON (t2.c1 = t3.c1) WHERE t2.c1 IS NULL OR t2.c1 IS NOT NULL) ss(a, b) ON (TRUE) ORDER BY t1.c1, ss.a, ss.b FOR UPDATE OF t1;
-- full outer join + inner join
EXPLAIN (VERBOSE, COSTS OFF)
SELECT t1.c1, t2.c1, t3.c1 FROM ft4 t1 INNER JOIN ft5 t2 ON (t1.c1 = t2.c1 + 1 and t1.c1 between 50 and 60) FULL JOIN ft4 t3 ON (t2.c1 = t3.c1) ORDER BY t1.c1, t2.c1, t3.c1 LIMIT 10;
......@@ -793,6 +810,12 @@ explain (verbose, costs off)
select avg(t1.c1), sum(t2.c1) from ft4 t1 full join ft5 t2 on (t1.c1 = t2.c1) group by t2.c1 having (avg(t1.c1) is null and sum(t2.c1) < 10) or sum(t2.c1) is null order by 1 nulls last, 2;
select avg(t1.c1), sum(t2.c1) from ft4 t1 full join ft5 t2 on (t1.c1 = t2.c1) group by t2.c1 having (avg(t1.c1) is null and sum(t2.c1) < 10) or sum(t2.c1) is null order by 1 nulls last, 2;
-- Aggregate over FULL join needing to deparse the joining relations as
-- subqueries.
explain (verbose, costs off)
select count(*), sum(t1.c1), avg(t2.c1) from (select c1 from ft4 where c1 between 50 and 60) t1 full join (select c1 from ft5 where c1 between 50 and 60) t2 on (t1.c1 = t2.c1);
select count(*), sum(t1.c1), avg(t2.c1) from (select c1 from ft4 where c1 between 50 and 60) t1 full join (select c1 from ft5 where c1 between 50 and 60) t2 on (t1.c1 = t2.c1);
-- ORDER BY expression is part of the target list but not pushed down to
-- foreign server.
explain (verbose, costs off)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment