Commit aa09cd24 authored by Robert Haas

postgres_fdw: Consider foreign joining and foreign sorting together.

Commit ccd8f979 gave us the ability to
request that the remote side sort the data, and, later, commit
e4106b25 gave us the ability to
request that the remote side perform the join for us rather than doing
it locally.  But we could not do both things at the same time: a
remote SQL query that had an ORDER BY clause would never be a join.
This commit adds that capability.

Ashutosh Bapat, reviewed by me.
parent d31f20e2
...@@ -283,9 +283,6 @@ static void postgresGetForeignJoinPaths(PlannerInfo *root, ...@@ -283,9 +283,6 @@ static void postgresGetForeignJoinPaths(PlannerInfo *root,
JoinPathExtraData *extra); JoinPathExtraData *extra);
static bool postgresRecheckForeignScan(ForeignScanState *node, static bool postgresRecheckForeignScan(ForeignScanState *node,
TupleTableSlot *slot); TupleTableSlot *slot);
static List *get_useful_pathkeys_for_relation(PlannerInfo *root,
RelOptInfo *rel);
static List *get_useful_ecs_for_relation(PlannerInfo *root, RelOptInfo *rel);
/* /*
* Helper functions * Helper functions
...@@ -331,6 +328,11 @@ static void conversion_error_callback(void *arg); ...@@ -331,6 +328,11 @@ static void conversion_error_callback(void *arg);
static bool foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, static bool foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel,
JoinType jointype, RelOptInfo *outerrel, RelOptInfo *innerrel, JoinType jointype, RelOptInfo *outerrel, RelOptInfo *innerrel,
JoinPathExtraData *extra); JoinPathExtraData *extra);
static List *get_useful_pathkeys_for_relation(PlannerInfo *root,
RelOptInfo *rel);
static List *get_useful_ecs_for_relation(PlannerInfo *root, RelOptInfo *rel);
static void add_paths_with_pathkeys_for_rel(PlannerInfo *root, RelOptInfo *rel,
Path *epq_path);
/* /*
...@@ -502,6 +504,14 @@ postgresGetForeignRelSize(PlannerInfo *root, ...@@ -502,6 +504,14 @@ postgresGetForeignRelSize(PlannerInfo *root,
cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root); cost_qual_eval(&fpinfo->local_conds_cost, fpinfo->local_conds, root);
/*
* Set cached relation costs to some negative value, so that we can detect
* when they are set to some sensible costs during one (usually the first)
* of the calls to estimate_path_cost_size().
*/
fpinfo->rel_startup_cost = -1;
fpinfo->rel_total_cost = -1;
/* /*
* If the table or the server is configured to use remote estimates, * If the table or the server is configured to use remote estimates,
* connect to the foreign server and execute EXPLAIN to estimate the * connect to the foreign server and execute EXPLAIN to estimate the
...@@ -774,7 +784,6 @@ postgresGetForeignPaths(PlannerInfo *root, ...@@ -774,7 +784,6 @@ postgresGetForeignPaths(PlannerInfo *root,
ForeignPath *path; ForeignPath *path;
List *ppi_list; List *ppi_list;
ListCell *lc; ListCell *lc;
List *useful_pathkeys_list = NIL; /* List of all pathkeys */
/* /*
* Create simplest ForeignScan path node and add it to baserel. This path * Create simplest ForeignScan path node and add it to baserel. This path
...@@ -793,30 +802,8 @@ postgresGetForeignPaths(PlannerInfo *root, ...@@ -793,30 +802,8 @@ postgresGetForeignPaths(PlannerInfo *root,
NIL); /* no fdw_private list */ NIL); /* no fdw_private list */
add_path(baserel, (Path *) path); add_path(baserel, (Path *) path);
useful_pathkeys_list = get_useful_pathkeys_for_relation(root, baserel); /* Add paths with pathkeys */
add_paths_with_pathkeys_for_rel(root, baserel, NULL);
/* Create one path for each set of pathkeys we found above. */
foreach(lc, useful_pathkeys_list)
{
double rows;
int width;
Cost startup_cost;
Cost total_cost;
List *useful_pathkeys = lfirst(lc);
estimate_path_cost_size(root, baserel, NIL, useful_pathkeys,
&rows, &width, &startup_cost, &total_cost);
add_path(baserel, (Path *)
create_foreignscan_path(root, baserel,
rows,
startup_cost,
total_cost,
useful_pathkeys,
NULL,
NULL,
NIL));
}
/* /*
* If we're not using remote estimates, stop here. We have no way to * If we're not using remote estimates, stop here. We have no way to
...@@ -2182,7 +2169,18 @@ estimate_path_cost_size(PlannerInfo *root, ...@@ -2182,7 +2169,18 @@ estimate_path_cost_size(PlannerInfo *root,
/* Back into an estimate of the number of retrieved rows. */ /* Back into an estimate of the number of retrieved rows. */
retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel); retrieved_rows = clamp_row_est(rows / fpinfo->local_conds_sel);
if (foreignrel->reloptkind != RELOPT_JOINREL) /*
* We will come here again and again with different sets of pathkeys
* that the caller wants to cost. We don't need to calculate the cost of
* the bare scan each time. Instead, use the costs if we have cached them
* already.
*/
if (fpinfo->rel_startup_cost > 0 && fpinfo->rel_total_cost > 0)
{
startup_cost = fpinfo->rel_startup_cost;
run_cost = fpinfo->rel_total_cost - fpinfo->rel_startup_cost;
}
else if (foreignrel->reloptkind != RELOPT_JOINREL)
{ {
/* Clamp retrieved rows estimates to at most foreignrel->tuples. */ /* Clamp retrieved rows estimates to at most foreignrel->tuples. */
retrieved_rows = Min(retrieved_rows, foreignrel->tuples); retrieved_rows = Min(retrieved_rows, foreignrel->tuples);
...@@ -2284,13 +2282,19 @@ estimate_path_cost_size(PlannerInfo *root, ...@@ -2284,13 +2282,19 @@ estimate_path_cost_size(PlannerInfo *root,
} }
/* /*
* Cache the costs prior to adding the costs for transferring data from * Cache the costs for scans without any pathkeys or parameterization
* the foreign server. These costs are useful for costing the join between * before adding the costs for transferring data from the foreign server.
* this relation and another foreign relation, when the cost of join can * These costs are useful for costing the join between this relation and
* not be obtained from the foreign server. * another foreign relation or to calculate the costs of paths with
* pathkeys for this relation, when the costs can not be obtained from the
* foreign server. This function will be called at least once for every
* foreign relation without pathkeys and parameterization.
*/ */
fpinfo->rel_startup_cost = startup_cost; if (pathkeys == NIL && param_join_conds == NIL)
fpinfo->rel_total_cost = total_cost; {
fpinfo->rel_startup_cost = startup_cost;
fpinfo->rel_total_cost = total_cost;
}
/* /*
* Add some additional cost factors to account for connection overhead * Add some additional cost factors to account for connection overhead
...@@ -3458,6 +3462,14 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, ...@@ -3458,6 +3462,14 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
fpinfo->fdw_startup_cost = fpinfo_o->fdw_startup_cost; fpinfo->fdw_startup_cost = fpinfo_o->fdw_startup_cost;
fpinfo->fdw_tuple_cost = fpinfo_o->fdw_tuple_cost; fpinfo->fdw_tuple_cost = fpinfo_o->fdw_tuple_cost;
/*
* Set cached relation costs to some negative value, so that we can detect
* when they are set to some sensible costs, during one (usually the
* first) of the calls to estimate_path_cost_size().
*/
fpinfo->rel_startup_cost = -1;
fpinfo->rel_total_cost = -1;
/* Mark that this join can be pushed down safely */ /* Mark that this join can be pushed down safely */
fpinfo->pushdown_safe = true; fpinfo->pushdown_safe = true;
...@@ -3532,6 +3544,39 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype, ...@@ -3532,6 +3544,39 @@ foreign_join_ok(PlannerInfo *root, RelOptInfo *joinrel, JoinType jointype,
return true; return true;
} }
/*
 * add_paths_with_pathkeys_for_rel
 *		Add one ForeignPath to rel for every useful pathkeys list.
 *
 * The candidate orderings are obtained from
 * get_useful_pathkeys_for_relation().  Each one is costed with
 * estimate_path_cost_size() (with no parameterizing join conditions) and the
 * resulting foreign scan path is offered to add_path().
 *
 * epq_path is passed through to create_foreignscan_path() unchanged; the
 * base-relation caller passes NULL, the join-relation caller passes its own
 * epq_path.
 */
static void
add_paths_with_pathkeys_for_rel(PlannerInfo *root, RelOptInfo *rel,
								Path *epq_path)
{
	/* Every pathkeys list worth asking the remote side to sort by. */
	List	   *candidate_orderings = get_useful_pathkeys_for_relation(root, rel);
	ListCell   *cell;

	/* Build and register one sorted path per candidate ordering. */
	foreach(cell, candidate_orderings)
	{
		List	   *ordering = lfirst(cell);
		double		est_rows;
		int			est_width;
		Cost		est_startup_cost;
		Cost		est_total_cost;
		Path	   *sorted_path;

		estimate_path_cost_size(root, rel, NIL, ordering,
								&est_rows, &est_width,
								&est_startup_cost, &est_total_cost);

		sorted_path = (Path *) create_foreignscan_path(root, rel,
													   est_rows,
													   est_startup_cost,
													   est_total_cost,
													   ordering,
													   NULL,
													   epq_path,
													   NIL);

		add_path(rel, sorted_path);
	}
}
/* /*
* postgresGetForeignJoinPaths * postgresGetForeignJoinPaths
* Add possible ForeignPath to joinrel, if join is safe to push down. * Add possible ForeignPath to joinrel, if join is safe to push down.
...@@ -3670,7 +3715,8 @@ postgresGetForeignJoinPaths(PlannerInfo *root, ...@@ -3670,7 +3715,8 @@ postgresGetForeignJoinPaths(PlannerInfo *root,
/* Add generated path into joinrel by add_path(). */ /* Add generated path into joinrel by add_path(). */
add_path(joinrel, (Path *) joinpath); add_path(joinrel, (Path *) joinpath);
/* XXX Consider pathkeys for the join relation */ /* Consider pathkeys for the join relation */
add_paths_with_pathkeys_for_rel(root, joinrel, epq_path);
/* XXX Consider parameterized paths for the join relation */ /* XXX Consider parameterized paths for the join relation */
} }
...@@ -3877,7 +3923,7 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel) ...@@ -3877,7 +3923,7 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
{ {
EquivalenceMember *em = lfirst(lc_em); EquivalenceMember *em = lfirst(lc_em);
if (bms_equal(em->em_relids, rel->relids)) if (bms_is_subset(em->em_relids, rel->relids))
{ {
/* /*
* If there is more than one equivalence member whose Vars are * If there is more than one equivalence member whose Vars are
......
...@@ -237,6 +237,11 @@ SELECT t1.c1, t2."C 1" FROM ft2 t1 JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFF ...@@ -237,6 +237,11 @@ SELECT t1.c1, t2."C 1" FROM ft2 t1 JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFF
EXPLAIN (VERBOSE, COSTS false) EXPLAIN (VERBOSE, COSTS false)
SELECT t1.c1, t2."C 1" FROM ft2 t1 LEFT JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10; SELECT t1.c1, t2."C 1" FROM ft2 t1 LEFT JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10;
SELECT t1.c1, t2."C 1" FROM ft2 t1 LEFT JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10; SELECT t1.c1, t2."C 1" FROM ft2 t1 LEFT JOIN "S 1"."T 1" t2 ON (t1.c1 = t2."C 1") OFFSET 100 LIMIT 10;
-- A join between local table and foreign join. ORDER BY clause is added to the
-- foreign join so that the local table can be joined using merge join strategy.
EXPLAIN (COSTS false, VERBOSE)
SELECT t1."C 1" FROM "S 1"."T 1" t1 left join ft1 t2 join ft2 t3 on (t2.c1 = t3.c1) on (t3.c1 = t1."C 1") OFFSET 100 LIMIT 10;
SELECT t1."C 1" FROM "S 1"."T 1" t1 left join ft1 t2 join ft2 t3 on (t2.c1 = t3.c1) on (t3.c1 = t1."C 1") OFFSET 100 LIMIT 10;
RESET enable_hashjoin; RESET enable_hashjoin;
RESET enable_nestloop; RESET enable_nestloop;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment