Commit b5415e3c authored by Tom Lane

Support parameterized TidPaths.

Up to now we've not worried much about joins where the join key is a
relation's CTID column, reasoning that storing a table's CTIDs in some
other table would be pretty useless.  However, there are use-cases for
this sort of query involving self-joins, so that argument doesn't really
hold water.

This patch allows generating plans for joins on CTID that use a nestloop
with inner TidScan, similar to what we might do with an index on the join
column.  This is the most efficient way to join when the outer side of
the nestloop is expected to yield relatively few rows.

This change requires upgrading tidpath.c and the generated TidPaths
to work with RestrictInfos instead of bare qual clauses, but that's
long-postponed technical debt anyway.

Discussion: https://postgr.es/m/17443.1545435266@sss.pgh.pa.us
parent 6f19a8c4
......@@ -1202,15 +1202,18 @@ cost_tidscan(Path *path, PlannerInfo *root,
ntuples = 0;
foreach(l, tidquals)
{
if (IsA(lfirst(l), ScalarArrayOpExpr))
RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
Expr *qual = rinfo->clause;
if (IsA(qual, ScalarArrayOpExpr))
{
/* Each element of the array yields 1 tuple */
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) lfirst(l);
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) qual;
Node *arraynode = (Node *) lsecond(saop->args);
ntuples += estimate_array_length(arraynode);
}
else if (IsA(lfirst(l), CurrentOfExpr))
else if (IsA(qual, CurrentOfExpr))
{
/* CURRENT OF yields 1 tuple */
isCurrentOf = true;
......
This diff is collapsed.
......@@ -3083,18 +3083,72 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path,
TidScan *scan_plan;
Index scan_relid = best_path->path.parent->relid;
List *tidquals = best_path->tidquals;
List *ortidquals;
/* it should be a base rel... */
Assert(scan_relid > 0);
Assert(best_path->path.parent->rtekind == RTE_RELATION);
/*
* The qpqual list must contain all restrictions not enforced by the
* tidquals list. Since tidquals has OR semantics, we have to be careful
* about matching it up to scan_clauses. It's convenient to handle the
* single-tidqual case separately from the multiple-tidqual case. In the
* single-tidqual case, we look through the scan_clauses while they are
* still in RestrictInfo form, and drop any that are redundant with the
* tidqual.
*
* In normal cases simple pointer equality checks will be enough to spot
* duplicate RestrictInfos, so we try that first.
*
* Another common case is that a scan_clauses entry is generated from the
* same EquivalenceClass as some tidqual, and is therefore redundant with
* it, though not equal.
*
* Unlike indexpaths, we don't bother with predicate_implied_by(); the
* number of cases where it could win are pretty small.
*/
if (list_length(tidquals) == 1)
{
List *qpqual = NIL;
ListCell *l;
foreach(l, scan_clauses)
{
RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
if (rinfo->pseudoconstant)
continue; /* we may drop pseudoconstants here */
if (list_member_ptr(tidquals, rinfo))
continue; /* simple duplicate */
if (is_redundant_derived_clause(rinfo, tidquals))
continue; /* derived from same EquivalenceClass */
qpqual = lappend(qpqual, rinfo);
}
scan_clauses = qpqual;
}
/* Sort clauses into best execution order */
scan_clauses = order_qual_clauses(root, scan_clauses);
/* Reduce RestrictInfo list to bare expressions; ignore pseudoconstants */
/* Reduce RestrictInfo lists to bare expressions; ignore pseudoconstants */
tidquals = extract_actual_clauses(tidquals, false);
scan_clauses = extract_actual_clauses(scan_clauses, false);
/*
* If we have multiple tidquals, it's more convenient to remove duplicate
* scan_clauses after stripping the RestrictInfos. In this situation,
* because the tidquals represent OR sub-clauses, they could not have come
* from EquivalenceClasses so we don't have to worry about matching up
* non-identical clauses. On the other hand, because tidpath.c will have
* extracted those sub-clauses from some OR clause and built its own list,
* we will certainly not have pointer equality to any scan clause. So
* convert the tidquals list to an explicit OR clause and see if we can
* match it via equal() to any scan clause.
*/
if (list_length(tidquals) > 1)
scan_clauses = list_difference(scan_clauses,
list_make1(make_orclause(tidquals)));
/* Replace any outer-relation variables with nestloop params */
if (best_path->path.param_info)
{
......@@ -3104,15 +3158,6 @@ create_tidscan_plan(PlannerInfo *root, TidPath *best_path,
replace_nestloop_params(root, (Node *) scan_clauses);
}
/*
* Remove any clauses that are TID quals. This is a bit tricky since the
* tidquals list has implicit OR semantics.
*/
ortidquals = tidquals;
if (list_length(ortidquals) > 1)
ortidquals = list_make1(make_orclause(ortidquals));
scan_clauses = list_difference(scan_clauses, ortidquals);
scan_plan = make_tidscan(tlist,
scan_clauses,
scan_relid,
......
......@@ -3715,12 +3715,8 @@ do { \
{
TidPath *tpath;
/*
* TidPath contains tidquals, which do not contain any
* external parameters per create_tidscan_path(). So don't
* bother to translate those.
*/
FLAT_COPY_PATH(tpath, path, TidPath);
ADJUST_CHILD_ATTRS(tpath->tidquals);
new_path = (Path *) tpath;
}
break;
......
......@@ -478,7 +478,8 @@ typedef struct BitmapHeapScan
* tid scan node
*
* tidquals is an implicitly OR'ed list of qual expressions of the form
* "CTID = pseudoconstant" or "CTID = ANY(pseudoconstant_array)".
* "CTID = pseudoconstant", or "CTID = ANY(pseudoconstant_array)",
* or a CurrentOfExpr for the relation.
* ----------------
*/
typedef struct TidScan
......
......@@ -1229,8 +1229,8 @@ typedef struct BitmapOrPath
* TidPath represents a scan by TID
*
* tidquals is an implicitly OR'ed list of qual expressions of the form
* "CTID = pseudoconstant" or "CTID = ANY(pseudoconstant_array)".
* Note they are bare expressions, not RestrictInfos.
* "CTID = pseudoconstant", or "CTID = ANY(pseudoconstant_array)",
* or a CurrentOfExpr for the relation.
*/
typedef struct TidPath
{
......
......@@ -40,6 +40,22 @@ SELECT ctid, * FROM tidscan WHERE '(0,1)' = ctid;
(0,1) | 1
(1 row)
-- OR'd clauses
EXPLAIN (COSTS OFF)
SELECT ctid, * FROM tidscan WHERE ctid = '(0,2)' OR '(0,1)' = ctid;
QUERY PLAN
--------------------------------------------------------------
Tid Scan on tidscan
TID Cond: ((ctid = '(0,2)'::tid) OR ('(0,1)'::tid = ctid))
(2 rows)
SELECT ctid, * FROM tidscan WHERE ctid = '(0,2)' OR '(0,1)' = ctid;
ctid | id
-------+----
(0,1) | 1
(0,2) | 2
(2 rows)
-- ctid = ScalarArrayOp - implemented as tidscan
EXPLAIN (COSTS OFF)
SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,1)', '(0,2)']::tid[]);
......@@ -92,6 +108,45 @@ WHERE (id = 3 AND ctid IN ('(0,2)', '(0,3)')) OR (ctid = '(0,1)' AND id = 1);
(0,3) | 3
(2 rows)
-- nestloop-with-inner-tidscan joins on tid
EXPLAIN (COSTS OFF)
SELECT t1.ctid, t1.*, t2.ctid, t2.*
FROM tidscan t1 JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
QUERY PLAN
------------------------------------
Nested Loop
-> Seq Scan on tidscan t1
Filter: (id = 1)
-> Tid Scan on tidscan t2
TID Cond: (ctid = t1.ctid)
(5 rows)
SELECT t1.ctid, t1.*, t2.ctid, t2.*
FROM tidscan t1 JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
ctid | id | ctid | id
-------+----+-------+----
(0,1) | 1 | (0,1) | 1
(1 row)
EXPLAIN (COSTS OFF)
SELECT t1.ctid, t1.*, t2.ctid, t2.*
FROM tidscan t1 LEFT JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
QUERY PLAN
------------------------------------
Nested Loop Left Join
-> Seq Scan on tidscan t1
Filter: (id = 1)
-> Tid Scan on tidscan t2
TID Cond: (t1.ctid = ctid)
(5 rows)
SELECT t1.ctid, t1.*, t2.ctid, t2.*
FROM tidscan t1 LEFT JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
ctid | id | ctid | id
-------+----+-------+----
(0,1) | 1 | (0,1) | 1
(1 row)
-- exercise backward scan and rewind
BEGIN;
DECLARE c CURSOR FOR
......
......@@ -17,6 +17,11 @@ EXPLAIN (COSTS OFF)
SELECT ctid, * FROM tidscan WHERE '(0,1)' = ctid;
SELECT ctid, * FROM tidscan WHERE '(0,1)' = ctid;
-- OR'd clauses
EXPLAIN (COSTS OFF)
SELECT ctid, * FROM tidscan WHERE ctid = '(0,2)' OR '(0,1)' = ctid;
SELECT ctid, * FROM tidscan WHERE ctid = '(0,2)' OR '(0,1)' = ctid;
-- ctid = ScalarArrayOp - implemented as tidscan
EXPLAIN (COSTS OFF)
SELECT ctid, * FROM tidscan WHERE ctid = ANY(ARRAY['(0,1)', '(0,2)']::tid[]);
......@@ -34,6 +39,18 @@ WHERE (id = 3 AND ctid IN ('(0,2)', '(0,3)')) OR (ctid = '(0,1)' AND id = 1);
SELECT ctid, * FROM tidscan
WHERE (id = 3 AND ctid IN ('(0,2)', '(0,3)')) OR (ctid = '(0,1)' AND id = 1);
-- nestloop-with-inner-tidscan joins on tid
EXPLAIN (COSTS OFF)
SELECT t1.ctid, t1.*, t2.ctid, t2.*
FROM tidscan t1 JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
SELECT t1.ctid, t1.*, t2.ctid, t2.*
FROM tidscan t1 JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
EXPLAIN (COSTS OFF)
SELECT t1.ctid, t1.*, t2.ctid, t2.*
FROM tidscan t1 LEFT JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
SELECT t1.ctid, t1.*, t2.ctid, t2.*
FROM tidscan t1 LEFT JOIN tidscan t2 ON t1.ctid = t2.ctid WHERE t1.id = 1;
-- exercise backward scan and rewind
BEGIN;
DECLARE c CURSOR FOR
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment