Commit 9fdb675f authored by Alvaro Herrera

Faster partition pruning

Add a new module backend/partitioning/partprune.c, implementing a more
sophisticated algorithm for partition pruning.  The new module uses each
partition's "boundinfo" for pruning instead of constraint exclusion,
based on an idea proposed by Robert Haas of a "pruning program": a list
of steps generated from the query quals which are run iteratively to
obtain a list of partitions that must be scanned in order to satisfy
those quals.

At present, this targets planner-time partition pruning, but there exist
further patches to apply partition pruning at execution time as well.

This commit also moves some definitions from include/catalog/partition.h
to a new file include/partitioning/partbounds.h, in an attempt to
rationalize partitioning related code.

Authors: Amit Langote, David Rowley, Dilip Kumar
Reviewers: Robert Haas, Kyotaro Horiguchi, Ashutosh Bapat, Jesper Pedersen.
Discussion: https://postgr.es/m/098b9c71-1915-1a2a-8d52-1a7a50ce79e8@lab.ntt.co.jp
parent 11523e86
......@@ -18,7 +18,8 @@ top_builddir = ../..
include $(top_builddir)/src/Makefile.global
SUBDIRS = access bootstrap catalog parser commands executor foreign lib libpq \
main nodes optimizer port postmaster regex replication rewrite \
main nodes optimizer partitioning port postmaster \
regex replication rewrite \
statistics storage tcop tsearch utils $(top_builddir)/src/timezone \
jit
......
......@@ -41,6 +41,7 @@
#include "optimizer/prep.h"
#include "optimizer/var.h"
#include "parser/parse_coerce.h"
#include "partitioning/partbounds.h"
#include "rewrite/rewriteManip.h"
#include "storage/lmgr.h"
#include "utils/array.h"
......@@ -55,89 +56,6 @@
#include "utils/ruleutils.h"
#include "utils/syscache.h"
/*
* Information about bounds of a partitioned relation
*
* A list partition datum that is known to be NULL is never put into the
* datums array. Instead, it is tracked using the null_index field.
*
* In the case of range partitioning, ndatums will typically be far less than
* 2 * nparts, because a partition's upper bound and the next partition's lower
* bound are the same in most common cases, and we only store one of them (the
* upper bound). In case of hash partitioning, ndatums will be same as the
* number of partitions.
*
* For range and list partitioned tables, datums is an array of datum-tuples
* with key->partnatts datums each. For hash partitioned tables, it is an array
* of datum-tuples with 2 datums, modulus and remainder, corresponding to a
* given partition.
*
* The datums in datums array are arranged in increasing order as defined by
* functions qsort_partition_rbound_cmp(), qsort_partition_list_value_cmp() and
* qsort_partition_hbound_cmp() for range, list and hash partitioned tables
* respectively. For range and list partitions this simply means that the
* datums in the datums array are arranged in increasing order as defined by
* the partition key's operator classes and collations.
*
* In the case of list partitioning, the indexes array stores one entry for
* every datum, which is the index of the partition that accepts a given datum.
* In case of range partitioning, it stores one entry per distinct range
* datum, which is the index of the partition for which a given datum
* is an upper bound. In the case of hash partitioning, the number of the
* entries in the indexes array is same as the greatest modulus amongst all
* partitions. For a given partition key datum-tuple, the index of the
* partition which would accept that datum-tuple would be given by the entry
* pointed by remainder produced when hash value of the datum-tuple is divided
* by the greatest modulus.
*/
typedef struct PartitionBoundInfoData
{
char strategy; /* hash, list or range? */
int ndatums; /* Length of the datums array that follows */
Datum **datums; /* Array of ndatums datum-tuples, kept in
* increasing order */
PartitionRangeDatumKind **kind; /* The kind of each range bound datum;
* NULL for hash and list partitioned
* tables */
int *indexes; /* Partition indexes */
int null_index; /* Index of the null-accepting partition; -1
* if there isn't one */
int default_index; /* Index of the default partition; -1 if there
* isn't one */
} PartitionBoundInfoData;
#define partition_bound_accepts_nulls(bi) ((bi)->null_index != -1)
#define partition_bound_has_default(bi) ((bi)->default_index != -1)
/*
* When qsort'ing partition bounds after reading from the catalog, each bound
* is represented with one of the following structs.
*/
/* One bound of a hash partition */
typedef struct PartitionHashBound
{
int modulus; /* modulus of this hash bound */
int remainder; /* remainder of this hash bound */
int index; /* NOTE(review): presumably the index of the
* partition owning this bound -- confirm */
} PartitionHashBound;
/* One value coming from some (index'th) list partition */
typedef struct PartitionListValue
{
int index; /* index of the list partition the value came
* from */
Datum value; /* the list value itself */
} PartitionListValue;
/* One bound of a range partition */
typedef struct PartitionRangeBound
{
int index; /* NOTE(review): presumably the index of the
* partition owning this bound -- confirm */
Datum *datums; /* range bound datums */
PartitionRangeDatumKind *kind; /* the kind of each datum */
bool lower; /* this is the lower (vs upper) bound */
} PartitionRangeBound;
static Oid get_partition_parent_worker(Relation inhRel, Oid relid);
static void get_partition_ancestors_worker(Relation inhRel, Oid relid,
......@@ -173,29 +91,9 @@ static int32 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
Oid *partcollation, Datum *datums1,
PartitionRangeDatumKind *kind1, bool lower1,
PartitionRangeBound *b2);
static int32 partition_rbound_datum_cmp(FmgrInfo *partsupfunc,
Oid *partcollation,
Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
Datum *tuple_datums, int n_tuple_datums);
static int partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
PartitionBoundInfo boundinfo,
Datum value, bool *is_equal);
static int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
Oid *partcollation,
PartitionBoundInfo boundinfo,
PartitionRangeBound *probe, bool *is_equal);
static int partition_range_datum_bsearch(FmgrInfo *partsupfunc,
Oid *partcollation,
PartitionBoundInfo boundinfo,
int nvalues, Datum *values, bool *is_equal);
static int partition_hash_bsearch(PartitionBoundInfo boundinfo,
int modulus, int remainder);
static int get_partition_bound_num_indexes(PartitionBoundInfo b);
static int get_greatest_modulus(PartitionBoundInfo b);
static uint64 compute_hash_value(int partnatts, FmgrInfo *partsupfunc,
Datum *values, bool *isnull);
/*
* RelationBuildPartitionDesc
......@@ -765,13 +663,13 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
if (b1->strategy == PARTITION_STRATEGY_HASH)
{
int greatest_modulus = get_greatest_modulus(b1);
int greatest_modulus = get_hash_partition_greatest_modulus(b1);
/*
* If two hash partitioned tables have different greatest moduli,
* their partition schemes don't match.
*/
if (greatest_modulus != get_greatest_modulus(b2))
if (greatest_modulus != get_hash_partition_greatest_modulus(b2))
return false;
/*
......@@ -1029,7 +927,7 @@ check_new_partition_bound(char *relname, Relation parent,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("every hash partition modulus must be a factor of the next larger modulus")));
greatest_modulus = get_greatest_modulus(boundinfo);
greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
remainder = spec->remainder;
/*
......@@ -1620,7 +1518,6 @@ get_partition_qual_relid(Oid relid)
return result;
}
/* Module-local functions */
/*
* get_partition_operator
......@@ -2637,7 +2534,7 @@ get_partition_for_tuple(Relation relation, Datum *values, bool *isnull)
case PARTITION_STRATEGY_HASH:
{
PartitionBoundInfo boundinfo = partdesc->boundinfo;
int greatest_modulus = get_greatest_modulus(boundinfo);
int greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
uint64 rowHash = compute_hash_value(key->partnatts,
key->partsupfunc,
values, isnull);
......@@ -2971,7 +2868,7 @@ partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation,
* of attributes resp.
*
*/
static int32
int32
partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
Datum *tuple_datums, int n_tuple_datums)
......@@ -3005,7 +2902,7 @@ partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
* *is_equal is set to true if the bound datum at the returned index is equal
* to the input value.
*/
static int
int
partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
PartitionBoundInfo boundinfo,
Datum value, bool *is_equal)
......@@ -3048,7 +2945,7 @@ partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
* *is_equal is set to true if the range bound at the returned index is equal
* to the input range bound
*/
static int
int
partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
Oid *partcollation,
PartitionBoundInfo boundinfo,
......@@ -3093,7 +2990,7 @@ partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
* *is_equal is set to true if the range bound at the returned index is equal
* to the input tuple.
*/
static int
int
partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
PartitionBoundInfo boundinfo,
int nvalues, Datum *values, bool *is_equal)
......@@ -3136,7 +3033,7 @@ partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
* less than or equal to the given (modulus, remainder) pair or -1 if
* all of them are greater
*/
static int
int
partition_hash_bsearch(PartitionBoundInfo boundinfo,
int modulus, int remainder)
{
......@@ -3294,7 +3191,7 @@ get_partition_bound_num_indexes(PartitionBoundInfo bound)
* The number of the entries in the indexes array is same as the
* greatest modulus.
*/
num_indexes = get_greatest_modulus(bound);
num_indexes = get_hash_partition_greatest_modulus(bound);
break;
case PARTITION_STRATEGY_LIST:
......@@ -3315,14 +3212,14 @@ get_partition_bound_num_indexes(PartitionBoundInfo bound)
}
/*
* get_greatest_modulus
* get_hash_partition_greatest_modulus
*
* Returns the greatest modulus of the hash partition bound. The greatest
* modulus will be at the end of the datums array because hash partitions are
* arranged in the ascending order of their modulus and remainders.
*/
static int
get_greatest_modulus(PartitionBoundInfo bound)
int
get_hash_partition_greatest_modulus(PartitionBoundInfo bound)
{
Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH);
Assert(bound->datums && bound->ndatums > 0);
......@@ -3336,7 +3233,7 @@ get_greatest_modulus(PartitionBoundInfo bound)
*
* Compute the hash value for given not null partition key values.
*/
static uint64
uint64
compute_hash_value(int partnatts, FmgrInfo *partsupfunc,
Datum *values, bool *isnull)
{
......
......@@ -2150,6 +2150,38 @@ _copyMergeAction(const MergeAction *from)
return newnode;
}
/*
 * _copyPartitionPruneStepOp
 *
 * Allocate a new PartitionPruneStepOp with makeNode() and fill it in from
 * 'from', one field at a time, using the COPY_* macros.  Returns the new
 * node.
 *
 * The macros refer to the local names 'from' and 'newnode', so those names
 * must not be changed.
 */
static PartitionPruneStepOp *
_copyPartitionPruneStepOp(const PartitionPruneStepOp *from)
{
PartitionPruneStepOp *newnode = makeNode(PartitionPruneStepOp);
/* step_id is a member of the embedded 'step' struct, hence the dotted name */
COPY_SCALAR_FIELD(step.step_id);
COPY_SCALAR_FIELD(opstrategy);
/* node-valued fields go through COPY_NODE_FIELD rather than plain assignment */
COPY_NODE_FIELD(exprs);
COPY_NODE_FIELD(cmpfns);
COPY_BITMAPSET_FIELD(nullkeys);
return newnode;
}
/*
 * _copyPartitionPruneStepCombine
 *
 * Allocate a new PartitionPruneStepCombine with makeNode() and fill it in
 * from 'from', one field at a time, using the COPY_* macros.  Returns the
 * new node.
 *
 * The macros refer to the local names 'from' and 'newnode', so those names
 * must not be changed.
 */
static PartitionPruneStepCombine *
_copyPartitionPruneStepCombine(const PartitionPruneStepCombine *from)
{
PartitionPruneStepCombine *newnode = makeNode(PartitionPruneStepCombine);
/* step_id is a member of the embedded 'step' struct, hence the dotted name */
COPY_SCALAR_FIELD(step.step_id);
COPY_SCALAR_FIELD(combineOp);
COPY_NODE_FIELD(source_stepids);
return newnode;
}
/* ****************************************************************
* relation.h copy functions
*
......@@ -2277,21 +2309,6 @@ _copyAppendRelInfo(const AppendRelInfo *from)
return newnode;
}
/*
 * _copyPartitionedChildRelInfo
 *
 * Allocate a new PartitionedChildRelInfo with makeNode() and fill in
 * parent_relid, child_rels and part_cols_updated from 'from' using the
 * COPY_* macros.  Returns the new node.
 *
 * The macros refer to the local names 'from' and 'newnode', so those names
 * must not be changed.
 */
static PartitionedChildRelInfo *
_copyPartitionedChildRelInfo(const PartitionedChildRelInfo *from)
{
PartitionedChildRelInfo *newnode = makeNode(PartitionedChildRelInfo);
COPY_SCALAR_FIELD(parent_relid);
/* child_rels is copied with COPY_NODE_FIELD rather than plain assignment */
COPY_NODE_FIELD(child_rels);
COPY_SCALAR_FIELD(part_cols_updated);
return newnode;
}
/*
* _copyPlaceHolderInfo
*/
......@@ -5076,6 +5093,12 @@ copyObjectImpl(const void *from)
case T_MergeAction:
retval = _copyMergeAction(from);
break;
case T_PartitionPruneStepOp:
retval = _copyPartitionPruneStepOp(from);
break;
case T_PartitionPruneStepCombine:
retval = _copyPartitionPruneStepCombine(from);
break;
/*
* RELATION NODES
......@@ -5095,9 +5118,6 @@ copyObjectImpl(const void *from)
case T_AppendRelInfo:
retval = _copyAppendRelInfo(from);
break;
case T_PartitionedChildRelInfo:
retval = _copyPartitionedChildRelInfo(from);
break;
case T_PlaceHolderInfo:
retval = _copyPlaceHolderInfo(from);
break;
......
......@@ -915,16 +915,6 @@ _equalAppendRelInfo(const AppendRelInfo *a, const AppendRelInfo *b)
return true;
}
/*
 * _equalPartitionedChildRelInfo
 *
 * Compare two PartitionedChildRelInfo nodes on parent_relid, child_rels and
 * part_cols_updated.  Returns true if control reaches the end of the
 * function.
 *
 * NOTE(review): the COMPARE_* macros presumably return false from this
 * function on the first mismatch -- confirm against their definitions.
 * They refer to the parameter names 'a' and 'b', so those names must not be
 * changed.
 */
static bool
_equalPartitionedChildRelInfo(const PartitionedChildRelInfo *a, const PartitionedChildRelInfo *b)
{
COMPARE_SCALAR_FIELD(parent_relid);
COMPARE_NODE_FIELD(child_rels);
COMPARE_SCALAR_FIELD(part_cols_updated);
return true;
}
static bool
_equalPlaceHolderInfo(const PlaceHolderInfo *a, const PlaceHolderInfo *b)
{
......@@ -3230,9 +3220,6 @@ equal(const void *a, const void *b)
case T_AppendRelInfo:
retval = _equalAppendRelInfo(a, b);
break;
case T_PartitionedChildRelInfo:
retval = _equalPartitionedChildRelInfo(a, b);
break;
case T_PlaceHolderInfo:
retval = _equalPlaceHolderInfo(a, b);
break;
......
......@@ -2156,6 +2156,17 @@ expression_tree_walker(Node *node,
return true;
}
break;
case T_PartitionPruneStepOp:
{
PartitionPruneStepOp *opstep = (PartitionPruneStepOp *) node;
if (walker((Node *) opstep->exprs, context))
return true;
}
break;
case T_PartitionPruneStepCombine:
/* no expression subnodes */
break;
case T_JoinExpr:
{
JoinExpr *join = (JoinExpr *) node;
......@@ -2958,6 +2969,20 @@ expression_tree_mutator(Node *node,
return (Node *) newnode;
}
break;
case T_PartitionPruneStepOp:
{
PartitionPruneStepOp *opstep = (PartitionPruneStepOp *) node;
PartitionPruneStepOp *newnode;
FLATCOPY(newnode, opstep, PartitionPruneStepOp);
MUTATE(newnode->exprs, opstep->exprs, List *);
return (Node *) newnode;
}
break;
case T_PartitionPruneStepCombine:
/* no expression sub-nodes */
return (Node *) copyObject(node);
case T_JoinExpr:
{
JoinExpr *join = (JoinExpr *) node;
......
......@@ -1710,6 +1710,28 @@ _outFromExpr(StringInfo str, const FromExpr *node)
WRITE_NODE_FIELD(quals);
}
/*
 * _outPartitionPruneStepOp
 *
 * Write a PartitionPruneStepOp node to 'str' under the node label
 * "PARTITIONPRUNESTEPOP".
 *
 * The fields are emitted in the same order in which
 * _readPartitionPruneStepOp reads them back; keep the two in sync.
 */
static void
_outPartitionPruneStepOp(StringInfo str, const PartitionPruneStepOp *node)
{
WRITE_NODE_TYPE("PARTITIONPRUNESTEPOP");
/* step_id is a member of the embedded 'step' struct, hence the dotted name */
WRITE_INT_FIELD(step.step_id);
WRITE_INT_FIELD(opstrategy);
WRITE_NODE_FIELD(exprs);
WRITE_NODE_FIELD(cmpfns);
WRITE_BITMAPSET_FIELD(nullkeys);
}
/*
 * _outPartitionPruneStepCombine
 *
 * Write a PartitionPruneStepCombine node to 'str' under the node label
 * "PARTITIONPRUNESTEPCOMBINE".
 *
 * The fields are emitted in the same order in which
 * _readPartitionPruneStepCombine reads them back; keep the two in sync.
 */
static void
_outPartitionPruneStepCombine(StringInfo str, const PartitionPruneStepCombine *node)
{
WRITE_NODE_TYPE("PARTITIONPRUNESTEPCOMBINE");
/* step_id is a member of the embedded 'step' struct, hence the dotted name */
WRITE_INT_FIELD(step.step_id);
/* combineOp is written as an enum of type PartitionPruneCombineOp */
WRITE_ENUM_FIELD(combineOp, PartitionPruneCombineOp);
WRITE_NODE_FIELD(source_stepids);
}
static void
_outOnConflictExpr(StringInfo str, const OnConflictExpr *node)
{
......@@ -2261,7 +2283,6 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node)
WRITE_NODE_FIELD(full_join_clauses);
WRITE_NODE_FIELD(join_info_list);
WRITE_NODE_FIELD(append_rel_list);
WRITE_NODE_FIELD(pcinfo_list);
WRITE_NODE_FIELD(rowMarks);
WRITE_NODE_FIELD(placeholder_list);
WRITE_NODE_FIELD(fkey_list);
......@@ -2286,6 +2307,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node)
WRITE_INT_FIELD(wt_param_id);
WRITE_BITMAPSET_FIELD(curOuterRels);
WRITE_NODE_FIELD(curOuterParams);
WRITE_BOOL_FIELD(partColsUpdated);
}
static void
......@@ -2335,6 +2357,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
WRITE_NODE_FIELD(joininfo);
WRITE_BOOL_FIELD(has_eclass_joins);
WRITE_BITMAPSET_FIELD(top_parent_relids);
WRITE_NODE_FIELD(partitioned_child_rels);
}
static void
......@@ -2559,16 +2582,6 @@ _outAppendRelInfo(StringInfo str, const AppendRelInfo *node)
WRITE_OID_FIELD(parent_reloid);
}
/*
 * _outPartitionedChildRelInfo
 *
 * Write a PartitionedChildRelInfo node to 'str' under the node label
 * "PARTITIONEDCHILDRELINFO", emitting parent_relid, child_rels and
 * part_cols_updated in that order.
 */
static void
_outPartitionedChildRelInfo(StringInfo str, const PartitionedChildRelInfo *node)
{
WRITE_NODE_TYPE("PARTITIONEDCHILDRELINFO");
/* parent_relid is written with the unsigned-int writer */
WRITE_UINT_FIELD(parent_relid);
WRITE_NODE_FIELD(child_rels);
WRITE_BOOL_FIELD(part_cols_updated);
}
static void
_outPlaceHolderInfo(StringInfo str, const PlaceHolderInfo *node)
{
......@@ -3973,6 +3986,12 @@ outNode(StringInfo str, const void *obj)
case T_MergeAction:
_outMergeAction(str, obj);
break;
case T_PartitionPruneStepOp:
_outPartitionPruneStepOp(str, obj);
break;
case T_PartitionPruneStepCombine:
_outPartitionPruneStepCombine(str, obj);
break;
case T_Path:
_outPath(str, obj);
break;
......@@ -4114,9 +4133,6 @@ outNode(StringInfo str, const void *obj)
case T_AppendRelInfo:
_outAppendRelInfo(str, obj);
break;
case T_PartitionedChildRelInfo:
_outPartitionedChildRelInfo(str, obj);
break;
case T_PlaceHolderInfo:
_outPlaceHolderInfo(str, obj);
break;
......
......@@ -1331,6 +1331,32 @@ _readOnConflictExpr(void)
READ_DONE();
}
/*
 * _readPartitionPruneStepOp
 *
 * Read back the fields of a PartitionPruneStepOp node.
 *
 * The fields are read in the same order in which _outPartitionPruneStepOp
 * writes them; keep the two in sync.
 *
 * NOTE(review): READ_LOCALS presumably declares and allocates the node being
 * filled in, and READ_DONE presumably returns it -- confirm against the
 * macro definitions.
 */
static PartitionPruneStepOp *
_readPartitionPruneStepOp(void)
{
READ_LOCALS(PartitionPruneStepOp);
/* step_id is a member of the embedded 'step' struct, hence the dotted name */
READ_INT_FIELD(step.step_id);
READ_INT_FIELD(opstrategy);
READ_NODE_FIELD(exprs);
READ_NODE_FIELD(cmpfns);
READ_BITMAPSET_FIELD(nullkeys);
READ_DONE();
}
/*
 * _readPartitionPruneStepCombine
 *
 * Read back the fields of a PartitionPruneStepCombine node.
 *
 * The fields are read in the same order in which
 * _outPartitionPruneStepCombine writes them; keep the two in sync.
 *
 * NOTE(review): READ_LOCALS presumably declares and allocates the node being
 * filled in, and READ_DONE presumably returns it -- confirm against the
 * macro definitions.
 */
static PartitionPruneStepCombine *
_readPartitionPruneStepCombine(void)
{
READ_LOCALS(PartitionPruneStepCombine);
/* step_id is a member of the embedded 'step' struct, hence the dotted name */
READ_INT_FIELD(step.step_id);
/* combineOp is read as an enum of type PartitionPruneCombineOp */
READ_ENUM_FIELD(combineOp, PartitionPruneCombineOp);
READ_NODE_FIELD(source_stepids);
READ_DONE();
}
/*
* _readMergeAction
*/
......@@ -2615,6 +2641,10 @@ parseNodeString(void)
return_value = _readOnConflictExpr();
else if (MATCH("MERGEACTION", 11))
return_value = _readMergeAction();
else if (MATCH("PARTITIONPRUNESTEPOP", 20))
return_value = _readPartitionPruneStepOp();
else if (MATCH("PARTITIONPRUNESTEPCOMBINE", 25))
return_value = _readPartitionPruneStepCombine();
else if (MATCH("RTE", 3))
return_value = _readRangeTblEntry();
else if (MATCH("RANGETBLFUNCTION", 16))
......
......@@ -43,6 +43,7 @@
#include "optimizer/var.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
#include "partitioning/partprune.h"
#include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h"
......@@ -874,12 +875,39 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
double *parent_attrsizes;
int nattrs;
ListCell *l;
Relids live_children = NULL;
bool did_pruning = false;
/* Guard against stack overflow due to overly deep inheritance tree. */
check_stack_depth();
Assert(IS_SIMPLE_REL(rel));
/*
* Initialize partitioned_child_rels to contain this RT index.
*
* Note that during the set_append_rel_pathlist() phase, we will bubble up
* the indexes of partitioned relations that appear down in the tree, so
* that when we've created Paths for all the children, the root
* partitioned table's list will contain all such indexes.
*/
if (rte->relkind == RELKIND_PARTITIONED_TABLE)
rel->partitioned_child_rels = list_make1_int(rti);
/*
* If the partitioned relation has any baserestrictinfo quals then we
* attempt to use these quals to prune away partitions that cannot
* possibly contain any tuples matching these quals. In this case we'll
* store the relids of all partitions which could possibly contain a
* matching tuple, and skip anything else in the loop below.
*/
if (rte->relkind == RELKIND_PARTITIONED_TABLE &&
rel->baserestrictinfo != NIL)
{
live_children = prune_append_rel_partitions(rel);
did_pruning = true;
}
/*
* Initialize to compute size estimates for whole append relation.
*
......@@ -1128,6 +1156,13 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
continue;
}
if (did_pruning && !bms_is_member(appinfo->child_relid, live_children))
{
/* This partition was pruned; skip it. */
set_dummy_rel_pathlist(childrel);
continue;
}
if (relation_excluded_by_constraints(root, childrel, childRTE))
{
/*
......@@ -1309,6 +1344,12 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
if (IS_DUMMY_REL(childrel))
continue;
/* Bubble up childrel's partitioned children. */
if (rel->part_scheme)
rel->partitioned_child_rels =
list_concat(rel->partitioned_child_rels,
list_copy(childrel->partitioned_child_rels));
/*
* Child is live, so add it to the live_childrels list for use below.
*/
......@@ -1346,49 +1387,55 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
List *all_child_outers = NIL;
ListCell *l;
List *partitioned_rels = NIL;
RangeTblEntry *rte;
bool build_partitioned_rels = false;
double partial_rows = -1;
if (IS_SIMPLE_REL(rel))
{
/*
* A root partition will already have a PartitionedChildRelInfo, and a
* non-root partitioned table doesn't need one, because its Append
* paths will get flattened into the parent anyway. For a subquery
* RTE, no PartitionedChildRelInfo exists; we collect all
* partitioned_rels associated with any child. (This assumes that we
* don't need to look through multiple levels of subquery RTEs; if we
* ever do, we could create a PartitionedChildRelInfo with the
* accumulated list of partitioned_rels which would then be found when
* populating our parent rel with paths. For the present, that appears
* to be unnecessary.)
*/
rte = planner_rt_fetch(rel->relid, root);
switch (rte->rtekind)
/*
* AppendPath generated for partitioned tables must record the RT indexes
* of partitioned tables that are direct or indirect children of this
* Append rel.
*
* AppendPath may be for a sub-query RTE (UNION ALL), in which case, 'rel'
* itself does not represent a partitioned relation, but the child sub-
* queries may contain references to partitioned relations. The loop
* below will look for such children and collect them in a list to be
* passed to the path creation function. (This assumes that we don't need
* to look through multiple levels of subquery RTEs; if we ever do, we
* could consider stuffing the list we generate here into sub-query RTE's
* RelOptInfo, just like we do for partitioned rels, which would be used
* when populating our parent rel with paths. For the present, that
* appears to be unnecessary.)
*/
if (rel->part_scheme != NULL)
{
if (IS_SIMPLE_REL(rel))
partitioned_rels = rel->partitioned_child_rels;
else if (IS_JOIN_REL(rel))
{
case RTE_RELATION:
if (rte->relkind == RELKIND_PARTITIONED_TABLE)
partitioned_rels =
get_partitioned_child_rels(root, rel->relid, NULL);
break;
case RTE_SUBQUERY:
build_partitioned_rels = true;
break;
default:
elog(ERROR, "unexpected rtekind: %d", (int) rte->rtekind);
int relid = -1;
/*
* For a partitioned joinrel, concatenate the component rels'
* partitioned_child_rels lists.
*/
while ((relid = bms_next_member(rel->relids, relid)) >= 0)
{
RelOptInfo *component;
Assert(relid >= 1 && relid < root->simple_rel_array_size);
component = root->simple_rel_array[relid];
Assert(component->part_scheme != NULL);
Assert(list_length(component->partitioned_child_rels) >= 1);
partitioned_rels =
list_concat(partitioned_rels,
list_copy(component->partitioned_child_rels));
}
}
Assert(list_length(partitioned_rels) >= 1);
}
else if (rel->reloptkind == RELOPT_JOINREL && rel->part_scheme)
{
/*
* Associate PartitionedChildRelInfo of the root partitioned tables
* being joined with the root partitioned join (indicated by
* RELOPT_JOINREL).
*/
partitioned_rels = get_partitioned_child_rels_for_join(root,
rel->relids);
}
else if (rel->rtekind == RTE_SUBQUERY)
build_partitioned_rels = true;
/*
* For every non-dummy child, remember the cheapest path. Also, identify
......@@ -1407,9 +1454,8 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
*/
if (build_partitioned_rels)
{
List *cprels;
List *cprels = childrel->partitioned_child_rels;
cprels = get_partitioned_child_rels(root, childrel->relid, NULL);
partitioned_rels = list_concat(partitioned_rels,
list_copy(cprels));
}
......
......@@ -40,9 +40,7 @@
#include "utils/selfuncs.h"
#define IsBooleanOpfamily(opfamily) \
((opfamily) == BOOL_BTREE_FAM_OID || (opfamily) == BOOL_HASH_FAM_OID)
/* XXX see PartCollMatchesExprColl */
#define IndexCollMatchesExprColl(idxcollation, exprcollation) \
((idxcollation) == InvalidOid || (idxcollation) == (exprcollation))
......
......@@ -616,7 +616,6 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
root->multiexpr_params = NIL;
root->eq_classes = NIL;
root->append_rel_list = NIL;
root->pcinfo_list = NIL;
root->rowMarks = NIL;
memset(root->upper_rels, 0, sizeof(root->upper_rels));
memset(root->upper_targets, 0, sizeof(root->upper_targets));
......@@ -631,6 +630,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
else
root->wt_param_id = -1;
root->non_recursive_path = NULL;
root->partColsUpdated = false;
/*
* If there is a WITH list, process each WITH query and build an initplan
......@@ -1191,12 +1191,12 @@ inheritance_planner(PlannerInfo *root)
ListCell *lc;
Index rti;
RangeTblEntry *parent_rte;
Relids partitioned_relids = NULL;
List *partitioned_rels = NIL;
PlannerInfo *parent_root;
Query *parent_parse;
Bitmapset *parent_relids = bms_make_singleton(top_parentRTindex);
PlannerInfo **parent_roots = NULL;
bool partColsUpdated = false;
Assert(parse->commandType != CMD_INSERT);
......@@ -1268,10 +1268,12 @@ inheritance_planner(PlannerInfo *root)
if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
{
nominalRelation = top_parentRTindex;
partitioned_rels = get_partitioned_child_rels(root, top_parentRTindex,
&partColsUpdated);
/* The root partitioned table is included as a child rel */
Assert(list_length(partitioned_rels) >= 1);
/*
* Root parent's RT index is always present in the partitioned_rels of
* the ModifyTable node, if one is needed at all.
*/
partitioned_relids = bms_make_singleton(top_parentRTindex);
}
/*
......@@ -1502,6 +1504,15 @@ inheritance_planner(PlannerInfo *root)
if (IS_DUMMY_PATH(subpath))
continue;
/*
* Add the current parent's RT index to the partitioned_relids set if
* we're going to create the ModifyTable path for a partitioned root
* table.
*/
if (partitioned_relids)
partitioned_relids = bms_add_member(partitioned_relids,
appinfo->parent_relid);
/*
* If this is the first non-excluded child, its post-planning rtable
* becomes the initial contents of final_rtable; otherwise, append
......@@ -1603,6 +1614,21 @@ inheritance_planner(PlannerInfo *root)
else
rowMarks = root->rowMarks;
if (partitioned_relids)
{
int i;
i = -1;
while ((i = bms_next_member(partitioned_relids, i)) >= 0)
partitioned_rels = lappend_int(partitioned_rels, i);
/*
* If we're going to create ModifyTable at all, the list should
* contain at least one member, that is, the root parent's index.
*/
Assert(list_length(partitioned_rels) >= 1);
}
/* Create Path representing a ModifyTable to do the UPDATE/DELETE work */
add_path(final_rel, (Path *)
create_modifytable_path(root, final_rel,
......@@ -1610,7 +1636,7 @@ inheritance_planner(PlannerInfo *root)
parse->canSetTag,
nominalRelation,
partitioned_rels,
partColsUpdated,
root->partColsUpdated,
resultRelations,
0,
subpaths,
......@@ -6144,65 +6170,6 @@ done:
return parallel_workers;
}
/*
 * get_partitioned_child_rels
 *		Look up the PartitionedChildRelInfo in root->pcinfo_list whose
 *		parent_relid equals 'rti' and return its child_rels list (the list
 *		itself, not a copy).
 *
 * If 'part_cols_updated' is not NULL, *part_cols_updated is set to the
 * matching entry's part_cols_updated flag, or to false when no entry
 * matches.
 *
 * Note: this may be called for range table entries that have no entry in
 * pcinfo_list (for example, ones that are not partitioned tables); NIL is
 * returned in that case.
 */
List *
get_partitioned_child_rels(PlannerInfo *root, Index rti,
						   bool *part_cols_updated)
{
	ListCell   *lc;

	/* Start from the "no match" answer for the optional output flag. */
	if (part_cols_updated)
		*part_cols_updated = false;

	foreach(lc, root->pcinfo_list)
	{
		PartitionedChildRelInfo *pcinfo = lfirst_node(PartitionedChildRelInfo, lc);

		/* Not the parent we are looking for; keep scanning. */
		if (pcinfo->parent_relid != rti)
			continue;

		/* First match wins: report its flag and hand back its list. */
		if (part_cols_updated)
			*part_cols_updated = pcinfo->part_cols_updated;

		return pcinfo->child_rels;
	}

	return NIL;
}
/*
 * get_partitioned_child_rels_for_join
 *		Build and return a list containing the RTI of every partitioned
 *		relation which is a child of some rel included in the join.
 *
 * Scans root->pcinfo_list and, for every entry whose parent_relid is a
 * member of 'join_relids', appends a copy of that entry's child_rels to the
 * result.  Returns NIL when no entry matches.
 */
List *
get_partitioned_child_rels_for_join(PlannerInfo *root, Relids join_relids)
{
	List	   *result = NIL;
	ListCell   *l;

	foreach(l, root->pcinfo_list)
	{
		/*
		 * Use the type-checked accessor, as get_partitioned_child_rels()
		 * does for the same list, rather than a bare lfirst().
		 */
		PartitionedChildRelInfo *pc = lfirst_node(PartitionedChildRelInfo, l);

		/*
		 * Concatenate a copy of the stored list, not the list itself;
		 * presumably this keeps the entry's child_rels from being modified
		 * by list_concat.
		 */
		if (bms_is_member(pc->parent_relid, join_relids))
			result = list_concat(result, list_copy(pc->child_rels));
	}

	return result;
}
/*
* add_paths_to_grouping_rel
*
......
......@@ -104,8 +104,7 @@ static void expand_partitioned_rtentry(PlannerInfo *root,
RangeTblEntry *parentrte,
Index parentRTindex, Relation parentrel,
PlanRowMark *top_parentrc, LOCKMODE lockmode,
List **appinfos, List **partitioned_child_rels,
bool *part_cols_updated);
List **appinfos);
static void expand_single_inheritance_child(PlannerInfo *root,
RangeTblEntry *parentrte,
Index parentRTindex, Relation parentrel,
......@@ -1587,9 +1586,6 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
/* Scan the inheritance set and expand it */
if (RelationGetPartitionDesc(oldrelation) != NULL)
{
List *partitioned_child_rels = NIL;
bool part_cols_updated = false;
Assert(rte->relkind == RELKIND_PARTITIONED_TABLE);
/*
......@@ -1598,28 +1594,7 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
* extract the partition key columns of all the partitioned tables.
*/
expand_partitioned_rtentry(root, rte, rti, oldrelation, oldrc,
lockmode, &root->append_rel_list,
&partitioned_child_rels,
&part_cols_updated);
/*
* We keep a list of objects in root, each of which maps a root
* partitioned parent RT index to the list of RT indexes of descendant
* partitioned child tables. When creating an Append or a ModifyTable
* path for the parent, we copy the child RT index list verbatim to
* the path so that it could be carried over to the executor so that
* the latter could identify the partitioned child tables.
*/
if (rte->inh && partitioned_child_rels != NIL)
{
PartitionedChildRelInfo *pcinfo;
pcinfo = makeNode(PartitionedChildRelInfo);
pcinfo->parent_relid = rti;
pcinfo->child_rels = partitioned_child_rels;
pcinfo->part_cols_updated = part_cols_updated;
root->pcinfo_list = lappend(root->pcinfo_list, pcinfo);
}
lockmode, &root->append_rel_list);
}
else
{
......@@ -1694,8 +1669,7 @@ static void
expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
Index parentRTindex, Relation parentrel,
PlanRowMark *top_parentrc, LOCKMODE lockmode,
List **appinfos, List **partitioned_child_rels,
bool *part_cols_updated)
List **appinfos)
{
int i;
RangeTblEntry *childrte;
......@@ -1717,8 +1691,8 @@ expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
* parentrte already has the root partrel's updatedCols translated to match
* the attribute ordering of parentrel.
*/
if (!*part_cols_updated)
*part_cols_updated =
if (!root->partColsUpdated)
root->partColsUpdated =
has_partition_attrs(parentrel, parentrte->updatedCols, NULL);
/* First expand the partitioned table itself. */
......@@ -1726,14 +1700,6 @@ expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
top_parentrc, parentrel,
appinfos, &childrte, &childRTindex);
/*
* The partitioned table does not have data for itself but still needs to
* be locked. Update given list of partitioned children with RTI of this
* partitioned relation.
*/
*partitioned_child_rels = lappend_int(*partitioned_child_rels,
childRTindex);
for (i = 0; i < partdesc->nparts; i++)
{
Oid childOID = partdesc->oids[i];
......@@ -1760,8 +1726,7 @@ expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
if (childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
expand_partitioned_rtentry(root, childrte, childRTindex,
childrel, top_parentrc, lockmode,
appinfos, partitioned_child_rels,
part_cols_updated);
appinfos);
/* Close child relation, but keep locks */
heap_close(childrel, NoLock);
......
......@@ -1171,7 +1171,6 @@ get_relation_constraints(PlannerInfo *root,
Index varno = rel->relid;
Relation relation;
TupleConstr *constr;
List *pcqual;
/*
* We assume the relation has already been safely locked.
......@@ -1257,24 +1256,34 @@ get_relation_constraints(PlannerInfo *root,
}
}
/* Append partition predicates, if any */
pcqual = RelationGetPartitionQual(relation);
if (pcqual)
/*
* Append partition predicates, if any.
*
* For selects, partition pruning uses the parent table's partition bound
* descriptor, instead of constraint exclusion which is driven by the
* individual partition's partition constraint.
*/
if (root->parse->commandType != CMD_SELECT)
{
/*
* Run the partition quals through const-simplification similar to
* check constraints. We skip canonicalize_qual, though, because
* partition quals should be in canonical form already; also, since
* the qual is in implicit-AND format, we'd have to explicitly convert
* it to explicit-AND format and back again.
*/
pcqual = (List *) eval_const_expressions(root, (Node *) pcqual);
List *pcqual = RelationGetPartitionQual(relation);
/* Fix Vars to have the desired varno */
if (varno != 1)
ChangeVarNodes((Node *) pcqual, 1, varno, 0);
if (pcqual)
{
/*
* Run the partition quals through const-simplification similar to
* check constraints. We skip canonicalize_qual, though, because
* partition quals should be in canonical form already; also,
* since the qual is in implicit-AND format, we'd have to
* explicitly convert it to explicit-AND format and back again.
*/
pcqual = (List *) eval_const_expressions(root, (Node *) pcqual);
result = list_concat(result, pcqual);
/* Fix Vars to have the desired varno */
if (varno != 1)
ChangeVarNodes((Node *) pcqual, 1, varno, 0);
result = list_concat(result, pcqual);
}
}
heap_close(relation, NoLock);
......@@ -1869,6 +1878,7 @@ set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel,
rel->boundinfo = partition_bounds_copy(partdesc->boundinfo, partkey);
rel->nparts = partdesc->nparts;
set_baserel_partition_key_exprs(relation, rel);
rel->partition_qual = RelationGetPartitionQual(relation);
}
/*
......@@ -1881,7 +1891,8 @@ find_partition_scheme(PlannerInfo *root, Relation relation)
{
PartitionKey partkey = RelationGetPartitionKey(relation);
ListCell *lc;
int partnatts;
int partnatts,
i;
PartitionScheme part_scheme;
/* A partitioned table should have a partition key. */
......@@ -1899,7 +1910,7 @@ find_partition_scheme(PlannerInfo *root, Relation relation)
partnatts != part_scheme->partnatts)
continue;
/* Match the partition key types. */
/* Match partition key type properties. */
if (memcmp(partkey->partopfamily, part_scheme->partopfamily,
sizeof(Oid) * partnatts) != 0 ||
memcmp(partkey->partopcintype, part_scheme->partopcintype,
......@@ -1917,6 +1928,19 @@ find_partition_scheme(PlannerInfo *root, Relation relation)
Assert(memcmp(partkey->parttypbyval, part_scheme->parttypbyval,
sizeof(bool) * partnatts) == 0);
/*
* If partopfamily and partopcintype matched, must have the same
* partition comparison functions. Note that we cannot reliably
* Assert the equality of function structs themselves for they might
* be different across PartitionKey's, so just Assert for the function
* OIDs.
*/
#ifdef USE_ASSERT_CHECKING
for (i = 0; i < partkey->partnatts; i++)
Assert(partkey->partsupfunc[i].fn_oid ==
part_scheme->partsupfunc[i].fn_oid);
#endif
/* Found matching partition scheme. */
return part_scheme;
}
......@@ -1951,6 +1975,12 @@ find_partition_scheme(PlannerInfo *root, Relation relation)
memcpy(part_scheme->parttypbyval, partkey->parttypbyval,
sizeof(bool) * partnatts);
part_scheme->partsupfunc = (FmgrInfo *)
palloc(sizeof(FmgrInfo) * partnatts);
for (i = 0; i < partnatts; i++)
fmgr_info_copy(&part_scheme->partsupfunc[i], &partkey->partsupfunc[i],
CurrentMemoryContext);
/* Add the partitioning scheme to PlannerInfo. */
root->part_schemes = lappend(root->part_schemes, part_scheme);
......
......@@ -154,9 +154,11 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
rel->part_scheme = NULL;
rel->nparts = 0;
rel->boundinfo = NULL;
rel->partition_qual = NIL;
rel->part_rels = NULL;
rel->partexprs = NULL;
rel->nullable_partexprs = NULL;
rel->partitioned_child_rels = NIL;
/*
* Pass top parent's relids down the inheritance hierarchy. If the parent
......@@ -567,9 +569,11 @@ build_join_rel(PlannerInfo *root,
joinrel->part_scheme = NULL;
joinrel->nparts = 0;
joinrel->boundinfo = NULL;
joinrel->partition_qual = NIL;
joinrel->part_rels = NULL;
joinrel->partexprs = NULL;
joinrel->nullable_partexprs = NULL;
joinrel->partitioned_child_rels = NIL;
/* Compute information relevant to the foreign relations. */
set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
......@@ -734,9 +738,13 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
joinrel->has_eclass_joins = false;
joinrel->top_parent_relids = NULL;
joinrel->part_scheme = NULL;
joinrel->nparts = 0;
joinrel->boundinfo = NULL;
joinrel->partition_qual = NIL;
joinrel->part_rels = NULL;
joinrel->partexprs = NULL;
joinrel->nullable_partexprs = NULL;
joinrel->partitioned_child_rels = NIL;
joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids,
inner_rel->top_parent_relids);
......
#-------------------------------------------------------------------------
#
# Makefile--
# Makefile for backend/partitioning
#
# IDENTIFICATION
# src/backend/partitioning/Makefile
#
#-------------------------------------------------------------------------
# Location of this directory relative to the top of the build tree.
subdir = src/backend/partitioning
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
# Object files built from this directory.
OBJS = partprune.o
# NOTE(review): common.mk is defined outside this file; presumably it supplies
# the shared rules that build OBJS for the backend -- confirm.
include $(top_srcdir)/src/backend/common.mk
This diff is collapsed.
......@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201804052
#define CATALOG_VERSION_NO 201804061
#endif
......@@ -26,7 +26,7 @@
* PartitionBoundInfo encapsulates a set of partition bounds. It is usually
* associated with partitioned tables as part of its partition descriptor.
*
* The internal structure is opaque outside partition.c.
* The internal structure appears in partbounds.h.
*/
typedef struct PartitionBoundInfoData *PartitionBoundInfo;
......@@ -70,7 +70,6 @@ extern void check_default_allows_bound(Relation parent, Relation defaultRel,
PartitionBoundSpec *new_spec);
extern List *get_proposed_default_constraint(List *new_part_constaints);
/* For tuple routing */
extern int get_partition_for_tuple(Relation relation, Datum *values,
bool *isnull);
......
......@@ -53,6 +53,9 @@ typedef FormData_pg_opfamily *Form_pg_opfamily;
#define Anum_pg_opfamily_opfnamespace 3
#define Anum_pg_opfamily_opfowner 4
/*
 * True if 'opfamily' equals BOOL_BTREE_FAM_OID or BOOL_HASH_FAM_OID.
 * Note that the argument is evaluated twice, so it must not have side
 * effects.
 */
#define IsBooleanOpfamily(opfamily) \
((opfamily) == BOOL_BTREE_FAM_OID || (opfamily) == BOOL_HASH_FAM_OID)
/* ----------------
* initial contents of pg_opfamily
* ----------------
......
......@@ -193,6 +193,9 @@ typedef enum NodeTag
T_FromExpr,
T_OnConflictExpr,
T_IntoClause,
T_PartitionPruneStep,
T_PartitionPruneStepOp,
T_PartitionPruneStepCombine,
/*
* TAGS FOR EXPRESSION STATE NODES (execnodes.h)
......@@ -262,7 +265,6 @@ typedef enum NodeTag
T_PlaceHolderVar,
T_SpecialJoinInfo,
T_AppendRelInfo,
T_PartitionedChildRelInfo,
T_PlaceHolderInfo,
T_MinMaxAggInfo,
T_PlannerParamItem,
......
......@@ -18,6 +18,7 @@
#define PRIMNODES_H
#include "access/attnum.h"
#include "access/stratnum.h"
#include "nodes/bitmapset.h"
#include "nodes/pg_list.h"
......@@ -1506,4 +1507,78 @@ typedef struct OnConflictExpr
List *exclRelTlist; /* tlist of the EXCLUDED pseudo relation */
} OnConflictExpr;
/*
* Node types to represent a partition pruning step.
*/
/*
* The base Node type. step_id is the global identifier of a given step
* within a given pruning context.
*/
typedef struct PartitionPruneStep
{
NodeTag type; /* node tag; NOTE(review): presumably one of the
* T_PartitionPruneStep* tags -- confirm */
int step_id; /* identifier of this step within its pruning
* context */
} PartitionPruneStep;
/*----------
* PartitionPruneStepOp - Information to prune using a set of mutually AND'd
* OpExpr clauses
*
* This contains information extracted from up to partnatts OpExpr clauses,
* where partnatts is the number of partition key columns. 'opstrategy' is the
* strategy of the operator in the clause matched to the last partition key.
* 'exprs' contains expressions which comprise the lookup key to be passed to
* the partition bound search function. 'cmpfns' contains the OIDs of
* comparison function used to compare aforementioned expressions with
* partition bounds. Both 'exprs' and 'cmpfns' contain the same number of
* items up to partnatts items.
*
* Once we find the offset of a partition bound using the lookup key, we
* determine which partitions to include in the result based on the value of
* 'opstrategy'. For example, if it were equality, we'd return just the
* partition that would contain that key or a set of partitions if the key
* didn't consist of all partitioning columns. For non-equality strategies,
* we'd need to include other partitions as appropriate.
*
* 'nullkeys' is the set containing the offset of the partition keys (0 to
* partnatts - 1) that were matched to an IS NULL clause. This is only
* considered for hash partitioning as we need to pass which keys are null
* to the hash partition bound search function. It is never possible to
* have an expression be present in 'exprs' for a given partition key and
* the corresponding bit set in 'nullkeys'.
*----------
*/
typedef struct PartitionPruneStepOp
{
PartitionPruneStep step; /* base step (node tag and step_id) */
StrategyNumber opstrategy; /* strategy of the operator in the clause
* matched to the last partition key */
List *exprs; /* expressions making up the lookup key passed to
* the partition bound search function */
List *cmpfns; /* OIDs of the comparison functions used to
* compare 'exprs' with partition bounds; same
* number of items as 'exprs' */
Bitmapset *nullkeys; /* offsets (0 to partnatts - 1) of partition keys
* matched to an IS NULL clause; only considered
* for hash partitioning */
} PartitionPruneStepOp;
/*----------
* PartitionPruneStepCombine - Information to prune using a BoolExpr clause
*
* For BoolExpr clauses, we combine the set of partitions determined for each
* of its argument clauses.
*----------
*/
/*
 * How the partition sets of a combine step's source steps are merged.
 * NOTE(review): presumably UNION corresponds to OR clauses and INTERSECT to
 * AND clauses -- confirm against the code that generates the steps.
 */
typedef enum PartitionPruneCombineOp
{
PARTPRUNE_COMBINE_UNION,
PARTPRUNE_COMBINE_INTERSECT
} PartitionPruneCombineOp;
typedef struct PartitionPruneStepCombine
{
PartitionPruneStep step; /* base step (node tag and step_id) */
PartitionPruneCombineOp combineOp; /* union or intersection of the source
* steps' partition sets */
List *source_stepids; /* NOTE(review): presumably the step_id values of
* the steps whose results are combined --
* confirm */
} PartitionPruneStepCombine;
#endif /* PRIMNODES_H */
......@@ -15,6 +15,7 @@
#define RELATION_H
#include "access/sdir.h"
#include "fmgr.h"
#include "lib/stringinfo.h"
#include "nodes/params.h"
#include "nodes/parsenodes.h"
......@@ -253,8 +254,6 @@ typedef struct PlannerInfo
List *append_rel_list; /* list of AppendRelInfos */
List *pcinfo_list; /* list of PartitionedChildRelInfos */
List *rowMarks; /* list of PlanRowMarks */
List *placeholder_list; /* list of PlaceHolderInfos */
......@@ -319,6 +318,9 @@ typedef struct PlannerInfo
/* optional private data for join_search_hook, e.g., GEQO */
void *join_search_private;
/* Does this query modify any partition key columns? */
bool partColsUpdated;
} PlannerInfo;
......@@ -356,6 +358,9 @@ typedef struct PartitionSchemeData
/* Cached information about partition key data types. */
int16 *parttyplen;
bool *parttypbyval;
/* Cached information about partition comparison functions. */
FmgrInfo *partsupfunc;
} PartitionSchemeData;
typedef struct PartitionSchemeData *PartitionScheme;
......@@ -528,11 +533,15 @@ typedef struct PartitionSchemeData *PartitionScheme;
*
* If the relation is partitioned, these fields will be set:
*
* part_scheme - Partitioning scheme of the relation
* boundinfo - Partition bounds
* nparts - Number of partitions
* part_rels - RelOptInfos for each partition
* partexprs, nullable_partexprs - Partition key expressions
* part_scheme - Partitioning scheme of the relation
* nparts - Number of partitions
* boundinfo - Partition bounds
* partition_qual - Partition constraint if not the root
* part_rels - RelOptInfos for each partition
* partexprs, nullable_partexprs - Partition key expressions
* partitioned_child_rels - RT indexes of unpruned partitions of
* relation that are partitioned tables
* themselves
*
* Note: A base relation always has only one set of partition keys, but a join
* relation may have as many sets of partition keys as the number of relations
......@@ -663,10 +672,12 @@ typedef struct RelOptInfo
PartitionScheme part_scheme; /* Partitioning scheme. */
int nparts; /* number of partitions */
struct PartitionBoundInfoData *boundinfo; /* Partition bounds */
List *partition_qual; /* partition constraint */
struct RelOptInfo **part_rels; /* Array of RelOptInfos of partitions,
* stored in the same order of bounds */
List **partexprs; /* Non-nullable partition key expressions. */
List **nullable_partexprs; /* Nullable partition key expressions. */
List *partitioned_child_rels; /* List of RT indexes. */
} RelOptInfo;
/*
......@@ -1686,7 +1697,7 @@ typedef struct ModifyTablePath
List *partitioned_rels;
bool partColsUpdated; /* some part key in hierarchy updated */
List *resultRelations; /* integer list of RT indexes */
Index mergeTargetRelation;/* RT index of merge target relation */
Index mergeTargetRelation; /* RT index of merge target relation */
List *subpaths; /* Path(s) producing source data */
List *subroots; /* per-target-table PlannerInfos */
List *withCheckOptionLists; /* per-target-table WCO lists */
......@@ -2121,27 +2132,6 @@ typedef struct AppendRelInfo
Oid parent_reloid; /* OID of parent relation */
} AppendRelInfo;
/*
* For a partitioned table, this maps its RT index to the list of RT indexes
* of the partitioned child tables in the partition tree. We need to
* separately store this information, because we do not create AppendRelInfos
* for the partitioned child tables of a parent table, since AppendRelInfos
* contain information that is unnecessary for the partitioned child tables.
* The child_rels list must contain at least one element, because the parent
* partitioned table is itself counted as a child.
*
* These structs are kept in the PlannerInfo node's pcinfo_list.
*/
typedef struct PartitionedChildRelInfo
{
NodeTag type;
Index parent_relid;
List *child_rels;
bool part_cols_updated; /* is the partition key of any of
* the partitioned tables updated? */
} PartitionedChildRelInfo;
/*
* For each distinct placeholder expression generated during planning, we
* store a PlaceHolderInfo node in the PlannerInfo node's placeholder_list.
......
......@@ -59,9 +59,4 @@ extern Expr *preprocess_phv_expression(PlannerInfo *root, Expr *expr);
extern bool plan_cluster_use_sort(Oid tableOid, Oid indexOid);
extern int plan_create_index_workers(Oid tableOid, Oid indexOid);
extern List *get_partitioned_child_rels(PlannerInfo *root, Index rti,
bool *part_cols_updated);
extern List *get_partitioned_child_rels_for_join(PlannerInfo *root,
Relids join_relids);
#endif /* PLANNER_H */
/*-------------------------------------------------------------------------
*
* partbounds.h
*
* Copyright (c) 2007-2018, PostgreSQL Global Development Group
*
* src/include/partitioning/partbounds.h
*
*-------------------------------------------------------------------------
*/
#ifndef PARTBOUNDS_H
#define PARTBOUNDS_H
#include "catalog/partition.h"
/*
* PartitionBoundInfoData encapsulates a set of partition bounds. It is
* usually associated with partitioned tables as part of its partition
* descriptor, but may also be used to represent a virtual partitioned
* table such as a partitioned joinrel within the planner.
*
* A list partition datum that is known to be NULL is never put into the
* datums array. Instead, it is tracked using the null_index field.
*
* In the case of range partitioning, ndatums will typically be far less than
* 2 * nparts, because a partition's upper bound and the next partition's lower
* bound are the same in most common cases, and we only store one of them (the
* upper bound). In case of hash partitioning, ndatums will be the same as the
* number of partitions.
*
* For range and list partitioned tables, datums is an array of datum-tuples
* with key->partnatts datums each. For hash partitioned tables, it is an array
* of datum-tuples with 2 datums, modulus and remainder, corresponding to a
* given partition.
*
* The datums in datums array are arranged in increasing order as defined by
* functions qsort_partition_rbound_cmp(), qsort_partition_list_value_cmp() and
* qsort_partition_hbound_cmp() for range, list and hash partitioned tables
* respectively. For range and list partitions this simply means that the
* datums in the datums array are arranged in increasing order as defined by
* the partition key's operator classes and collations.
*
* In the case of list partitioning, the indexes array stores one entry for
* every datum, which is the index of the partition that accepts a given datum.
* In case of range partitioning, it stores one entry per distinct range
* datum, which is the index of the partition for which a given datum
* is an upper bound. In the case of hash partitioning, the number of
* entries in the indexes array is the same as the greatest modulus amongst all
* partitions. For a given partition key datum-tuple, the index of the
* partition which would accept that datum-tuple is given by the entry
* at the offset equal to the remainder produced when the hash value of the
* datum-tuple is divided by the greatest modulus.
*/
typedef struct PartitionBoundInfoData
{
char strategy; /* hash, list or range? */
int ndatums; /* Length of the datums array that follows */
Datum **datums; /* Array of datum-tuples: key->partnatts datums
* each for range and list, 2 datums (modulus
* and remainder) each for hash */
PartitionRangeDatumKind **kind; /* The kind of each range bound datum;
* NULL for hash and list partitioned
* tables */
int *indexes; /* Partition indexes; the number of entries and
* their meaning depend on the strategy */
int null_index; /* Index of the null-accepting partition; -1
* if there isn't one */
int default_index; /* Index of the default partition; -1 if there
* isn't one */
} PartitionBoundInfoData;
/*
 * Convenience tests on a PartitionBoundInfo: a null_index or default_index
 * of -1 means there is no null-accepting or default partition, respectively.
 */
#define partition_bound_accepts_nulls(bi) ((bi)->null_index != -1)
#define partition_bound_has_default(bi) ((bi)->default_index != -1)
/*
* When qsort'ing partition bounds after reading from the catalog, each bound
* is represented with one of the following structs.
*/
/* One bound of a hash partition */
typedef struct PartitionHashBound
{
int modulus; /* modulus of the hash partition bound */
int remainder; /* remainder of the hash partition bound */
int index; /* NOTE(review): presumably the index of the
* partition this bound belongs to -- confirm */
} PartitionHashBound;
/* One value coming from some (index'th) list partition */
typedef struct PartitionListValue
{
int index; /* which list partition the value comes from */
Datum value; /* the list value itself */
} PartitionListValue;
/* One bound of a range partition */
typedef struct PartitionRangeBound
{
int index; /* NOTE(review): presumably the index of the
* partition this bound belongs to -- confirm */
Datum *datums; /* range bound datums */
PartitionRangeDatumKind *kind; /* the kind of each datum */
bool lower; /* this is the lower (vs upper) bound */
} PartitionRangeBound;
/*
 * Partition bound search and comparison helpers.
 *
 * NOTE(review): the definitions are not part of this header; the one-line
 * descriptions below are inferred from names and signatures only and should
 * be confirmed against the definitions.
 */

/* Greatest modulus among the hash partition bounds in 'b'. */
extern int get_hash_partition_greatest_modulus(PartitionBoundInfo b);
/* Search list bounds for 'value'; *is_equal presumably reports an exact match. */
extern int partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
PartitionBoundInfo boundinfo,
Datum value, bool *is_equal);
/* Search range bounds for the bound described by 'probe'. */
extern int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
Oid *partcollation,
PartitionBoundInfo boundinfo,
PartitionRangeBound *probe, bool *is_equal);
/* Search range bounds using the first 'nvalues' entries of 'values'. */
extern int partition_range_datum_bsearch(FmgrInfo *partsupfunc,
Oid *partcollation,
PartitionBoundInfo boundinfo,
int nvalues, Datum *values, bool *is_equal);
/* Search hash bounds for the given (modulus, remainder) pair. */
extern int partition_hash_bsearch(PartitionBoundInfo boundinfo,
int modulus, int remainder);
/* Hash value for a partition key datum-tuple; 'isnull' flags the null keys. */
extern uint64 compute_hash_value(int partnatts, FmgrInfo *partsupfunc,
Datum *values, bool *isnull);
/* Compare a range bound (datums plus kinds) against 'n_tuple_datums' tuple datums. */
extern int32 partition_rbound_datum_cmp(FmgrInfo *partsupfunc,
Oid *partcollation,
Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
Datum *tuple_datums, int n_tuple_datums);
#endif /* PARTBOUNDS_H */
/*-------------------------------------------------------------------------
*
* partprune.h
* prototypes for partprune.c
*
*
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/partitioning/partprune.h
*
*-------------------------------------------------------------------------
*/
#ifndef PARTPRUNE_H
#define PARTPRUNE_H
#include "catalog/partition.h"
#include "nodes/relation.h"
/*
* PartitionPruneContext
*
* Information about a partitioned table needed to perform partition pruning.
*/
typedef struct PartitionPruneContext
{
/* Partition key information */
char strategy; /* partitioning strategy; NOTE(review): presumably
* the same encoding as
* PartitionBoundInfoData.strategy -- confirm */
int partnatts; /* number of partition key columns */
/*
 * NOTE(review): the four arrays below presumably hold one entry per
 * partition key column (partnatts entries) -- confirm.
 */
Oid *partopfamily; /* operator family OIDs */
Oid *partopcintype; /* operator class input type OIDs */
Oid *partcollation; /* collation OIDs */
FmgrInfo *partsupfunc; /* partition comparison/support functions */
/* Number of partitions */
int nparts;
/* Partition boundary info */
PartitionBoundInfo boundinfo;
} PartitionPruneContext;
/*
 * Returns a Relids set for 'rel'.  NOTE(review): presumably the set of
 * partitions of 'rel' that survive pruning -- confirm what the members
 * identify (RT indexes vs. partition indexes).
 */
extern Relids prune_append_rel_partitions(RelOptInfo *rel);
/*
 * Runs 'pruning_steps' against 'context' and returns the set of partitions
 * that must be scanned.  NOTE(review): presumably expressed as partition
 * indexes -- confirm.
 */
extern Bitmapset *get_matching_partitions(PartitionPruneContext *context,
List *pruning_steps);
/*
 * Generates the list of pruning steps for 'rel' from 'clauses'.
 * NOTE(review): '*contradictory' is presumably set when the clauses can
 * never be satisfied -- confirm.
 */
extern List *gen_partprune_steps(RelOptInfo *rel, List *clauses,
bool *contradictory);
#endif /* PARTPRUNE_H */
......@@ -1951,11 +1951,13 @@ explain (costs off) select * from mcrparted where abs(b) = 5; -- scans all parti
Filter: (abs(b) = 5)
-> Seq Scan on mcrparted3
Filter: (abs(b) = 5)
-> Seq Scan on mcrparted4
Filter: (abs(b) = 5)
-> Seq Scan on mcrparted5
Filter: (abs(b) = 5)
-> Seq Scan on mcrparted_def
Filter: (abs(b) = 5)
(13 rows)
(15 rows)
explain (costs off) select * from mcrparted where a > -1; -- scans all partitions
QUERY PLAN
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment