Commit 9fdb675f authored by Alvaro Herrera

Faster partition pruning

Add a new module backend/partitioning/partprune.c, implementing a more
sophisticated algorithm for partition pruning.  The new module uses each
partition's "boundinfo" for pruning instead of constraint exclusion,
based on an idea proposed by Robert Haas of a "pruning program": a list
of steps generated from the query quals which are run iteratively to
obtain a list of partitions that must be scanned in order to satisfy
those quals.

At present, this targets planner-time partition pruning, but there exist
further patches to apply partition pruning at execution time as well.

This commit also moves some definitions from include/catalog/partition.h
to a new file include/partitioning/partbounds.h, in an attempt to
rationalize partitioning related code.

Authors: Amit Langote, David Rowley, Dilip Kumar
Reviewers: Robert Haas, Kyotaro Horiguchi, Ashutosh Bapat, Jesper Pedersen.
Discussion: https://postgr.es/m/098b9c71-1915-1a2a-8d52-1a7a50ce79e8@lab.ntt.co.jp
parent 11523e86
...@@ -18,7 +18,8 @@ top_builddir = ../.. ...@@ -18,7 +18,8 @@ top_builddir = ../..
include $(top_builddir)/src/Makefile.global include $(top_builddir)/src/Makefile.global
SUBDIRS = access bootstrap catalog parser commands executor foreign lib libpq \ SUBDIRS = access bootstrap catalog parser commands executor foreign lib libpq \
main nodes optimizer port postmaster regex replication rewrite \ main nodes optimizer partitioning port postmaster \
regex replication rewrite \
statistics storage tcop tsearch utils $(top_builddir)/src/timezone \ statistics storage tcop tsearch utils $(top_builddir)/src/timezone \
jit jit
......
...@@ -41,6 +41,7 @@ ...@@ -41,6 +41,7 @@
#include "optimizer/prep.h" #include "optimizer/prep.h"
#include "optimizer/var.h" #include "optimizer/var.h"
#include "parser/parse_coerce.h" #include "parser/parse_coerce.h"
#include "partitioning/partbounds.h"
#include "rewrite/rewriteManip.h" #include "rewrite/rewriteManip.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
#include "utils/array.h" #include "utils/array.h"
...@@ -55,89 +56,6 @@ ...@@ -55,89 +56,6 @@
#include "utils/ruleutils.h" #include "utils/ruleutils.h"
#include "utils/syscache.h" #include "utils/syscache.h"
/*
* Information about bounds of a partitioned relation
*
* A list partition datum that is known to be NULL is never put into the
* datums array. Instead, it is tracked using the null_index field.
*
* In the case of range partitioning, ndatums will typically be far less than
* 2 * nparts, because a partition's upper bound and the next partition's lower
* bound are the same in most common cases, and we only store one of them (the
* upper bound). In case of hash partitioning, ndatums will be same as the
* number of partitions.
*
* For range and list partitioned tables, datums is an array of datum-tuples
* with key->partnatts datums each. For hash partitioned tables, it is an array
* of datum-tuples with 2 datums, modulus and remainder, corresponding to a
* given partition.
*
* The datums in datums array are arranged in increasing order as defined by
* functions qsort_partition_rbound_cmp(), qsort_partition_list_value_cmp() and
* qsort_partition_hbound_cmp() for range, list and hash partitioned tables
* respectively. For range and list partitions this simply means that the
* datums in the datums array are arranged in increasing order as defined by
* the partition key's operator classes and collations.
*
* In the case of list partitioning, the indexes array stores one entry for
* every datum, which is the index of the partition that accepts a given datum.
* In case of range partitioning, it stores one entry per distinct range
* datum, which is the index of the partition for which a given datum
* is an upper bound. In the case of hash partitioning, the number of
* entries in the indexes array is the same as the greatest modulus amongst all
* partitions. For a given partition key datum-tuple, the index of the
* partition which would accept that datum-tuple is given by the entry
* pointed to by the remainder produced when the hash value of the datum-tuple
* is divided by the greatest modulus.
*/
typedef struct PartitionBoundInfoData
{
char strategy; /* hash, list or range? */
int ndatums; /* Number of entries in the datums array below */
Datum **datums; /* Bound datum-tuples, kept in the sorted order
* described in the comment above this struct */
PartitionRangeDatumKind **kind; /* The kind of each range bound datum;
* NULL for hash and list partitioned
* tables */
int *indexes; /* Partition indexes; what each entry maps to
* depends on the strategy (see above) */
int null_index; /* Index of the null-accepting partition; -1
* if there isn't one */
int default_index; /* Index of the default partition; -1 if there
* isn't one */
} PartitionBoundInfoData;
#define partition_bound_accepts_nulls(bi) ((bi)->null_index != -1)
#define partition_bound_has_default(bi) ((bi)->default_index != -1)
/*
* When qsort'ing partition bounds after reading from the catalog, each bound
* is represented with one of the following structs.
*/
/*
* One bound of a hash partition, in the form used while sorting the bounds
* read from the catalog.
*/
typedef struct PartitionHashBound
{
int modulus; /* modulus of this hash bound */
int remainder; /* remainder of this hash bound */
int index; /* presumably the index of the partition that owns
* this bound -- confirm against the code that
* fills it in */
} PartitionHashBound;
/*
* One value coming from some (index'th) list partition, in the form used
* while sorting the list values read from the catalog.
*/
typedef struct PartitionListValue
{
int index; /* which list partition the value came from */
Datum value; /* the list value itself */
} PartitionListValue;
/*
* One bound (lower or upper) of a range partition, in the form used while
* sorting the bounds read from the catalog.
*/
typedef struct PartitionRangeBound
{
int index; /* presumably the index of the partition that owns
* this bound -- confirm against the code that
* fills it in */
Datum *datums; /* range bound datums */
PartitionRangeDatumKind *kind; /* the kind of each datum */
bool lower; /* this is the lower (vs upper) bound */
} PartitionRangeBound;
static Oid get_partition_parent_worker(Relation inhRel, Oid relid); static Oid get_partition_parent_worker(Relation inhRel, Oid relid);
static void get_partition_ancestors_worker(Relation inhRel, Oid relid, static void get_partition_ancestors_worker(Relation inhRel, Oid relid,
...@@ -173,29 +91,9 @@ static int32 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc, ...@@ -173,29 +91,9 @@ static int32 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
Oid *partcollation, Datum *datums1, Oid *partcollation, Datum *datums1,
PartitionRangeDatumKind *kind1, bool lower1, PartitionRangeDatumKind *kind1, bool lower1,
PartitionRangeBound *b2); PartitionRangeBound *b2);
static int32 partition_rbound_datum_cmp(FmgrInfo *partsupfunc,
Oid *partcollation,
Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
Datum *tuple_datums, int n_tuple_datums);
static int partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
PartitionBoundInfo boundinfo,
Datum value, bool *is_equal);
static int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
Oid *partcollation,
PartitionBoundInfo boundinfo,
PartitionRangeBound *probe, bool *is_equal);
static int partition_range_datum_bsearch(FmgrInfo *partsupfunc,
Oid *partcollation,
PartitionBoundInfo boundinfo,
int nvalues, Datum *values, bool *is_equal);
static int partition_hash_bsearch(PartitionBoundInfo boundinfo,
int modulus, int remainder);
static int get_partition_bound_num_indexes(PartitionBoundInfo b); static int get_partition_bound_num_indexes(PartitionBoundInfo b);
static int get_greatest_modulus(PartitionBoundInfo b);
static uint64 compute_hash_value(int partnatts, FmgrInfo *partsupfunc,
Datum *values, bool *isnull);
/* /*
* RelationBuildPartitionDesc * RelationBuildPartitionDesc
...@@ -765,13 +663,13 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval, ...@@ -765,13 +663,13 @@ partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
if (b1->strategy == PARTITION_STRATEGY_HASH) if (b1->strategy == PARTITION_STRATEGY_HASH)
{ {
int greatest_modulus = get_greatest_modulus(b1); int greatest_modulus = get_hash_partition_greatest_modulus(b1);
/* /*
* If two hash partitioned tables have different greatest moduli, * If two hash partitioned tables have different greatest moduli,
* their partition schemes don't match. * their partition schemes don't match.
*/ */
if (greatest_modulus != get_greatest_modulus(b2)) if (greatest_modulus != get_hash_partition_greatest_modulus(b2))
return false; return false;
/* /*
...@@ -1029,7 +927,7 @@ check_new_partition_bound(char *relname, Relation parent, ...@@ -1029,7 +927,7 @@ check_new_partition_bound(char *relname, Relation parent,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION), (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("every hash partition modulus must be a factor of the next larger modulus"))); errmsg("every hash partition modulus must be a factor of the next larger modulus")));
greatest_modulus = get_greatest_modulus(boundinfo); greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
remainder = spec->remainder; remainder = spec->remainder;
/* /*
...@@ -1620,7 +1518,6 @@ get_partition_qual_relid(Oid relid) ...@@ -1620,7 +1518,6 @@ get_partition_qual_relid(Oid relid)
return result; return result;
} }
/* Module-local functions */
/* /*
* get_partition_operator * get_partition_operator
...@@ -2637,7 +2534,7 @@ get_partition_for_tuple(Relation relation, Datum *values, bool *isnull) ...@@ -2637,7 +2534,7 @@ get_partition_for_tuple(Relation relation, Datum *values, bool *isnull)
case PARTITION_STRATEGY_HASH: case PARTITION_STRATEGY_HASH:
{ {
PartitionBoundInfo boundinfo = partdesc->boundinfo; PartitionBoundInfo boundinfo = partdesc->boundinfo;
int greatest_modulus = get_greatest_modulus(boundinfo); int greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
uint64 rowHash = compute_hash_value(key->partnatts, uint64 rowHash = compute_hash_value(key->partnatts,
key->partsupfunc, key->partsupfunc,
values, isnull); values, isnull);
...@@ -2971,7 +2868,7 @@ partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation, ...@@ -2971,7 +2868,7 @@ partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation,
* of attributes resp. * of attributes resp.
* *
*/ */
static int32 int32
partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation, partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
Datum *rb_datums, PartitionRangeDatumKind *rb_kind, Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
Datum *tuple_datums, int n_tuple_datums) Datum *tuple_datums, int n_tuple_datums)
...@@ -3005,7 +2902,7 @@ partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation, ...@@ -3005,7 +2902,7 @@ partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
* *is_equal is set to true if the bound datum at the returned index is equal * *is_equal is set to true if the bound datum at the returned index is equal
* to the input value. * to the input value.
*/ */
static int int
partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
PartitionBoundInfo boundinfo, PartitionBoundInfo boundinfo,
Datum value, bool *is_equal) Datum value, bool *is_equal)
...@@ -3048,7 +2945,7 @@ partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, ...@@ -3048,7 +2945,7 @@ partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
* *is_equal is set to true if the range bound at the returned index is equal * *is_equal is set to true if the range bound at the returned index is equal
* to the input range bound * to the input range bound
*/ */
static int int
partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc, partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
Oid *partcollation, Oid *partcollation,
PartitionBoundInfo boundinfo, PartitionBoundInfo boundinfo,
...@@ -3093,7 +2990,7 @@ partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc, ...@@ -3093,7 +2990,7 @@ partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
* *is_equal is set to true if the range bound at the returned index is equal * *is_equal is set to true if the range bound at the returned index is equal
* to the input tuple. * to the input tuple.
*/ */
static int int
partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
PartitionBoundInfo boundinfo, PartitionBoundInfo boundinfo,
int nvalues, Datum *values, bool *is_equal) int nvalues, Datum *values, bool *is_equal)
...@@ -3136,7 +3033,7 @@ partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation, ...@@ -3136,7 +3033,7 @@ partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
* less than or equal to the given (modulus, remainder) pair or -1 if * less than or equal to the given (modulus, remainder) pair or -1 if
* all of them are greater * all of them are greater
*/ */
static int int
partition_hash_bsearch(PartitionBoundInfo boundinfo, partition_hash_bsearch(PartitionBoundInfo boundinfo,
int modulus, int remainder) int modulus, int remainder)
{ {
...@@ -3294,7 +3191,7 @@ get_partition_bound_num_indexes(PartitionBoundInfo bound) ...@@ -3294,7 +3191,7 @@ get_partition_bound_num_indexes(PartitionBoundInfo bound)
* The number of the entries in the indexes array is same as the * The number of the entries in the indexes array is same as the
* greatest modulus. * greatest modulus.
*/ */
num_indexes = get_greatest_modulus(bound); num_indexes = get_hash_partition_greatest_modulus(bound);
break; break;
case PARTITION_STRATEGY_LIST: case PARTITION_STRATEGY_LIST:
...@@ -3315,14 +3212,14 @@ get_partition_bound_num_indexes(PartitionBoundInfo bound) ...@@ -3315,14 +3212,14 @@ get_partition_bound_num_indexes(PartitionBoundInfo bound)
} }
/* /*
* get_greatest_modulus * get_hash_partition_greatest_modulus
* *
* Returns the greatest modulus of the hash partition bound. The greatest * Returns the greatest modulus of the hash partition bound. The greatest
* modulus will be at the end of the datums array because hash partitions are * modulus will be at the end of the datums array because hash partitions are
* arranged in the ascending order of their modulus and remainders. * arranged in the ascending order of their modulus and remainders.
*/ */
static int int
get_greatest_modulus(PartitionBoundInfo bound) get_hash_partition_greatest_modulus(PartitionBoundInfo bound)
{ {
Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH); Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH);
Assert(bound->datums && bound->ndatums > 0); Assert(bound->datums && bound->ndatums > 0);
...@@ -3336,7 +3233,7 @@ get_greatest_modulus(PartitionBoundInfo bound) ...@@ -3336,7 +3233,7 @@ get_greatest_modulus(PartitionBoundInfo bound)
* *
* Compute the hash value for given not null partition key values. * Compute the hash value for given not null partition key values.
*/ */
static uint64 uint64
compute_hash_value(int partnatts, FmgrInfo *partsupfunc, compute_hash_value(int partnatts, FmgrInfo *partsupfunc,
Datum *values, bool *isnull) Datum *values, bool *isnull)
{ {
......
...@@ -2150,6 +2150,38 @@ _copyMergeAction(const MergeAction *from) ...@@ -2150,6 +2150,38 @@ _copyMergeAction(const MergeAction *from)
return newnode; return newnode;
} }
/*
* _copyPartitionPruneStepOp
*
* Build a new PartitionPruneStepOp node from 'from' and return it.
*
* The step id and operator strategy are copied as scalars; the exprs and
* cmpfns lists go through the node-copy macro and nullkeys through the
* bitmapset-copy macro (presumably producing independent copies -- see the
* COPY_*_FIELD macro definitions in this file to confirm).
*/
static PartitionPruneStepOp *
_copyPartitionPruneStepOp(const PartitionPruneStepOp *from)
{
PartitionPruneStepOp *newnode = makeNode(PartitionPruneStepOp);
/* field of the embedded "step" member */
COPY_SCALAR_FIELD(step.step_id);
COPY_SCALAR_FIELD(opstrategy);
COPY_NODE_FIELD(exprs);
COPY_NODE_FIELD(cmpfns);
COPY_BITMAPSET_FIELD(nullkeys);
return newnode;
}
/*
* _copyPartitionPruneStepCombine
*
* Build a new PartitionPruneStepCombine node from 'from' and return it.
*
* The step id and combineOp are copied as scalars; source_stepids goes
* through the node-copy macro.
*/
static PartitionPruneStepCombine *
_copyPartitionPruneStepCombine(const PartitionPruneStepCombine *from)
{
PartitionPruneStepCombine *newnode = makeNode(PartitionPruneStepCombine);
/* field of the embedded "step" member */
COPY_SCALAR_FIELD(step.step_id);
COPY_SCALAR_FIELD(combineOp);
COPY_NODE_FIELD(source_stepids);
return newnode;
}
/* **************************************************************** /* ****************************************************************
* relation.h copy functions * relation.h copy functions
* *
...@@ -2277,21 +2309,6 @@ _copyAppendRelInfo(const AppendRelInfo *from) ...@@ -2277,21 +2309,6 @@ _copyAppendRelInfo(const AppendRelInfo *from)
return newnode; return newnode;
} }
/*
* _copyPartitionedChildRelInfo
*
* Build a new PartitionedChildRelInfo node from 'from' and return it.
* parent_relid and part_cols_updated are copied as scalars; child_rels goes
* through the node-copy macro.
*
* NOTE(review): this function is on the removed side of the diff; the
* commit drops the PartitionedChildRelInfo node type.
*/
static PartitionedChildRelInfo *
_copyPartitionedChildRelInfo(const PartitionedChildRelInfo *from)
{
PartitionedChildRelInfo *newnode = makeNode(PartitionedChildRelInfo);
COPY_SCALAR_FIELD(parent_relid);
COPY_NODE_FIELD(child_rels);
COPY_SCALAR_FIELD(part_cols_updated);
return newnode;
}
/* /*
* _copyPlaceHolderInfo * _copyPlaceHolderInfo
*/ */
...@@ -5076,6 +5093,12 @@ copyObjectImpl(const void *from) ...@@ -5076,6 +5093,12 @@ copyObjectImpl(const void *from)
case T_MergeAction: case T_MergeAction:
retval = _copyMergeAction(from); retval = _copyMergeAction(from);
break; break;
case T_PartitionPruneStepOp:
retval = _copyPartitionPruneStepOp(from);
break;
case T_PartitionPruneStepCombine:
retval = _copyPartitionPruneStepCombine(from);
break;
/* /*
* RELATION NODES * RELATION NODES
...@@ -5095,9 +5118,6 @@ copyObjectImpl(const void *from) ...@@ -5095,9 +5118,6 @@ copyObjectImpl(const void *from)
case T_AppendRelInfo: case T_AppendRelInfo:
retval = _copyAppendRelInfo(from); retval = _copyAppendRelInfo(from);
break; break;
case T_PartitionedChildRelInfo:
retval = _copyPartitionedChildRelInfo(from);
break;
case T_PlaceHolderInfo: case T_PlaceHolderInfo:
retval = _copyPlaceHolderInfo(from); retval = _copyPlaceHolderInfo(from);
break; break;
......
...@@ -915,16 +915,6 @@ _equalAppendRelInfo(const AppendRelInfo *a, const AppendRelInfo *b) ...@@ -915,16 +915,6 @@ _equalAppendRelInfo(const AppendRelInfo *a, const AppendRelInfo *b)
return true; return true;
} }
/*
* _equalPartitionedChildRelInfo
*
* Compare two PartitionedChildRelInfo nodes field by field: parent_relid,
* child_rels and part_cols_updated.  Returns true when control reaches the
* end; the COMPARE_*_FIELD macros presumably return false from this
* function on the first mismatch (confirm against their definitions).
*
* NOTE(review): this function is on the removed side of the diff.
*/
static bool
_equalPartitionedChildRelInfo(const PartitionedChildRelInfo *a, const PartitionedChildRelInfo *b)
{
COMPARE_SCALAR_FIELD(parent_relid);
COMPARE_NODE_FIELD(child_rels);
COMPARE_SCALAR_FIELD(part_cols_updated);
return true;
}
static bool static bool
_equalPlaceHolderInfo(const PlaceHolderInfo *a, const PlaceHolderInfo *b) _equalPlaceHolderInfo(const PlaceHolderInfo *a, const PlaceHolderInfo *b)
{ {
...@@ -3230,9 +3220,6 @@ equal(const void *a, const void *b) ...@@ -3230,9 +3220,6 @@ equal(const void *a, const void *b)
case T_AppendRelInfo: case T_AppendRelInfo:
retval = _equalAppendRelInfo(a, b); retval = _equalAppendRelInfo(a, b);
break; break;
case T_PartitionedChildRelInfo:
retval = _equalPartitionedChildRelInfo(a, b);
break;
case T_PlaceHolderInfo: case T_PlaceHolderInfo:
retval = _equalPlaceHolderInfo(a, b); retval = _equalPlaceHolderInfo(a, b);
break; break;
......
...@@ -2156,6 +2156,17 @@ expression_tree_walker(Node *node, ...@@ -2156,6 +2156,17 @@ expression_tree_walker(Node *node,
return true; return true;
} }
break; break;
case T_PartitionPruneStepOp:
{
PartitionPruneStepOp *opstep = (PartitionPruneStepOp *) node;
if (walker((Node *) opstep->exprs, context))
return true;
}
break;
case T_PartitionPruneStepCombine:
/* no expression subnodes */
break;
case T_JoinExpr: case T_JoinExpr:
{ {
JoinExpr *join = (JoinExpr *) node; JoinExpr *join = (JoinExpr *) node;
...@@ -2958,6 +2969,20 @@ expression_tree_mutator(Node *node, ...@@ -2958,6 +2969,20 @@ expression_tree_mutator(Node *node,
return (Node *) newnode; return (Node *) newnode;
} }
break; break;
case T_PartitionPruneStepOp:
{
PartitionPruneStepOp *opstep = (PartitionPruneStepOp *) node;
PartitionPruneStepOp *newnode;
FLATCOPY(newnode, opstep, PartitionPruneStepOp);
MUTATE(newnode->exprs, opstep->exprs, List *);
return (Node *) newnode;
}
break;
case T_PartitionPruneStepCombine:
/* no expression sub-nodes */
return (Node *) copyObject(node);
case T_JoinExpr: case T_JoinExpr:
{ {
JoinExpr *join = (JoinExpr *) node; JoinExpr *join = (JoinExpr *) node;
......
...@@ -1710,6 +1710,28 @@ _outFromExpr(StringInfo str, const FromExpr *node) ...@@ -1710,6 +1710,28 @@ _outFromExpr(StringInfo str, const FromExpr *node)
WRITE_NODE_FIELD(quals); WRITE_NODE_FIELD(quals);
} }
/*
* _outPartitionPruneStepOp
*
* Write a PartitionPruneStepOp node to 'str' under the label
* "PARTITIONPRUNESTEPOP".  The fields are emitted in the same order in
* which _readPartitionPruneStepOp reads them back: step id, opstrategy,
* exprs, cmpfns, nullkeys.
*/
static void
_outPartitionPruneStepOp(StringInfo str, const PartitionPruneStepOp *node)
{
WRITE_NODE_TYPE("PARTITIONPRUNESTEPOP");
/* field of the embedded "step" member */
WRITE_INT_FIELD(step.step_id);
WRITE_INT_FIELD(opstrategy);
WRITE_NODE_FIELD(exprs);
WRITE_NODE_FIELD(cmpfns);
WRITE_BITMAPSET_FIELD(nullkeys);
}
/*
* _outPartitionPruneStepCombine
*
* Write a PartitionPruneStepCombine node to 'str' under the label
* "PARTITIONPRUNESTEPCOMBINE".  The fields are emitted in the same order in
* which _readPartitionPruneStepCombine reads them back: step id, combineOp,
* source_stepids.
*/
static void
_outPartitionPruneStepCombine(StringInfo str, const PartitionPruneStepCombine *node)
{
WRITE_NODE_TYPE("PARTITIONPRUNESTEPCOMBINE");
/* field of the embedded "step" member */
WRITE_INT_FIELD(step.step_id);
WRITE_ENUM_FIELD(combineOp, PartitionPruneCombineOp);
WRITE_NODE_FIELD(source_stepids);
}
static void static void
_outOnConflictExpr(StringInfo str, const OnConflictExpr *node) _outOnConflictExpr(StringInfo str, const OnConflictExpr *node)
{ {
...@@ -2261,7 +2283,6 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node) ...@@ -2261,7 +2283,6 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node)
WRITE_NODE_FIELD(full_join_clauses); WRITE_NODE_FIELD(full_join_clauses);
WRITE_NODE_FIELD(join_info_list); WRITE_NODE_FIELD(join_info_list);
WRITE_NODE_FIELD(append_rel_list); WRITE_NODE_FIELD(append_rel_list);
WRITE_NODE_FIELD(pcinfo_list);
WRITE_NODE_FIELD(rowMarks); WRITE_NODE_FIELD(rowMarks);
WRITE_NODE_FIELD(placeholder_list); WRITE_NODE_FIELD(placeholder_list);
WRITE_NODE_FIELD(fkey_list); WRITE_NODE_FIELD(fkey_list);
...@@ -2286,6 +2307,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node) ...@@ -2286,6 +2307,7 @@ _outPlannerInfo(StringInfo str, const PlannerInfo *node)
WRITE_INT_FIELD(wt_param_id); WRITE_INT_FIELD(wt_param_id);
WRITE_BITMAPSET_FIELD(curOuterRels); WRITE_BITMAPSET_FIELD(curOuterRels);
WRITE_NODE_FIELD(curOuterParams); WRITE_NODE_FIELD(curOuterParams);
WRITE_BOOL_FIELD(partColsUpdated);
} }
static void static void
...@@ -2335,6 +2357,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node) ...@@ -2335,6 +2357,7 @@ _outRelOptInfo(StringInfo str, const RelOptInfo *node)
WRITE_NODE_FIELD(joininfo); WRITE_NODE_FIELD(joininfo);
WRITE_BOOL_FIELD(has_eclass_joins); WRITE_BOOL_FIELD(has_eclass_joins);
WRITE_BITMAPSET_FIELD(top_parent_relids); WRITE_BITMAPSET_FIELD(top_parent_relids);
WRITE_NODE_FIELD(partitioned_child_rels);
} }
static void static void
...@@ -2559,16 +2582,6 @@ _outAppendRelInfo(StringInfo str, const AppendRelInfo *node) ...@@ -2559,16 +2582,6 @@ _outAppendRelInfo(StringInfo str, const AppendRelInfo *node)
WRITE_OID_FIELD(parent_reloid); WRITE_OID_FIELD(parent_reloid);
} }
/*
* _outPartitionedChildRelInfo
*
* Write a PartitionedChildRelInfo node to 'str' under the label
* "PARTITIONEDCHILDRELINFO": parent_relid, child_rels, part_cols_updated.
*
* NOTE(review): this function is on the removed side of the diff.
*/
static void
_outPartitionedChildRelInfo(StringInfo str, const PartitionedChildRelInfo *node)
{
WRITE_NODE_TYPE("PARTITIONEDCHILDRELINFO");
WRITE_UINT_FIELD(parent_relid);
WRITE_NODE_FIELD(child_rels);
WRITE_BOOL_FIELD(part_cols_updated);
}
static void static void
_outPlaceHolderInfo(StringInfo str, const PlaceHolderInfo *node) _outPlaceHolderInfo(StringInfo str, const PlaceHolderInfo *node)
{ {
...@@ -3973,6 +3986,12 @@ outNode(StringInfo str, const void *obj) ...@@ -3973,6 +3986,12 @@ outNode(StringInfo str, const void *obj)
case T_MergeAction: case T_MergeAction:
_outMergeAction(str, obj); _outMergeAction(str, obj);
break; break;
case T_PartitionPruneStepOp:
_outPartitionPruneStepOp(str, obj);
break;
case T_PartitionPruneStepCombine:
_outPartitionPruneStepCombine(str, obj);
break;
case T_Path: case T_Path:
_outPath(str, obj); _outPath(str, obj);
break; break;
...@@ -4114,9 +4133,6 @@ outNode(StringInfo str, const void *obj) ...@@ -4114,9 +4133,6 @@ outNode(StringInfo str, const void *obj)
case T_AppendRelInfo: case T_AppendRelInfo:
_outAppendRelInfo(str, obj); _outAppendRelInfo(str, obj);
break; break;
case T_PartitionedChildRelInfo:
_outPartitionedChildRelInfo(str, obj);
break;
case T_PlaceHolderInfo: case T_PlaceHolderInfo:
_outPlaceHolderInfo(str, obj); _outPlaceHolderInfo(str, obj);
break; break;
......
...@@ -1331,6 +1331,32 @@ _readOnConflictExpr(void) ...@@ -1331,6 +1331,32 @@ _readOnConflictExpr(void)
READ_DONE(); READ_DONE();
} }
/*
* _readPartitionPruneStepOp
*
* Read back a PartitionPruneStepOp node.  The fields are consumed in the
* same order in which _outPartitionPruneStepOp writes them: step id,
* opstrategy, exprs, cmpfns, nullkeys.
*
* The local node variable and the return statement presumably come from
* the READ_LOCALS / READ_DONE macros -- confirm against their definitions.
*/
static PartitionPruneStepOp *
_readPartitionPruneStepOp(void)
{
READ_LOCALS(PartitionPruneStepOp);
/* field of the embedded "step" member */
READ_INT_FIELD(step.step_id);
READ_INT_FIELD(opstrategy);
READ_NODE_FIELD(exprs);
READ_NODE_FIELD(cmpfns);
READ_BITMAPSET_FIELD(nullkeys);
READ_DONE();
}
/*
* _readPartitionPruneStepCombine
*
* Read back a PartitionPruneStepCombine node.  The fields are consumed in
* the same order in which _outPartitionPruneStepCombine writes them: step
* id, combineOp, source_stepids.
*
* The local node variable and the return statement presumably come from
* the READ_LOCALS / READ_DONE macros -- confirm against their definitions.
*/
static PartitionPruneStepCombine *
_readPartitionPruneStepCombine(void)
{
READ_LOCALS(PartitionPruneStepCombine);
/* field of the embedded "step" member */
READ_INT_FIELD(step.step_id);
READ_ENUM_FIELD(combineOp, PartitionPruneCombineOp);
READ_NODE_FIELD(source_stepids);
READ_DONE();
}
/* /*
* _readMergeAction * _readMergeAction
*/ */
...@@ -2615,6 +2641,10 @@ parseNodeString(void) ...@@ -2615,6 +2641,10 @@ parseNodeString(void)
return_value = _readOnConflictExpr(); return_value = _readOnConflictExpr();
else if (MATCH("MERGEACTION", 11)) else if (MATCH("MERGEACTION", 11))
return_value = _readMergeAction(); return_value = _readMergeAction();
else if (MATCH("PARTITIONPRUNESTEPOP", 20))
return_value = _readPartitionPruneStepOp();
else if (MATCH("PARTITIONPRUNESTEPCOMBINE", 25))
return_value = _readPartitionPruneStepCombine();
else if (MATCH("RTE", 3)) else if (MATCH("RTE", 3))
return_value = _readRangeTblEntry(); return_value = _readRangeTblEntry();
else if (MATCH("RANGETBLFUNCTION", 16)) else if (MATCH("RANGETBLFUNCTION", 16))
......
...@@ -43,6 +43,7 @@ ...@@ -43,6 +43,7 @@
#include "optimizer/var.h" #include "optimizer/var.h"
#include "parser/parse_clause.h" #include "parser/parse_clause.h"
#include "parser/parsetree.h" #include "parser/parsetree.h"
#include "partitioning/partprune.h"
#include "rewrite/rewriteManip.h" #include "rewrite/rewriteManip.h"
#include "utils/lsyscache.h" #include "utils/lsyscache.h"
...@@ -874,12 +875,39 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, ...@@ -874,12 +875,39 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
double *parent_attrsizes; double *parent_attrsizes;
int nattrs; int nattrs;
ListCell *l; ListCell *l;
Relids live_children = NULL;
bool did_pruning = false;
/* Guard against stack overflow due to overly deep inheritance tree. */ /* Guard against stack overflow due to overly deep inheritance tree. */
check_stack_depth(); check_stack_depth();
Assert(IS_SIMPLE_REL(rel)); Assert(IS_SIMPLE_REL(rel));
/*
* Initialize partitioned_child_rels to contain this RT index.
*
* Note that during the set_append_rel_pathlist() phase, we will bubble up
* the indexes of partitioned relations that appear down in the tree, so
* that when we've created Paths for all the children, the root
* partitioned table's list will contain all such indexes.
*/
if (rte->relkind == RELKIND_PARTITIONED_TABLE)
rel->partitioned_child_rels = list_make1_int(rti);
/*
* If the partitioned relation has any baserestrictinfo quals then we
* attempt to use these quals to prune away partitions that cannot
* possibly contain any tuples matching these quals. In this case we'll
* store the relids of all partitions which could possibly contain a
* matching tuple, and skip anything else in the loop below.
*/
if (rte->relkind == RELKIND_PARTITIONED_TABLE &&
rel->baserestrictinfo != NIL)
{
live_children = prune_append_rel_partitions(rel);
did_pruning = true;
}
/* /*
* Initialize to compute size estimates for whole append relation. * Initialize to compute size estimates for whole append relation.
* *
...@@ -1128,6 +1156,13 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel, ...@@ -1128,6 +1156,13 @@ set_append_rel_size(PlannerInfo *root, RelOptInfo *rel,
continue; continue;
} }
if (did_pruning && !bms_is_member(appinfo->child_relid, live_children))
{
/* This partition was pruned; skip it. */
set_dummy_rel_pathlist(childrel);
continue;
}
if (relation_excluded_by_constraints(root, childrel, childRTE)) if (relation_excluded_by_constraints(root, childrel, childRTE))
{ {
/* /*
...@@ -1309,6 +1344,12 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, ...@@ -1309,6 +1344,12 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
if (IS_DUMMY_REL(childrel)) if (IS_DUMMY_REL(childrel))
continue; continue;
/* Bubble up childrel's partitioned children. */
if (rel->part_scheme)
rel->partitioned_child_rels =
list_concat(rel->partitioned_child_rels,
list_copy(childrel->partitioned_child_rels));
/* /*
* Child is live, so add it to the live_childrels list for use below. * Child is live, so add it to the live_childrels list for use below.
*/ */
...@@ -1346,49 +1387,55 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, ...@@ -1346,49 +1387,55 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
List *all_child_outers = NIL; List *all_child_outers = NIL;
ListCell *l; ListCell *l;
List *partitioned_rels = NIL; List *partitioned_rels = NIL;
RangeTblEntry *rte;
bool build_partitioned_rels = false; bool build_partitioned_rels = false;
double partial_rows = -1; double partial_rows = -1;
/*
* AppendPath generated for partitioned tables must record the RT indexes
* of partitioned tables that are direct or indirect children of this
* Append rel.
*
* AppendPath may be for a sub-query RTE (UNION ALL), in which case, 'rel'
* itself does not represent a partitioned relation, but the child sub-
* queries may contain references to partitioned relations. The loop
* below will look for such children and collect them in a list to be
* passed to the path creation function. (This assumes that we don't need
* to look through multiple levels of subquery RTEs; if we ever do, we
* could consider stuffing the list we generate here into sub-query RTE's
* RelOptInfo, just like we do for partitioned rels, which would be used
* when populating our parent rel with paths. For the present, that
* appears to be unnecessary.)
*/
if (rel->part_scheme != NULL)
{
if (IS_SIMPLE_REL(rel)) if (IS_SIMPLE_REL(rel))
partitioned_rels = rel->partitioned_child_rels;
else if (IS_JOIN_REL(rel))
{ {
int relid = -1;
/* /*
* A root partition will already have a PartitionedChildRelInfo, and a * For a partitioned joinrel, concatenate the component rels'
* non-root partitioned table doesn't need one, because its Append * partitioned_child_rels lists.
* paths will get flattened into the parent anyway. For a subquery
* RTE, no PartitionedChildRelInfo exists; we collect all
* partitioned_rels associated with any child. (This assumes that we
* don't need to look through multiple levels of subquery RTEs; if we
* ever do, we could create a PartitionedChildRelInfo with the
* accumulated list of partitioned_rels which would then be found when
* populated our parent rel with paths. For the present, that appears
* to be unnecessary.)
*/ */
rte = planner_rt_fetch(rel->relid, root); while ((relid = bms_next_member(rel->relids, relid)) >= 0)
switch (rte->rtekind)
{ {
case RTE_RELATION: RelOptInfo *component;
if (rte->relkind == RELKIND_PARTITIONED_TABLE)
Assert(relid >= 1 && relid < root->simple_rel_array_size);
component = root->simple_rel_array[relid];
Assert(component->part_scheme != NULL);
Assert(list_length(component->partitioned_child_rels) >= 1);
partitioned_rels = partitioned_rels =
get_partitioned_child_rels(root, rel->relid, NULL); list_concat(partitioned_rels,
break; list_copy(component->partitioned_child_rels));
case RTE_SUBQUERY:
build_partitioned_rels = true;
break;
default:
elog(ERROR, "unexpected rtekind: %d", (int) rte->rtekind);
} }
} }
else if (rel->reloptkind == RELOPT_JOINREL && rel->part_scheme)
{ Assert(list_length(partitioned_rels) >= 1);
/*
* Associate PartitionedChildRelInfo of the root partitioned tables
* being joined with the root partitioned join (indicated by
* RELOPT_JOINREL).
*/
partitioned_rels = get_partitioned_child_rels_for_join(root,
rel->relids);
} }
else if (rel->rtekind == RTE_SUBQUERY)
build_partitioned_rels = true;
/* /*
* For every non-dummy child, remember the cheapest path. Also, identify * For every non-dummy child, remember the cheapest path. Also, identify
...@@ -1407,9 +1454,8 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, ...@@ -1407,9 +1454,8 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel,
*/ */
if (build_partitioned_rels) if (build_partitioned_rels)
{ {
List *cprels; List *cprels = childrel->partitioned_child_rels;
cprels = get_partitioned_child_rels(root, childrel->relid, NULL);
partitioned_rels = list_concat(partitioned_rels, partitioned_rels = list_concat(partitioned_rels,
list_copy(cprels)); list_copy(cprels));
} }
......
...@@ -40,9 +40,7 @@ ...@@ -40,9 +40,7 @@
#include "utils/selfuncs.h" #include "utils/selfuncs.h"
#define IsBooleanOpfamily(opfamily) \ /* XXX see PartCollMatchesExprColl */
((opfamily) == BOOL_BTREE_FAM_OID || (opfamily) == BOOL_HASH_FAM_OID)
#define IndexCollMatchesExprColl(idxcollation, exprcollation) \ #define IndexCollMatchesExprColl(idxcollation, exprcollation) \
((idxcollation) == InvalidOid || (idxcollation) == (exprcollation)) ((idxcollation) == InvalidOid || (idxcollation) == (exprcollation))
......
...@@ -616,7 +616,6 @@ subquery_planner(PlannerGlobal *glob, Query *parse, ...@@ -616,7 +616,6 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
root->multiexpr_params = NIL; root->multiexpr_params = NIL;
root->eq_classes = NIL; root->eq_classes = NIL;
root->append_rel_list = NIL; root->append_rel_list = NIL;
root->pcinfo_list = NIL;
root->rowMarks = NIL; root->rowMarks = NIL;
memset(root->upper_rels, 0, sizeof(root->upper_rels)); memset(root->upper_rels, 0, sizeof(root->upper_rels));
memset(root->upper_targets, 0, sizeof(root->upper_targets)); memset(root->upper_targets, 0, sizeof(root->upper_targets));
...@@ -631,6 +630,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse, ...@@ -631,6 +630,7 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
else else
root->wt_param_id = -1; root->wt_param_id = -1;
root->non_recursive_path = NULL; root->non_recursive_path = NULL;
root->partColsUpdated = false;
/* /*
* If there is a WITH list, process each WITH query and build an initplan * If there is a WITH list, process each WITH query and build an initplan
...@@ -1191,12 +1191,12 @@ inheritance_planner(PlannerInfo *root) ...@@ -1191,12 +1191,12 @@ inheritance_planner(PlannerInfo *root)
ListCell *lc; ListCell *lc;
Index rti; Index rti;
RangeTblEntry *parent_rte; RangeTblEntry *parent_rte;
Relids partitioned_relids = NULL;
List *partitioned_rels = NIL; List *partitioned_rels = NIL;
PlannerInfo *parent_root; PlannerInfo *parent_root;
Query *parent_parse; Query *parent_parse;
Bitmapset *parent_relids = bms_make_singleton(top_parentRTindex); Bitmapset *parent_relids = bms_make_singleton(top_parentRTindex);
PlannerInfo **parent_roots = NULL; PlannerInfo **parent_roots = NULL;
bool partColsUpdated = false;
Assert(parse->commandType != CMD_INSERT); Assert(parse->commandType != CMD_INSERT);
...@@ -1268,10 +1268,12 @@ inheritance_planner(PlannerInfo *root) ...@@ -1268,10 +1268,12 @@ inheritance_planner(PlannerInfo *root)
if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE) if (parent_rte->relkind == RELKIND_PARTITIONED_TABLE)
{ {
nominalRelation = top_parentRTindex; nominalRelation = top_parentRTindex;
partitioned_rels = get_partitioned_child_rels(root, top_parentRTindex,
&partColsUpdated); /*
/* The root partitioned table is included as a child rel */ * Root parent's RT index is always present in the partitioned_rels of
Assert(list_length(partitioned_rels) >= 1); * the ModifyTable node, if one is needed at all.
*/
partitioned_relids = bms_make_singleton(top_parentRTindex);
} }
/* /*
...@@ -1502,6 +1504,15 @@ inheritance_planner(PlannerInfo *root) ...@@ -1502,6 +1504,15 @@ inheritance_planner(PlannerInfo *root)
if (IS_DUMMY_PATH(subpath)) if (IS_DUMMY_PATH(subpath))
continue; continue;
/*
* Add the current parent's RT index to the partitioned_relids set if
* we're going to create the ModifyTable path for a partitioned root
* table.
*/
if (partitioned_relids)
partitioned_relids = bms_add_member(partitioned_relids,
appinfo->parent_relid);
/* /*
* If this is the first non-excluded child, its post-planning rtable * If this is the first non-excluded child, its post-planning rtable
* becomes the initial contents of final_rtable; otherwise, append * becomes the initial contents of final_rtable; otherwise, append
...@@ -1603,6 +1614,21 @@ inheritance_planner(PlannerInfo *root) ...@@ -1603,6 +1614,21 @@ inheritance_planner(PlannerInfo *root)
else else
rowMarks = root->rowMarks; rowMarks = root->rowMarks;
if (partitioned_relids)
{
int i;
i = -1;
while ((i = bms_next_member(partitioned_relids, i)) >= 0)
partitioned_rels = lappend_int(partitioned_rels, i);
/*
* If we're going to create ModifyTable at all, the list should
* contain at least one member, that is, the root parent's index.
*/
Assert(list_length(partitioned_rels) >= 1);
}
/* Create Path representing a ModifyTable to do the UPDATE/DELETE work */ /* Create Path representing a ModifyTable to do the UPDATE/DELETE work */
add_path(final_rel, (Path *) add_path(final_rel, (Path *)
create_modifytable_path(root, final_rel, create_modifytable_path(root, final_rel,
...@@ -1610,7 +1636,7 @@ inheritance_planner(PlannerInfo *root) ...@@ -1610,7 +1636,7 @@ inheritance_planner(PlannerInfo *root)
parse->canSetTag, parse->canSetTag,
nominalRelation, nominalRelation,
partitioned_rels, partitioned_rels,
partColsUpdated, root->partColsUpdated,
resultRelations, resultRelations,
0, 0,
subpaths, subpaths,
...@@ -6144,65 +6170,6 @@ done: ...@@ -6144,65 +6170,6 @@ done:
return parallel_workers; return parallel_workers;
} }
/*
 * get_partitioned_child_rels
 *		Look up the PartitionedChildRelInfo stored in root->pcinfo_list whose
 *		parent_relid equals 'rti' and hand back its child_rels list.
 *
 * When 'part_cols_updated' is non-NULL it receives the matching entry's
 * part_cols_updated flag; if no entry matches it is set to false.
 *
 * Returns NIL if no entry matches 'rti'.  The list returned is the one held
 * by the matching entry itself, not a copy.
 */
List *
get_partitioned_child_rels(PlannerInfo *root, Index rti,
						   bool *part_cols_updated)
{
	ListCell   *cell;

	/* Default answer for callers interested in the flag. */
	if (part_cols_updated != NULL)
		*part_cols_updated = false;

	foreach(cell, root->pcinfo_list)
	{
		PartitionedChildRelInfo *pcinfo = lfirst_node(PartitionedChildRelInfo, cell);

		/* Not the parent we were asked about; keep scanning. */
		if (pcinfo->parent_relid != rti)
			continue;

		/* First match wins. */
		if (part_cols_updated != NULL)
			*part_cols_updated = pcinfo->part_cols_updated;
		return pcinfo->child_rels;
	}

	return NIL;
}
/*
 * get_partitioned_child_rels_for_join
 *		Walk root->pcinfo_list and collect, into one freshly built list, a
 *		copy of the child_rels of every entry whose parent_relid is a member
 *		of 'join_relids'.
 *
 * Returns NIL when no entry's parent belongs to the join.
 */
List *
get_partitioned_child_rels_for_join(PlannerInfo *root, Relids join_relids)
{
	List	   *child_rtis = NIL;
	ListCell   *cell;

	foreach(cell, root->pcinfo_list)
	{
		PartitionedChildRelInfo *pcinfo = lfirst(cell);

		/* Skip parents that do not take part in this join. */
		if (!bms_is_member(pcinfo->parent_relid, join_relids))
			continue;

		/* Append a copy so the entry's own list is left untouched. */
		child_rtis = list_concat(child_rtis, list_copy(pcinfo->child_rels));
	}

	return child_rtis;
}
/* /*
* add_paths_to_grouping_rel * add_paths_to_grouping_rel
* *
......
...@@ -104,8 +104,7 @@ static void expand_partitioned_rtentry(PlannerInfo *root, ...@@ -104,8 +104,7 @@ static void expand_partitioned_rtentry(PlannerInfo *root,
RangeTblEntry *parentrte, RangeTblEntry *parentrte,
Index parentRTindex, Relation parentrel, Index parentRTindex, Relation parentrel,
PlanRowMark *top_parentrc, LOCKMODE lockmode, PlanRowMark *top_parentrc, LOCKMODE lockmode,
List **appinfos, List **partitioned_child_rels, List **appinfos);
bool *part_cols_updated);
static void expand_single_inheritance_child(PlannerInfo *root, static void expand_single_inheritance_child(PlannerInfo *root,
RangeTblEntry *parentrte, RangeTblEntry *parentrte,
Index parentRTindex, Relation parentrel, Index parentRTindex, Relation parentrel,
...@@ -1587,9 +1586,6 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti) ...@@ -1587,9 +1586,6 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
/* Scan the inheritance set and expand it */ /* Scan the inheritance set and expand it */
if (RelationGetPartitionDesc(oldrelation) != NULL) if (RelationGetPartitionDesc(oldrelation) != NULL)
{ {
List *partitioned_child_rels = NIL;
bool part_cols_updated = false;
Assert(rte->relkind == RELKIND_PARTITIONED_TABLE); Assert(rte->relkind == RELKIND_PARTITIONED_TABLE);
/* /*
...@@ -1598,28 +1594,7 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti) ...@@ -1598,28 +1594,7 @@ expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti)
* extract the partition key columns of all the partitioned tables. * extract the partition key columns of all the partitioned tables.
*/ */
expand_partitioned_rtentry(root, rte, rti, oldrelation, oldrc, expand_partitioned_rtentry(root, rte, rti, oldrelation, oldrc,
lockmode, &root->append_rel_list, lockmode, &root->append_rel_list);
&partitioned_child_rels,
&part_cols_updated);
/*
* We keep a list of objects in root, each of which maps a root
* partitioned parent RT index to the list of RT indexes of descendant
* partitioned child tables. When creating an Append or a ModifyTable
* path for the parent, we copy the child RT index list verbatim to
* the path so that it could be carried over to the executor so that
* the latter could identify the partitioned child tables.
*/
if (rte->inh && partitioned_child_rels != NIL)
{
PartitionedChildRelInfo *pcinfo;
pcinfo = makeNode(PartitionedChildRelInfo);
pcinfo->parent_relid = rti;
pcinfo->child_rels = partitioned_child_rels;
pcinfo->part_cols_updated = part_cols_updated;
root->pcinfo_list = lappend(root->pcinfo_list, pcinfo);
}
} }
else else
{ {
...@@ -1694,8 +1669,7 @@ static void ...@@ -1694,8 +1669,7 @@ static void
expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte, expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
Index parentRTindex, Relation parentrel, Index parentRTindex, Relation parentrel,
PlanRowMark *top_parentrc, LOCKMODE lockmode, PlanRowMark *top_parentrc, LOCKMODE lockmode,
List **appinfos, List **partitioned_child_rels, List **appinfos)
bool *part_cols_updated)
{ {
int i; int i;
RangeTblEntry *childrte; RangeTblEntry *childrte;
...@@ -1717,8 +1691,8 @@ expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte, ...@@ -1717,8 +1691,8 @@ expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
* parentrte already has the root partrel's updatedCols translated to match * parentrte already has the root partrel's updatedCols translated to match
* the attribute ordering of parentrel. * the attribute ordering of parentrel.
*/ */
if (!*part_cols_updated) if (!root->partColsUpdated)
*part_cols_updated = root->partColsUpdated =
has_partition_attrs(parentrel, parentrte->updatedCols, NULL); has_partition_attrs(parentrel, parentrte->updatedCols, NULL);
/* First expand the partitioned table itself. */ /* First expand the partitioned table itself. */
...@@ -1726,14 +1700,6 @@ expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte, ...@@ -1726,14 +1700,6 @@ expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
top_parentrc, parentrel, top_parentrc, parentrel,
appinfos, &childrte, &childRTindex); appinfos, &childrte, &childRTindex);
/*
* The partitioned table does not have data for itself but still needs to
* be locked. Update given list of partitioned children with RTI of this
* partitioned relation.
*/
*partitioned_child_rels = lappend_int(*partitioned_child_rels,
childRTindex);
for (i = 0; i < partdesc->nparts; i++) for (i = 0; i < partdesc->nparts; i++)
{ {
Oid childOID = partdesc->oids[i]; Oid childOID = partdesc->oids[i];
...@@ -1760,8 +1726,7 @@ expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte, ...@@ -1760,8 +1726,7 @@ expand_partitioned_rtentry(PlannerInfo *root, RangeTblEntry *parentrte,
if (childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) if (childrel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
expand_partitioned_rtentry(root, childrte, childRTindex, expand_partitioned_rtentry(root, childrte, childRTindex,
childrel, top_parentrc, lockmode, childrel, top_parentrc, lockmode,
appinfos, partitioned_child_rels, appinfos);
part_cols_updated);
/* Close child relation, but keep locks */ /* Close child relation, but keep locks */
heap_close(childrel, NoLock); heap_close(childrel, NoLock);
......
...@@ -1171,7 +1171,6 @@ get_relation_constraints(PlannerInfo *root, ...@@ -1171,7 +1171,6 @@ get_relation_constraints(PlannerInfo *root,
Index varno = rel->relid; Index varno = rel->relid;
Relation relation; Relation relation;
TupleConstr *constr; TupleConstr *constr;
List *pcqual;
/* /*
* We assume the relation has already been safely locked. * We assume the relation has already been safely locked.
...@@ -1257,16 +1256,25 @@ get_relation_constraints(PlannerInfo *root, ...@@ -1257,16 +1256,25 @@ get_relation_constraints(PlannerInfo *root,
} }
} }
/* Append partition predicates, if any */ /*
pcqual = RelationGetPartitionQual(relation); * Append partition predicates, if any.
*
* For selects, partition pruning uses the parent table's partition bound
* descriptor, instead of constraint exclusion which is driven by the
* individual partition's partition constraint.
*/
if (root->parse->commandType != CMD_SELECT)
{
List *pcqual = RelationGetPartitionQual(relation);
if (pcqual) if (pcqual)
{ {
/* /*
* Run the partition quals through const-simplification similar to * Run the partition quals through const-simplification similar to
* check constraints. We skip canonicalize_qual, though, because * check constraints. We skip canonicalize_qual, though, because
* partition quals should be in canonical form already; also, since * partition quals should be in canonical form already; also,
* the qual is in implicit-AND format, we'd have to explicitly convert * since the qual is in implicit-AND format, we'd have to
* it to explicit-AND format and back again. * explicitly convert it to explicit-AND format and back again.
*/ */
pcqual = (List *) eval_const_expressions(root, (Node *) pcqual); pcqual = (List *) eval_const_expressions(root, (Node *) pcqual);
...@@ -1276,6 +1284,7 @@ get_relation_constraints(PlannerInfo *root, ...@@ -1276,6 +1284,7 @@ get_relation_constraints(PlannerInfo *root,
result = list_concat(result, pcqual); result = list_concat(result, pcqual);
} }
}
heap_close(relation, NoLock); heap_close(relation, NoLock);
...@@ -1869,6 +1878,7 @@ set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel, ...@@ -1869,6 +1878,7 @@ set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel,
rel->boundinfo = partition_bounds_copy(partdesc->boundinfo, partkey); rel->boundinfo = partition_bounds_copy(partdesc->boundinfo, partkey);
rel->nparts = partdesc->nparts; rel->nparts = partdesc->nparts;
set_baserel_partition_key_exprs(relation, rel); set_baserel_partition_key_exprs(relation, rel);
rel->partition_qual = RelationGetPartitionQual(relation);
} }
/* /*
...@@ -1881,7 +1891,8 @@ find_partition_scheme(PlannerInfo *root, Relation relation) ...@@ -1881,7 +1891,8 @@ find_partition_scheme(PlannerInfo *root, Relation relation)
{ {
PartitionKey partkey = RelationGetPartitionKey(relation); PartitionKey partkey = RelationGetPartitionKey(relation);
ListCell *lc; ListCell *lc;
int partnatts; int partnatts,
i;
PartitionScheme part_scheme; PartitionScheme part_scheme;
/* A partitioned table should have a partition key. */ /* A partitioned table should have a partition key. */
...@@ -1899,7 +1910,7 @@ find_partition_scheme(PlannerInfo *root, Relation relation) ...@@ -1899,7 +1910,7 @@ find_partition_scheme(PlannerInfo *root, Relation relation)
partnatts != part_scheme->partnatts) partnatts != part_scheme->partnatts)
continue; continue;
/* Match the partition key types. */ /* Match partition key type properties. */
if (memcmp(partkey->partopfamily, part_scheme->partopfamily, if (memcmp(partkey->partopfamily, part_scheme->partopfamily,
sizeof(Oid) * partnatts) != 0 || sizeof(Oid) * partnatts) != 0 ||
memcmp(partkey->partopcintype, part_scheme->partopcintype, memcmp(partkey->partopcintype, part_scheme->partopcintype,
...@@ -1917,6 +1928,19 @@ find_partition_scheme(PlannerInfo *root, Relation relation) ...@@ -1917,6 +1928,19 @@ find_partition_scheme(PlannerInfo *root, Relation relation)
Assert(memcmp(partkey->parttypbyval, part_scheme->parttypbyval, Assert(memcmp(partkey->parttypbyval, part_scheme->parttypbyval,
sizeof(bool) * partnatts) == 0); sizeof(bool) * partnatts) == 0);
/*
* If partopfamily and partopcintype matched, must have the same
* partition comparison functions. Note that we cannot reliably
* Assert the equality of function structs themselves for they might
* be different across PartitionKey's, so just Assert for the function
* OIDs.
*/
#ifdef USE_ASSERT_CHECKING
for (i = 0; i < partkey->partnatts; i++)
Assert(partkey->partsupfunc[i].fn_oid ==
part_scheme->partsupfunc[i].fn_oid);
#endif
/* Found matching partition scheme. */ /* Found matching partition scheme. */
return part_scheme; return part_scheme;
} }
...@@ -1951,6 +1975,12 @@ find_partition_scheme(PlannerInfo *root, Relation relation) ...@@ -1951,6 +1975,12 @@ find_partition_scheme(PlannerInfo *root, Relation relation)
memcpy(part_scheme->parttypbyval, partkey->parttypbyval, memcpy(part_scheme->parttypbyval, partkey->parttypbyval,
sizeof(bool) * partnatts); sizeof(bool) * partnatts);
part_scheme->partsupfunc = (FmgrInfo *)
palloc(sizeof(FmgrInfo) * partnatts);
for (i = 0; i < partnatts; i++)
fmgr_info_copy(&part_scheme->partsupfunc[i], &partkey->partsupfunc[i],
CurrentMemoryContext);
/* Add the partitioning scheme to PlannerInfo. */ /* Add the partitioning scheme to PlannerInfo. */
root->part_schemes = lappend(root->part_schemes, part_scheme); root->part_schemes = lappend(root->part_schemes, part_scheme);
......
...@@ -154,9 +154,11 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) ...@@ -154,9 +154,11 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
rel->part_scheme = NULL; rel->part_scheme = NULL;
rel->nparts = 0; rel->nparts = 0;
rel->boundinfo = NULL; rel->boundinfo = NULL;
rel->partition_qual = NIL;
rel->part_rels = NULL; rel->part_rels = NULL;
rel->partexprs = NULL; rel->partexprs = NULL;
rel->nullable_partexprs = NULL; rel->nullable_partexprs = NULL;
rel->partitioned_child_rels = NIL;
/* /*
* Pass top parent's relids down the inheritance hierarchy. If the parent * Pass top parent's relids down the inheritance hierarchy. If the parent
...@@ -567,9 +569,11 @@ build_join_rel(PlannerInfo *root, ...@@ -567,9 +569,11 @@ build_join_rel(PlannerInfo *root,
joinrel->part_scheme = NULL; joinrel->part_scheme = NULL;
joinrel->nparts = 0; joinrel->nparts = 0;
joinrel->boundinfo = NULL; joinrel->boundinfo = NULL;
joinrel->partition_qual = NIL;
joinrel->part_rels = NULL; joinrel->part_rels = NULL;
joinrel->partexprs = NULL; joinrel->partexprs = NULL;
joinrel->nullable_partexprs = NULL; joinrel->nullable_partexprs = NULL;
joinrel->partitioned_child_rels = NIL;
/* Compute information relevant to the foreign relations. */ /* Compute information relevant to the foreign relations. */
set_foreign_rel_properties(joinrel, outer_rel, inner_rel); set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
...@@ -734,9 +738,13 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, ...@@ -734,9 +738,13 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
joinrel->has_eclass_joins = false; joinrel->has_eclass_joins = false;
joinrel->top_parent_relids = NULL; joinrel->top_parent_relids = NULL;
joinrel->part_scheme = NULL; joinrel->part_scheme = NULL;
joinrel->nparts = 0;
joinrel->boundinfo = NULL;
joinrel->partition_qual = NIL;
joinrel->part_rels = NULL; joinrel->part_rels = NULL;
joinrel->partexprs = NULL; joinrel->partexprs = NULL;
joinrel->nullable_partexprs = NULL; joinrel->nullable_partexprs = NULL;
joinrel->partitioned_child_rels = NIL;
joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids, joinrel->top_parent_relids = bms_union(outer_rel->top_parent_relids,
inner_rel->top_parent_relids); inner_rel->top_parent_relids);
......
#-------------------------------------------------------------------------
#
# Makefile--
# Makefile for backend/partitioning
#
# IDENTIFICATION
# src/backend/partitioning/Makefile
#
#-------------------------------------------------------------------------
subdir = src/backend/partitioning
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
OBJS = partprune.o
include $(top_srcdir)/src/backend/common.mk
This diff is collapsed.
...@@ -53,6 +53,6 @@ ...@@ -53,6 +53,6 @@
*/ */
/* yyyymmddN */ /* yyyymmddN */
#define CATALOG_VERSION_NO 201804052 #define CATALOG_VERSION_NO 201804061
#endif #endif
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
* PartitionBoundInfo encapsulates a set of partition bounds. It is usually * PartitionBoundInfo encapsulates a set of partition bounds. It is usually
* associated with partitioned tables as part of its partition descriptor. * associated with partitioned tables as part of its partition descriptor.
* *
* The internal structure is opaque outside partition.c. * The internal structure appears in partbounds.h.
*/ */
typedef struct PartitionBoundInfoData *PartitionBoundInfo; typedef struct PartitionBoundInfoData *PartitionBoundInfo;
...@@ -70,7 +70,6 @@ extern void check_default_allows_bound(Relation parent, Relation defaultRel, ...@@ -70,7 +70,6 @@ extern void check_default_allows_bound(Relation parent, Relation defaultRel,
PartitionBoundSpec *new_spec); PartitionBoundSpec *new_spec);
extern List *get_proposed_default_constraint(List *new_part_constaints); extern List *get_proposed_default_constraint(List *new_part_constaints);
/* For tuple routing */
extern int get_partition_for_tuple(Relation relation, Datum *values, extern int get_partition_for_tuple(Relation relation, Datum *values,
bool *isnull); bool *isnull);
......
...@@ -53,6 +53,9 @@ typedef FormData_pg_opfamily *Form_pg_opfamily; ...@@ -53,6 +53,9 @@ typedef FormData_pg_opfamily *Form_pg_opfamily;
#define Anum_pg_opfamily_opfnamespace 3 #define Anum_pg_opfamily_opfnamespace 3
#define Anum_pg_opfamily_opfowner 4 #define Anum_pg_opfamily_opfowner 4
#define IsBooleanOpfamily(opfamily) \
((opfamily) == BOOL_BTREE_FAM_OID || (opfamily) == BOOL_HASH_FAM_OID)
/* ---------------- /* ----------------
* initial contents of pg_opfamily * initial contents of pg_opfamily
* ---------------- * ----------------
......
...@@ -193,6 +193,9 @@ typedef enum NodeTag ...@@ -193,6 +193,9 @@ typedef enum NodeTag
T_FromExpr, T_FromExpr,
T_OnConflictExpr, T_OnConflictExpr,
T_IntoClause, T_IntoClause,
T_PartitionPruneStep,
T_PartitionPruneStepOp,
T_PartitionPruneStepCombine,
/* /*
* TAGS FOR EXPRESSION STATE NODES (execnodes.h) * TAGS FOR EXPRESSION STATE NODES (execnodes.h)
...@@ -262,7 +265,6 @@ typedef enum NodeTag ...@@ -262,7 +265,6 @@ typedef enum NodeTag
T_PlaceHolderVar, T_PlaceHolderVar,
T_SpecialJoinInfo, T_SpecialJoinInfo,
T_AppendRelInfo, T_AppendRelInfo,
T_PartitionedChildRelInfo,
T_PlaceHolderInfo, T_PlaceHolderInfo,
T_MinMaxAggInfo, T_MinMaxAggInfo,
T_PlannerParamItem, T_PlannerParamItem,
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#define PRIMNODES_H #define PRIMNODES_H
#include "access/attnum.h" #include "access/attnum.h"
#include "access/stratnum.h"
#include "nodes/bitmapset.h" #include "nodes/bitmapset.h"
#include "nodes/pg_list.h" #include "nodes/pg_list.h"
...@@ -1506,4 +1507,78 @@ typedef struct OnConflictExpr ...@@ -1506,4 +1507,78 @@ typedef struct OnConflictExpr
List *exclRelTlist; /* tlist of the EXCLUDED pseudo relation */ List *exclRelTlist; /* tlist of the EXCLUDED pseudo relation */
} OnConflictExpr; } OnConflictExpr;
/*
* Node types to represent a partition pruning step.
*/
/*
* The base Node type. step_id is the global identifier of a given step
* within a given pruning context.
*/
/* Common header embedded as the first member of the specific step structs. */
typedef struct PartitionPruneStep
{
NodeTag type;	/* node tag identifying the concrete step type */
int step_id;	/* identifier of this step within its pruning context */
} PartitionPruneStep;
/*----------
* PartitionPruneStepOp - Information to prune using a set of mutually AND'd
* OpExpr clauses
*
* This contains information extracted from up to partnatts OpExpr clauses,
* where partnatts is the number of partition key columns. 'opstrategy' is the
* strategy of the operator in the clause matched to the last partition key.
* 'exprs' contains expressions which comprise the lookup key to be passed to
* the partition bound search function. 'cmpfns' contains the OIDs of
* comparison functions used to compare the aforementioned expressions with
* partition bounds. Both 'exprs' and 'cmpfns' contain the same number of
* items up to partnatts items.
*
* Once we find the offset of a partition bound using the lookup key, we
* determine which partitions to include in the result based on the value of
* 'opstrategy'. For example, if it were equality, we'd return just the
* partition that would contain that key or a set of partitions if the key
* didn't consist of all partitioning columns. For non-equality strategies,
* we'd need to include other partitions as appropriate.
*
* 'nullkeys' is the set containing the offsets of the partition keys (0 to
* partnatts - 1) that were matched to an IS NULL clause. This is only
* considered for hash partitioning as we need to pass which keys are null
* to the hash partition bound search function. It is never possible to
* have an expression be present in 'exprs' for a given partition key and
* the corresponding bit set in 'nullkeys'.
*----------
*/
/* Pruning step built from a set of mutually AND'd OpExpr clauses. */
typedef struct PartitionPruneStepOp
{
PartitionPruneStep step;	/* common step header (tag and step_id) */
StrategyNumber opstrategy;	/* strategy of the operator in the clause
				 * matched to the last partition key */
List *exprs;	/* expressions forming the bound lookup key */
List *cmpfns;	/* OIDs of comparison functions, one per entry in exprs */
Bitmapset *nullkeys;	/* offsets of partition keys matched to IS NULL;
			 * only considered for hash partitioning */
} PartitionPruneStepOp;
/*----------
* PartitionPruneStepCombine - Information to prune using a BoolExpr clause
*
* For BoolExpr clauses, we combine the set of partitions determined for each
* of its argument clauses.
*----------
*/
/*
 * How a combine step merges the partition sets of its source steps.
 * NOTE(review): the code that evaluates these values is not visible here;
 * the per-value descriptions below follow the enumerator names -- confirm.
 */
typedef enum PartitionPruneCombineOp
{
PARTPRUNE_COMBINE_UNION,	/* presumably: union of the source sets */
PARTPRUNE_COMBINE_INTERSECT	/* presumably: intersection of the source sets */
} PartitionPruneCombineOp;
/* Pruning step that combines the results of other steps (BoolExpr clause). */
typedef struct PartitionPruneStepCombine
{
PartitionPruneStep step;	/* common step header (tag and step_id) */
PartitionPruneCombineOp combineOp;	/* how the source results are combined */
List *source_stepids;	/* presumably the step_id values of the steps whose
			 * results are combined -- TODO confirm list type */
} PartitionPruneStepCombine;
#endif /* PRIMNODES_H */ #endif /* PRIMNODES_H */
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#define RELATION_H #define RELATION_H
#include "access/sdir.h" #include "access/sdir.h"
#include "fmgr.h"
#include "lib/stringinfo.h" #include "lib/stringinfo.h"
#include "nodes/params.h" #include "nodes/params.h"
#include "nodes/parsenodes.h" #include "nodes/parsenodes.h"
...@@ -253,8 +254,6 @@ typedef struct PlannerInfo ...@@ -253,8 +254,6 @@ typedef struct PlannerInfo
List *append_rel_list; /* list of AppendRelInfos */ List *append_rel_list; /* list of AppendRelInfos */
List *pcinfo_list; /* list of PartitionedChildRelInfos */
List *rowMarks; /* list of PlanRowMarks */ List *rowMarks; /* list of PlanRowMarks */
List *placeholder_list; /* list of PlaceHolderInfos */ List *placeholder_list; /* list of PlaceHolderInfos */
...@@ -319,6 +318,9 @@ typedef struct PlannerInfo ...@@ -319,6 +318,9 @@ typedef struct PlannerInfo
/* optional private data for join_search_hook, e.g., GEQO */ /* optional private data for join_search_hook, e.g., GEQO */
void *join_search_private; void *join_search_private;
/* Does this query modify any partition key columns? */
bool partColsUpdated;
} PlannerInfo; } PlannerInfo;
...@@ -356,6 +358,9 @@ typedef struct PartitionSchemeData ...@@ -356,6 +358,9 @@ typedef struct PartitionSchemeData
/* Cached information about partition key data types. */ /* Cached information about partition key data types. */
int16 *parttyplen; int16 *parttyplen;
bool *parttypbyval; bool *parttypbyval;
/* Cached information about partition comparison functions. */
FmgrInfo *partsupfunc;
} PartitionSchemeData; } PartitionSchemeData;
typedef struct PartitionSchemeData *PartitionScheme; typedef struct PartitionSchemeData *PartitionScheme;
...@@ -529,10 +534,14 @@ typedef struct PartitionSchemeData *PartitionScheme; ...@@ -529,10 +534,14 @@ typedef struct PartitionSchemeData *PartitionScheme;
* If the relation is partitioned, these fields will be set: * If the relation is partitioned, these fields will be set:
* *
* part_scheme - Partitioning scheme of the relation * part_scheme - Partitioning scheme of the relation
* boundinfo - Partition bounds
* nparts - Number of partitions * nparts - Number of partitions
* boundinfo - Partition bounds
* partition_qual - Partition constraint if not the root
* part_rels - RelOptInfos for each partition * part_rels - RelOptInfos for each partition
* partexprs, nullable_partexprs - Partition key expressions * partexprs, nullable_partexprs - Partition key expressions
* partitioned_child_rels - RT indexes of unpruned partitions of
* relation that are partitioned tables
* themselves
* *
* Note: A base relation always has only one set of partition keys, but a join * Note: A base relation always has only one set of partition keys, but a join
* relation may have as many sets of partition keys as the number of relations * relation may have as many sets of partition keys as the number of relations
...@@ -663,10 +672,12 @@ typedef struct RelOptInfo ...@@ -663,10 +672,12 @@ typedef struct RelOptInfo
PartitionScheme part_scheme; /* Partitioning scheme. */ PartitionScheme part_scheme; /* Partitioning scheme. */
int nparts; /* number of partitions */ int nparts; /* number of partitions */
struct PartitionBoundInfoData *boundinfo; /* Partition bounds */ struct PartitionBoundInfoData *boundinfo; /* Partition bounds */
List *partition_qual; /* partition constraint */
struct RelOptInfo **part_rels; /* Array of RelOptInfos of partitions, struct RelOptInfo **part_rels; /* Array of RelOptInfos of partitions,
* stored in the same order of bounds */ * stored in the same order of bounds */
List **partexprs; /* Non-nullable partition key expressions. */ List **partexprs; /* Non-nullable partition key expressions. */
List **nullable_partexprs; /* Nullable partition key expressions. */ List **nullable_partexprs; /* Nullable partition key expressions. */
List *partitioned_child_rels; /* List of RT indexes. */
} RelOptInfo; } RelOptInfo;
/* /*
...@@ -1686,7 +1697,7 @@ typedef struct ModifyTablePath ...@@ -1686,7 +1697,7 @@ typedef struct ModifyTablePath
List *partitioned_rels; List *partitioned_rels;
bool partColsUpdated; /* some part key in hierarchy updated */ bool partColsUpdated; /* some part key in hierarchy updated */
List *resultRelations; /* integer list of RT indexes */ List *resultRelations; /* integer list of RT indexes */
Index mergeTargetRelation;/* RT index of merge target relation */ Index mergeTargetRelation; /* RT index of merge target relation */
List *subpaths; /* Path(s) producing source data */ List *subpaths; /* Path(s) producing source data */
List *subroots; /* per-target-table PlannerInfos */ List *subroots; /* per-target-table PlannerInfos */
List *withCheckOptionLists; /* per-target-table WCO lists */ List *withCheckOptionLists; /* per-target-table WCO lists */
...@@ -2121,27 +2132,6 @@ typedef struct AppendRelInfo ...@@ -2121,27 +2132,6 @@ typedef struct AppendRelInfo
Oid parent_reloid; /* OID of parent relation */ Oid parent_reloid; /* OID of parent relation */
} AppendRelInfo; } AppendRelInfo;
/*
* For a partitioned table, this maps its RT index to the list of RT indexes
* of the partitioned child tables in the partition tree. We need to
* separately store this information, because we do not create AppendRelInfos
* for the partitioned child tables of a parent table, since AppendRelInfos
* contain information that is unnecessary for the partitioned child tables.
* The child_rels list must contain at least one element, because the parent
* partitioned table is itself counted as a child.
*
* These structs are kept in the PlannerInfo node's pcinfo_list.
*/
/* One entry of PlannerInfo's pcinfo_list; see the comment above. */
typedef struct PartitionedChildRelInfo
{
NodeTag type;
Index parent_relid;	/* RT index of the partitioned parent table */
List *child_rels;	/* RT indexes of the partitioned child tables; has at
			 * least one element since the parent counts as a child */
bool part_cols_updated; /* is the partition key of any of
* the partitioned tables updated? */
} PartitionedChildRelInfo;
/* /*
* For each distinct placeholder expression generated during planning, we * For each distinct placeholder expression generated during planning, we
* store a PlaceHolderInfo node in the PlannerInfo node's placeholder_list. * store a PlaceHolderInfo node in the PlannerInfo node's placeholder_list.
......
...@@ -59,9 +59,4 @@ extern Expr *preprocess_phv_expression(PlannerInfo *root, Expr *expr); ...@@ -59,9 +59,4 @@ extern Expr *preprocess_phv_expression(PlannerInfo *root, Expr *expr);
extern bool plan_cluster_use_sort(Oid tableOid, Oid indexOid); extern bool plan_cluster_use_sort(Oid tableOid, Oid indexOid);
extern int plan_create_index_workers(Oid tableOid, Oid indexOid); extern int plan_create_index_workers(Oid tableOid, Oid indexOid);
/*
 * Lookups of partitioned child relations, keyed by a single RT index or by
 * the relids of a join.  Only the prototypes are visible here.
 * NOTE(review): presumably these consult the PartitionedChildRelInfo entries
 * kept in root->pcinfo_list, and *part_cols_updated is an output parameter
 * -- confirm against the definitions in planner.c.
 */
extern List *get_partitioned_child_rels(PlannerInfo *root, Index rti,
bool *part_cols_updated);
extern List *get_partitioned_child_rels_for_join(PlannerInfo *root,
Relids join_relids);
#endif /* PLANNER_H */ #endif /* PLANNER_H */
/*-------------------------------------------------------------------------
*
* partbounds.h
*
* Copyright (c) 2007-2018, PostgreSQL Global Development Group
*
* src/include/partitioning/partbounds.h
*
*-------------------------------------------------------------------------
*/
#ifndef PARTBOUNDS_H
#define PARTBOUNDS_H
#include "catalog/partition.h"
/*
* PartitionBoundInfoData encapsulates a set of partition bounds. It is
* usually associated with partitioned tables as part of its partition
* descriptor, but may also be used to represent a virtual partitioned
* table such as a partitioned joinrel within the planner.
*
* A list partition datum that is known to be NULL is never put into the
* datums array. Instead, it is tracked using the null_index field.
*
* In the case of range partitioning, ndatums will typically be far less than
* 2 * nparts, because a partition's upper bound and the next partition's lower
* bound are the same in most common cases, and we only store one of them (the
* upper bound). In the case of hash partitioning, ndatums will be the same as
* the number of partitions.
*
* For range and list partitioned tables, datums is an array of datum-tuples
* with key->partnatts datums each. For hash partitioned tables, it is an array
* of datum-tuples with 2 datums, modulus and remainder, corresponding to a
* given partition.
*
* The datums in datums array are arranged in increasing order as defined by
* functions qsort_partition_rbound_cmp(), qsort_partition_list_value_cmp() and
* qsort_partition_hbound_cmp() for range, list and hash partitioned tables
* respectively. For range and list partitions this simply means that the
* datums in the datums array are arranged in increasing order as defined by
* the partition key's operator classes and collations.
*
* In the case of list partitioning, the indexes array stores one entry for
* every datum, which is the index of the partition that accepts a given datum.
* In the case of range partitioning, it stores one entry per distinct range
* datum, which is the index of the partition for which a given datum
* is an upper bound. In the case of hash partitioning, the number of
* entries in the indexes array is the same as the greatest modulus amongst
* all partitions. For a given partition key datum-tuple, the index of the
* partition which would accept that datum-tuple is given by the entry
* pointed to by the remainder produced when the hash value of the datum-tuple
* is divided by the greatest modulus.
*/
typedef struct PartitionBoundInfoData
{
char strategy; /* hash, list or range? */
int ndatums; /* Length of the datums array that follows */
/*
 * Array of ndatums datum-tuples, kept in sorted order.  Each tuple holds
 * key->partnatts datums for range and list partitioning, or two datums
 * (modulus and remainder) for hash partitioning.
 */
Datum **datums;
PartitionRangeDatumKind **kind; /* The kind of each range bound datum;
* NULL for hash and list partitioned
* tables */
/*
 * Partition indexes.  The number of entries depends on the strategy: one
 * per datum for list, one per distinct range datum for range, and as many
 * as the greatest modulus for hash.
 */
int *indexes; /* Partition indexes */
int null_index; /* Index of the null-accepting partition; -1
* if there isn't one */
int default_index; /* Index of the default partition; -1 if there
* isn't one */
} PartitionBoundInfoData;
/*
 * Convenience tests on a PartitionBoundInfo pointer.  Both index fields use
 * -1 as the "no such partition" marker, so any other value means that a
 * null-accepting (respectively default) partition exists.
 */
#define partition_bound_accepts_nulls(bi) (!((bi)->null_index == -1))
#define partition_bound_has_default(bi) (!((bi)->default_index == -1))
/*
* When qsort'ing partition bounds after reading from the catalog, each bound
* is represented with one of the following structs.
*/
/* One bound of a hash partition */
typedef struct PartitionHashBound
{
int modulus; /* modulus of the hash partition's bound */
int remainder; /* remainder of the hash partition's bound */
int index; /* presumably the index of the owning partition,
 * as in PartitionListValue -- TODO confirm */
} PartitionHashBound;
/* One value coming from some (index'th) list partition */
typedef struct PartitionListValue
{
int index; /* index of the list partition this value comes from */
Datum value; /* the list value itself */
} PartitionListValue;
/* One bound of a range partition */
typedef struct PartitionRangeBound
{
int index; /* presumably the index of the partition this
 * bound belongs to -- TODO confirm */
Datum *datums; /* range bound datums */
PartitionRangeDatumKind *kind; /* the kind of each datum */
bool lower; /* this is the lower (vs upper) bound */
} PartitionRangeBound;
/*
 * Search and comparison routines operating on a PartitionBoundInfo.
 *
 * Only the prototypes are visible in this header.  The notes below are
 * inferred from names and signatures and should be confirmed against the
 * definitions before being relied upon.
 */

/* presumably returns the largest modulus in a hash bound set -- TODO confirm */
extern int get_hash_partition_greatest_modulus(PartitionBoundInfo b);
/*
 * Binary-search helpers, one per lookup shape: a single list value, a
 * range bound probe, a tuple of nvalues range datums, and a hash
 * (modulus, remainder) pair.
 * NOTE(review): the int result is presumably an offset into
 * boundinfo->datums, with *is_equal reporting an exact match -- confirm.
 */
extern int partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
PartitionBoundInfo boundinfo,
Datum value, bool *is_equal);
extern int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
Oid *partcollation,
PartitionBoundInfo boundinfo,
PartitionRangeBound *probe, bool *is_equal);
extern int partition_range_datum_bsearch(FmgrInfo *partsupfunc,
Oid *partcollation,
PartitionBoundInfo boundinfo,
int nvalues, Datum *values, bool *is_equal);
extern int partition_hash_bsearch(PartitionBoundInfo boundinfo,
int modulus, int remainder);
/*
 * presumably combines the per-column hashes of the partnatts key values,
 * taking isnull[] into account -- TODO confirm
 */
extern uint64 compute_hash_value(int partnatts, FmgrInfo *partsupfunc,
Datum *values, bool *isnull);
/*
 * Compares a range bound (rb_datums/rb_kind) against the first
 * n_tuple_datums values of a tuple.
 * NOTE(review): sign convention of the int32 result is not visible here.
 */
extern int32 partition_rbound_datum_cmp(FmgrInfo *partsupfunc,
Oid *partcollation,
Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
Datum *tuple_datums, int n_tuple_datums);
#endif /* PARTBOUNDS_H */
/*-------------------------------------------------------------------------
*
* partprune.h
* prototypes for partprune.c
*
*
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/partitioning/partprune.h
*
*-------------------------------------------------------------------------
*/
#ifndef PARTPRUNE_H
#define PARTPRUNE_H
#include "catalog/partition.h"
#include "nodes/relation.h"
/*
* PartitionPruneContext
*
* Information about a partitioned table needed to perform partition pruning.
*/
typedef struct PartitionPruneContext
{
/* Partition key information */
char strategy; /* presumably hash, list or range, as in
 * PartitionBoundInfoData -- TODO confirm */
int partnatts; /* number of partition key columns */
/*
 * Per-key-column arrays.
 * NOTE(review): presumably each has partnatts entries -- confirm.
 */
Oid *partopfamily;
Oid *partopcintype;
Oid *partcollation;
FmgrInfo *partsupfunc;
/* Number of partitions */
int nparts;
/* Partition boundary info */
PartitionBoundInfo boundinfo;
} PartitionPruneContext;
/*
 * Entry points of partprune.c.  Only the prototypes are visible here; the
 * notes are inferred from names and signatures.
 */

/* presumably returns the relids of rel's partitions that survive pruning -- TODO confirm */
extern Relids prune_append_rel_partitions(RelOptInfo *rel);
/*
 * Runs the given list of pruning steps against the context.
 * NOTE(review): the result is presumably a set of partition indexes.
 */
extern Bitmapset *get_matching_partitions(PartitionPruneContext *context,
List *pruning_steps);
/*
 * Builds a list of pruning steps for rel from clauses.
 * NOTE(review): *contradictory looks like an output flag set when the
 * clauses can never be satisfied -- confirm.
 */
extern List *gen_partprune_steps(RelOptInfo *rel, List *clauses,
bool *contradictory);
#endif /* PARTPRUNE_H */
...@@ -1951,11 +1951,13 @@ explain (costs off) select * from mcrparted where abs(b) = 5; -- scans all parti ...@@ -1951,11 +1951,13 @@ explain (costs off) select * from mcrparted where abs(b) = 5; -- scans all parti
Filter: (abs(b) = 5) Filter: (abs(b) = 5)
-> Seq Scan on mcrparted3 -> Seq Scan on mcrparted3
Filter: (abs(b) = 5) Filter: (abs(b) = 5)
-> Seq Scan on mcrparted4
Filter: (abs(b) = 5)
-> Seq Scan on mcrparted5 -> Seq Scan on mcrparted5
Filter: (abs(b) = 5) Filter: (abs(b) = 5)
-> Seq Scan on mcrparted_def -> Seq Scan on mcrparted_def
Filter: (abs(b) = 5) Filter: (abs(b) = 5)
(13 rows) (15 rows)
explain (costs off) select * from mcrparted where a > -1; -- scans all partitions explain (costs off) select * from mcrparted where a > -1; -- scans all partitions
QUERY PLAN QUERY PLAN
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment