Commit 9140cf82 authored by Robert Haas

Associate partitioning information with each RelOptInfo.

This is not used for anything yet, but it is necessary infrastructure
for partition-wise join and for partition pruning without constraint
exclusion.

Ashutosh Bapat, reviewed by Amit Langote and with quite a few changes,
mostly cosmetic, by me.  Additional review and testing of this patch
series by Antonin Houska, Amit Khandekar, Rafia Sabih, Rajkumar
Raghuwanshi, Thomas Munro, and Dilip Kumar.

Discussion: http://postgr.es/m/CAFjFpRfneFG3H+F6BaiXemMrKF+FY-POpx3Ocy+RiH3yBmXSNw@mail.gmail.com
parent 7b86c2ac
......@@ -68,6 +68,10 @@ static List *get_relation_constraints(PlannerInfo *root,
static List *build_index_tlist(PlannerInfo *root, IndexOptInfo *index,
Relation heapRelation);
static List *get_relation_statistics(RelOptInfo *rel, Relation relation);
static void set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel,
Relation relation);
static PartitionScheme find_partition_scheme(PlannerInfo *root, Relation rel);
static List **build_baserel_partition_key_exprs(Relation relation, Index varno);
/*
* get_relation_info -
......@@ -420,6 +424,13 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
/* Collect info about relation's foreign keys, if relevant */
get_relation_foreign_keys(root, rel, relation, inhparent);
/*
* Collect info about relation's partitioning scheme, if any. Only
* inheritance parents may be partitioned.
*/
if (inhparent && relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
set_relation_partition_info(root, rel, relation);
heap_close(relation, NoLock);
/*
......@@ -1802,3 +1813,151 @@ has_row_triggers(PlannerInfo *root, Index rti, CmdType event)
heap_close(relation, NoLock);
return result;
}
/*
 * set_relation_partition_info
 *
 * Populate the partitioning fields of "rel" for the partitioned table
 * "relation": the (shared) partition scheme, the number of partitions, the
 * partition bounds and the per-key partition expressions.
 *
 * NOTE(review): boundinfo is stored as the pointer held by the relation's
 * PartitionDesc rather than as a copy; confirm that it remains valid for as
 * long as the RelOptInfo is used, given that the caller closes the relation
 * after this function returns.
 */
static void
set_relation_partition_info(PlannerInfo *root, RelOptInfo *rel,
							Relation relation)
{
	PartitionDesc desc;

	/* Callers must only hand us partitioned tables. */
	Assert(relation->rd_rel->relkind == RELKIND_PARTITIONED_TABLE);

	desc = RelationGetPartitionDesc(relation);
	rel->part_scheme = find_partition_scheme(root, relation);

	/* Both the descriptor and the scheme must exist for such a table. */
	Assert(desc != NULL && rel->part_scheme != NULL);

	rel->nparts = desc->nparts;
	rel->boundinfo = desc->boundinfo;
	rel->partexprs = build_baserel_partition_key_exprs(relation, rel->relid);
}
/*
 * find_partition_scheme
 *
 * Find or create a PartitionScheme for this Relation.
 *
 * The list root->part_schemes is searched for an entry whose strategy,
 * number of keys, operator families, opclass input types and collations all
 * match the relation's partition key.  If one is found it is returned, so
 * that identically partitioned relations share a single PartitionScheme
 * pointer.  Otherwise a new scheme is built, appended to root->part_schemes
 * and returned.
 */
static PartitionScheme
find_partition_scheme(PlannerInfo *root, Relation relation)
{
	PartitionKey partkey = RelationGetPartitionKey(relation);
	ListCell   *lc;
	int			partnatts;
	PartitionScheme part_scheme;

	/* A partitioned table should have a partition key. */
	Assert(partkey != NULL);

	partnatts = partkey->partnatts;

	/* Search for a matching partition scheme and return if found one. */
	foreach(lc, root->part_schemes)
	{
		part_scheme = lfirst(lc);

		/* Match partitioning strategy and number of keys. */
		if (partkey->strategy != part_scheme->strategy ||
			partnatts != part_scheme->partnatts)
			continue;

		/* Match the partition key types. */
		if (memcmp(partkey->partopfamily, part_scheme->partopfamily,
				   sizeof(Oid) * partnatts) != 0 ||
			memcmp(partkey->partopcintype, part_scheme->partopcintype,
				   sizeof(Oid) * partnatts) != 0 ||
			memcmp(partkey->parttypcoll, part_scheme->parttypcoll,
				   sizeof(Oid) * partnatts) != 0)
			continue;

		/*
		 * Length and byval information should match when partopcintype
		 * matches.
		 */
		Assert(memcmp(partkey->parttyplen, part_scheme->parttyplen,
					  sizeof(int16) * partnatts) == 0);
		Assert(memcmp(partkey->parttypbyval, part_scheme->parttypbyval,
					  sizeof(bool) * partnatts) == 0);

		/* Found matching partition scheme. */
		return part_scheme;
	}

	/*
	 * Did not find matching partition scheme.  Create one, copying the
	 * relevant information from the relcache.  The arrays themselves are
	 * copied rather than pointed to: the scheme is kept in the PlannerInfo
	 * and is compared against other relations later on, whereas the caller
	 * closes this relation once its information has been collected, so the
	 * relcache-owned arrays are not guaranteed to stay valid for as long as
	 * the scheme is in use.
	 */
	part_scheme = (PartitionScheme) palloc0(sizeof(PartitionSchemeData));
	part_scheme->strategy = partkey->strategy;
	part_scheme->partnatts = partkey->partnatts;

	part_scheme->partopfamily = (Oid *) palloc(sizeof(Oid) * partnatts);
	memcpy(part_scheme->partopfamily, partkey->partopfamily,
		   sizeof(Oid) * partnatts);

	part_scheme->partopcintype = (Oid *) palloc(sizeof(Oid) * partnatts);
	memcpy(part_scheme->partopcintype, partkey->partopcintype,
		   sizeof(Oid) * partnatts);

	part_scheme->parttypcoll = (Oid *) palloc(sizeof(Oid) * partnatts);
	memcpy(part_scheme->parttypcoll, partkey->parttypcoll,
		   sizeof(Oid) * partnatts);

	part_scheme->parttyplen = (int16 *) palloc(sizeof(int16) * partnatts);
	memcpy(part_scheme->parttyplen, partkey->parttyplen,
		   sizeof(int16) * partnatts);

	part_scheme->parttypbyval = (bool *) palloc(sizeof(bool) * partnatts);
	memcpy(part_scheme->parttypbyval, partkey->parttypbyval,
		   sizeof(bool) * partnatts);

	/* Add the partitioning scheme to PlannerInfo. */
	root->part_schemes = lappend(root->part_schemes, part_scheme);

	return part_scheme;
}
/*
 * build_baserel_partition_key_exprs
 *
 * Build the per-key partition expressions of a base relation.
 *
 * The result is a palloc'd array with one entry per partition key column.
 * Each entry is a single-element list: a plain column key becomes a Var
 * carrying the given varno, while an expression key is a copy of the stored
 * expression re-stamped with the given varno.  The array is meant to be
 * stored in the RelOptInfo of the base relation.
 */
static List **
build_baserel_partition_key_exprs(Relation relation, Index varno)
{
	PartitionKey key = RelationGetPartitionKey(relation);
	ListCell   *next_expr;
	List	  **result;
	int			nkeys;
	int			keyno = 0;

	/* A partitioned table should have a partition key. */
	Assert(key != NULL);

	nkeys = key->partnatts;
	result = (List **) palloc(sizeof(List *) * nkeys);

	/* Expression keys are consumed from this list in key order. */
	next_expr = list_head(key->partexprs);

	while (keyno < nkeys)
	{
		AttrNumber	attno = key->partattrs[keyno];
		Node	   *keyexpr;

		if (attno == InvalidAttrNumber)
		{
			/* Expression key: there must be a stored expression left. */
			if (next_expr == NULL)
				elog(ERROR, "wrong number of partition key expressions");

			/* Work on a copy, switching its Vars from varno 1 to ours. */
			keyexpr = (Node *) copyObject(lfirst(next_expr));
			ChangeVarNodes(keyexpr, 1, varno, 0);
			next_expr = lnext(next_expr);
		}
		else
		{
			/* Plain column key: represent it as a Var of this relation. */
			Assert(attno > 0);
			keyexpr = (Node *) makeVar(varno, attno,
									   key->parttypid[keyno],
									   key->parttypmod[keyno],
									   key->parttypcoll[keyno], 0);
		}

		result[keyno] = list_make1(keyexpr);
		keyno++;
	}

	return result;
}
......@@ -146,6 +146,11 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
rel->baserestrict_min_security = UINT_MAX;
rel->joininfo = NIL;
rel->has_eclass_joins = false;
rel->part_scheme = NULL;
rel->nparts = 0;
rel->boundinfo = NULL;
rel->part_rels = NULL;
rel->partexprs = NULL;
/*
* Pass top parent's relids down the inheritance hierarchy. If the parent
......@@ -218,18 +223,41 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
if (rte->inh)
{
ListCell *l;
int nparts = rel->nparts;
int cnt_parts = 0;
if (nparts > 0)
rel->part_rels = (RelOptInfo **)
palloc(sizeof(RelOptInfo *) * nparts);
foreach(l, root->append_rel_list)
{
AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
RelOptInfo *childrel;
/* append_rel_list contains all append rels; ignore others */
if (appinfo->parent_relid != relid)
continue;
(void) build_simple_rel(root, appinfo->child_relid,
rel);
childrel = build_simple_rel(root, appinfo->child_relid,
rel);
/* Nothing more to do for an unpartitioned table. */
if (!rel->part_scheme)
continue;
/*
* The order of partition OIDs in append_rel_list is the same as
* the order in the PartitionDesc, so the order of part_rels will
* also match the PartitionDesc. See expand_partitioned_rtentry.
*/
Assert(cnt_parts < nparts);
rel->part_rels[cnt_parts] = childrel;
cnt_parts++;
}
/* We should have seen all the child partitions. */
Assert(cnt_parts == nparts);
}
return rel;
......@@ -527,6 +555,11 @@ build_join_rel(PlannerInfo *root,
joinrel->joininfo = NIL;
joinrel->has_eclass_joins = false;
joinrel->top_parent_relids = NULL;
joinrel->part_scheme = NULL;
joinrel->nparts = 0;
joinrel->boundinfo = NULL;
joinrel->part_rels = NULL;
joinrel->partexprs = NULL;
/* Compute information relevant to the foreign relations. */
set_foreign_rel_properties(joinrel, outer_rel, inner_rel);
......
......@@ -266,6 +266,9 @@ typedef struct PlannerInfo
List *distinct_pathkeys; /* distinctClause pathkeys, if any */
List *sort_pathkeys; /* sortClause pathkeys, if any */
List *part_schemes; /* Canonicalised partition schemes used in the
* query. */
List *initial_rels; /* RelOptInfos we are now trying to join */
/* Use fetch_upper_rel() to get any particular upper rel */
......@@ -326,6 +329,34 @@ typedef struct PlannerInfo
((root)->simple_rte_array ? (root)->simple_rte_array[rti] : \
rt_fetch(rti, (root)->parse->rtable))
/*
* If multiple relations are partitioned the same way, all such partitions
* will have a pointer to the same PartitionScheme. A list of PartitionScheme
* objects is attached to the PlannerInfo. By design, the partition scheme
* incorporates only the general properties of the partition method (LIST vs.
* RANGE, number of partitioning columns and the type information for each)
* and not the specific bounds.
*
* We store the opclass-declared input data types instead of the partition key
* datatypes since the former rather than the latter are used to compare
* partition bounds. Since partition key data types and the opclass declared
* input data types are expected to be binary compatible (per ResolveOpClass),
* both of those should have same byval and length properties.
*/
typedef struct PartitionSchemeData
{
char strategy; /* partition strategy (e.g. LIST vs. RANGE) */
int16 partnatts; /* number of partition attributes; each array
* below has this many elements */
Oid *partopfamily; /* OIDs of operator families, one per key */
Oid *partopcintype; /* OIDs of opclass declared input data types,
* one per key */
Oid *parttypcoll; /* OIDs of collations of partition keys, one
* per key */
/* Cached information about partition key data types. */
int16 *parttyplen; /* type length, one per key */
bool *parttypbyval; /* pass-by-value flag, one per key */
} PartitionSchemeData;
/* Handle used throughout the planner to refer to a partition scheme. */
typedef struct PartitionSchemeData *PartitionScheme;
/*----------
* RelOptInfo
......@@ -456,7 +487,7 @@ typedef struct PlannerInfo
* other rels for which we have tried and failed to prove
* this one unique
*
* The presence of the remaining fields depends on the restrictions
* The presence of the following fields depends on the restrictions
* and joins that the relation participates in:
*
* baserestrictinfo - List of RestrictInfo nodes, containing info about
......@@ -487,6 +518,21 @@ typedef struct PlannerInfo
* We store baserestrictcost in the RelOptInfo (for base relations) because
* we know we will need it at least once (to price the sequential scan)
* and may need it multiple times to price index scans.
*
* If the relation is partitioned, these fields will be set:
*
* part_scheme - Partitioning scheme of the relation
* boundinfo - Partition bounds
* nparts - Number of partitions
* part_rels - RelOptInfos for each partition
* partexprs - Partition key expressions
*
* Note: A base relation always has only one set of partition keys, but a join
* relation may have as many sets of partition keys as the number of relations
* being joined. partexprs is an array containing part_scheme->partnatts
* elements, each of which is a list of partition key expressions. For a base
* relation each list contains only one expression, but for a join relation
* there can be one per baserel.
*----------
*/
typedef enum RelOptKind
......@@ -592,6 +638,14 @@ typedef struct RelOptInfo
/* used by "other" relations */
Relids top_parent_relids; /* Relids of topmost parents */
/* used for partitioned relations */
PartitionScheme part_scheme; /* Partitioning scheme. */
int nparts; /* number of partitions */
struct PartitionBoundInfoData *boundinfo; /* Partition bounds */
struct RelOptInfo **part_rels; /* Array of RelOptInfos of partitions,
* stored in the same order of bounds */
List **partexprs; /* Partition key expressions. */
} RelOptInfo;
/*
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment