Commit da6f3e45 authored by Alvaro Herrera's avatar Alvaro Herrera

Reorganize partitioning code

There's been a massive addition of partitioning code in PostgreSQL 11,
with little oversight on its placement, resulting in a
catalog/partition.c with poorly defined boundaries and responsibilities.
This commit tries to set a couple of distinct modules to separate things
a little bit.  There are no code changes here, only code movement.

There are three new files:
  src/backend/utils/cache/partcache.c
  src/include/partitioning/partdefs.h
  src/include/utils/partcache.h

The previous arrangement of #including catalog/partition.h almost
everywhere is no more.

Authors: Amit Langote and Álvaro Herrera
Discussion: https://postgr.es/m/98e8d509-790a-128c-be7f-e48a5b2d8d97@lab.ntt.co.jp
	https://postgr.es/m/11aa0c50-316b-18bb-722d-c23814f39059@lab.ntt.co.jp
	https://postgr.es/m/143ed9a4-6038-76d4-9a55-502035815e68@lab.ntt.co.jp
	https://postgr.es/m/20180413193503.nynq7bnmgh6vs5vm@alvherre.pgsql
parent b39fd897
......@@ -19,6 +19,7 @@
#include "funcapi.h"
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/rel.h"
PG_FUNCTION_INFO_V1(hash_page_type);
PG_FUNCTION_INFO_V1(hash_page_stats);
......
......@@ -76,6 +76,7 @@
#include "storage/ipc.h"
#include "storage/spin.h"
#include "tcop/utility.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/memutils.h"
......
......@@ -77,6 +77,7 @@
#include "utils/fmgroids.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/partcache.h"
#include "utils/rel.h"
#include "utils/ruleutils.h"
#include "utils/snapmgr.h"
......
......@@ -12,2613 +12,184 @@
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/hash.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/nbtree.h"
#include "access/tupconvert.h"
#include "access/sysattr.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/objectaddress.h"
#include "catalog/partition.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_partitioned_table.h"
#include "catalog/pg_type.h"
#include "commands/tablecmds.h"
#include "executor/executor.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "nodes/parsenodes.h"
#include "optimizer/clauses.h"
#include "optimizer/planmain.h"
#include "optimizer/prep.h"
#include "optimizer/var.h"
#include "parser/parse_coerce.h"
#include "partitioning/partbounds.h"
#include "rewrite/rewriteManip.h"
#include "storage/lmgr.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/hashutils.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/partcache.h"
#include "utils/rel.h"
#include "utils/ruleutils.h"
#include "utils/syscache.h"
static Oid get_partition_parent_worker(Relation inhRel, Oid relid);
static void get_partition_ancestors_worker(Relation inhRel, Oid relid,
List **ancestors);
static int32 qsort_partition_hbound_cmp(const void *a, const void *b);
static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
void *arg);
static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
void *arg);
static Oid get_partition_operator(PartitionKey key, int col,
StrategyNumber strategy, bool *need_relabel);
static Expr *make_partition_op_expr(PartitionKey key, int keynum,
uint16 strategy, Expr *arg1, Expr *arg2);
static void get_range_key_properties(PartitionKey key, int keynum,
PartitionRangeDatum *ldatum,
PartitionRangeDatum *udatum,
ListCell **partexprs_item,
Expr **keyCol,
Const **lower_val, Const **upper_val);
static List *get_qual_for_hash(Relation parent, PartitionBoundSpec *spec);
static List *get_qual_for_list(Relation parent, PartitionBoundSpec *spec);
static List *get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
bool for_default);
static List *get_range_nulltest(PartitionKey key);
static List *generate_partition_qual(Relation rel);
static PartitionRangeBound *make_one_range_bound(PartitionKey key, int index,
List *datums, bool lower);
static int32 partition_hbound_cmp(int modulus1, int remainder1, int modulus2,
int remainder2);
static int32 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
Oid *partcollation, Datum *datums1,
PartitionRangeDatumKind *kind1, bool lower1,
PartitionRangeBound *b2);
static int get_partition_bound_num_indexes(PartitionBoundInfo b);
/*
* RelationBuildPartitionDesc
* Form rel's partition descriptor
* get_partition_parent
* Obtain direct parent of given relation
*
* Returns inheritance parent of a partition by scanning pg_inherits
*
* Not flushed from the cache by RelationClearRelation() unless changed because
* of addition or removal of partition.
* Note: Because this function assumes that the relation whose OID is passed
* as an argument will have precisely one parent, it should only be called
* when it is known that the relation is a partition.
*/
void
RelationBuildPartitionDesc(Relation rel)
Oid
get_partition_parent(Oid relid)
{
List *inhoids,
*partoids;
Oid *oids = NULL;
List *boundspecs = NIL;
ListCell *cell;
int i,
nparts;
PartitionKey key = RelationGetPartitionKey(rel);
PartitionDesc result;
MemoryContext oldcxt;
int ndatums = 0;
int default_index = -1;
/* Hash partitioning specific */
PartitionHashBound **hbounds = NULL;
/* List partitioning specific */
PartitionListValue **all_values = NULL;
int null_index = -1;
/* Range partitioning specific */
PartitionRangeBound **rbounds = NULL;
/* Get partition oids from pg_inherits */
inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock);
/* Collect bound spec nodes in a list */
i = 0;
partoids = NIL;
foreach(cell, inhoids)
{
Oid inhrelid = lfirst_oid(cell);
HeapTuple tuple;
Datum datum;
bool isnull;
Node *boundspec;
Relation catalogRelation;
Oid result;
tuple = SearchSysCache1(RELOID, inhrelid);
if (!HeapTupleIsValid(tuple))
elog(ERROR, "cache lookup failed for relation %u", inhrelid);
catalogRelation = heap_open(InheritsRelationId, AccessShareLock);
/*
* It is possible that the pg_class tuple of a partition has not been
* updated yet to set its relpartbound field. The only case where
* this happens is when we open the parent relation to check using its
* partition descriptor that a new partition's bound does not overlap
* some existing partition.
*/
if (!((Form_pg_class) GETSTRUCT(tuple))->relispartition)
{
ReleaseSysCache(tuple);
continue;
}
result = get_partition_parent_worker(catalogRelation, relid);
datum = SysCacheGetAttr(RELOID, tuple,
Anum_pg_class_relpartbound,
&isnull);
Assert(!isnull);
boundspec = (Node *) stringToNode(TextDatumGetCString(datum));
if (!OidIsValid(result))
elog(ERROR, "could not find tuple for parent of relation %u", relid);
/*
* Sanity check: If the PartitionBoundSpec says this is the default
* partition, its OID should correspond to whatever's stored in
* pg_partitioned_table.partdefid; if not, the catalog is corrupt.
*/
if (castNode(PartitionBoundSpec, boundspec)->is_default)
{
Oid partdefid;
heap_close(catalogRelation, AccessShareLock);
partdefid = get_default_partition_oid(RelationGetRelid(rel));
if (partdefid != inhrelid)
elog(ERROR, "expected partdefid %u, but got %u",
inhrelid, partdefid);
}
return result;
}
boundspecs = lappend(boundspecs, boundspec);
partoids = lappend_oid(partoids, inhrelid);
ReleaseSysCache(tuple);
}
/*
* get_partition_parent_worker
* Scan the pg_inherits relation to return the OID of the parent of the
* given relation
*/
static Oid
get_partition_parent_worker(Relation inhRel, Oid relid)
{
SysScanDesc scan;
ScanKeyData key[2];
Oid result = InvalidOid;
HeapTuple tuple;
nparts = list_length(partoids);
ScanKeyInit(&key[0],
Anum_pg_inherits_inhrelid,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(relid));
ScanKeyInit(&key[1],
Anum_pg_inherits_inhseqno,
BTEqualStrategyNumber, F_INT4EQ,
Int32GetDatum(1));
if (nparts > 0)
scan = systable_beginscan(inhRel, InheritsRelidSeqnoIndexId, true,
NULL, 2, key);
tuple = systable_getnext(scan);
if (HeapTupleIsValid(tuple))
{
oids = (Oid *) palloc(nparts * sizeof(Oid));
i = 0;
foreach(cell, partoids)
oids[i++] = lfirst_oid(cell);
/* Convert from node to the internal representation */
if (key->strategy == PARTITION_STRATEGY_HASH)
{
ndatums = nparts;
hbounds = (PartitionHashBound **)
palloc(nparts * sizeof(PartitionHashBound *));
i = 0;
foreach(cell, boundspecs)
{
PartitionBoundSpec *spec = castNode(PartitionBoundSpec,
lfirst(cell));
if (spec->strategy != PARTITION_STRATEGY_HASH)
elog(ERROR, "invalid strategy in partition bound spec");
hbounds[i] = (PartitionHashBound *)
palloc(sizeof(PartitionHashBound));
hbounds[i]->modulus = spec->modulus;
hbounds[i]->remainder = spec->remainder;
hbounds[i]->index = i;
i++;
}
/* Sort all the bounds in ascending order */
qsort(hbounds, nparts, sizeof(PartitionHashBound *),
qsort_partition_hbound_cmp);
}
else if (key->strategy == PARTITION_STRATEGY_LIST)
{
List *non_null_values = NIL;
/*
* Create a unified list of non-null values across all partitions.
*/
i = 0;
null_index = -1;
foreach(cell, boundspecs)
{
PartitionBoundSpec *spec = castNode(PartitionBoundSpec,
lfirst(cell));
ListCell *c;
if (spec->strategy != PARTITION_STRATEGY_LIST)
elog(ERROR, "invalid strategy in partition bound spec");
/*
* Note the index of the partition bound spec for the default
* partition. There's no datum to add to the list of non-null
* datums for this partition.
*/
if (spec->is_default)
{
default_index = i;
i++;
continue;
}
foreach(c, spec->listdatums)
{
Const *val = castNode(Const, lfirst(c));
PartitionListValue *list_value = NULL;
if (!val->constisnull)
{
list_value = (PartitionListValue *)
palloc0(sizeof(PartitionListValue));
list_value->index = i;
list_value->value = val->constvalue;
}
else
{
/*
* Never put a null into the values array, flag
* instead for the code further down below where we
* construct the actual relcache struct.
*/
if (null_index != -1)
elog(ERROR, "found null more than once");
null_index = i;
}
if (list_value)
non_null_values = lappend(non_null_values,
list_value);
}
i++;
}
ndatums = list_length(non_null_values);
/*
* Collect all list values in one array. Alongside the value, we
* also save the index of partition the value comes from.
*/
all_values = (PartitionListValue **) palloc(ndatums *
sizeof(PartitionListValue *));
i = 0;
foreach(cell, non_null_values)
{
PartitionListValue *src = lfirst(cell);
all_values[i] = (PartitionListValue *)
palloc(sizeof(PartitionListValue));
all_values[i]->value = src->value;
all_values[i]->index = src->index;
i++;
}
qsort_arg(all_values, ndatums, sizeof(PartitionListValue *),
qsort_partition_list_value_cmp, (void *) key);
}
else if (key->strategy == PARTITION_STRATEGY_RANGE)
{
int k;
PartitionRangeBound **all_bounds,
*prev;
all_bounds = (PartitionRangeBound **) palloc0(2 * nparts *
sizeof(PartitionRangeBound *));
/*
* Create a unified list of range bounds across all the
* partitions.
*/
i = ndatums = 0;
foreach(cell, boundspecs)
{
PartitionBoundSpec *spec = castNode(PartitionBoundSpec,
lfirst(cell));
PartitionRangeBound *lower,
*upper;
if (spec->strategy != PARTITION_STRATEGY_RANGE)
elog(ERROR, "invalid strategy in partition bound spec");
/*
* Note the index of the partition bound spec for the default
* partition. There's no datum to add to the allbounds array
* for this partition.
*/
if (spec->is_default)
{
default_index = i++;
continue;
}
lower = make_one_range_bound(key, i, spec->lowerdatums,
true);
upper = make_one_range_bound(key, i, spec->upperdatums,
false);
all_bounds[ndatums++] = lower;
all_bounds[ndatums++] = upper;
i++;
}
Assert(ndatums == nparts * 2 ||
(default_index != -1 && ndatums == (nparts - 1) * 2));
/* Sort all the bounds in ascending order */
qsort_arg(all_bounds, ndatums,
sizeof(PartitionRangeBound *),
qsort_partition_rbound_cmp,
(void *) key);
/* Save distinct bounds from all_bounds into rbounds. */
rbounds = (PartitionRangeBound **)
palloc(ndatums * sizeof(PartitionRangeBound *));
k = 0;
prev = NULL;
for (i = 0; i < ndatums; i++)
{
PartitionRangeBound *cur = all_bounds[i];
bool is_distinct = false;
int j;
/* Is the current bound distinct from the previous one? */
for (j = 0; j < key->partnatts; j++)
{
Datum cmpval;
if (prev == NULL || cur->kind[j] != prev->kind[j])
{
is_distinct = true;
break;
}
/*
* If the bounds are both MINVALUE or MAXVALUE, stop now
* and treat them as equal, since any values after this
* point must be ignored.
*/
if (cur->kind[j] != PARTITION_RANGE_DATUM_VALUE)
break;
cmpval = FunctionCall2Coll(&key->partsupfunc[j],
key->partcollation[j],
cur->datums[j],
prev->datums[j]);
if (DatumGetInt32(cmpval) != 0)
{
is_distinct = true;
break;
}
}
/*
* Only if the bound is distinct save it into a temporary
* array i.e. rbounds which is later copied into boundinfo
* datums array.
*/
if (is_distinct)
rbounds[k++] = all_bounds[i];
prev = cur;
}
Form_pg_inherits form = (Form_pg_inherits) GETSTRUCT(tuple);
/* Update ndatums to hold the count of distinct datums. */
ndatums = k;
}
else
elog(ERROR, "unexpected partition strategy: %d",
(int) key->strategy);
result = form->inhparent;
}
/* Now build the actual relcache partition descriptor */
rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext,
"partition descriptor",
ALLOCSET_DEFAULT_SIZES);
MemoryContextCopyAndSetIdentifier(rel->rd_pdcxt, RelationGetRelationName(rel));
oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt);
result = (PartitionDescData *) palloc0(sizeof(PartitionDescData));
result->nparts = nparts;
if (nparts > 0)
{
PartitionBoundInfo boundinfo;
int *mapping;
int next_index = 0;
result->oids = (Oid *) palloc0(nparts * sizeof(Oid));
boundinfo = (PartitionBoundInfoData *)
palloc0(sizeof(PartitionBoundInfoData));
boundinfo->strategy = key->strategy;
boundinfo->default_index = -1;
boundinfo->ndatums = ndatums;
boundinfo->null_index = -1;
boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
/* Initialize mapping array with invalid values */
mapping = (int *) palloc(sizeof(int) * nparts);
for (i = 0; i < nparts; i++)
mapping[i] = -1;
switch (key->strategy)
{
case PARTITION_STRATEGY_HASH:
{
/* Modulus are stored in ascending order */
int greatest_modulus = hbounds[ndatums - 1]->modulus;
boundinfo->indexes = (int *) palloc(greatest_modulus *
sizeof(int));
for (i = 0; i < greatest_modulus; i++)
boundinfo->indexes[i] = -1;
for (i = 0; i < nparts; i++)
{
int modulus = hbounds[i]->modulus;
int remainder = hbounds[i]->remainder;
boundinfo->datums[i] = (Datum *) palloc(2 *
sizeof(Datum));
boundinfo->datums[i][0] = Int32GetDatum(modulus);
boundinfo->datums[i][1] = Int32GetDatum(remainder);
while (remainder < greatest_modulus)
{
/* overlap? */
Assert(boundinfo->indexes[remainder] == -1);
boundinfo->indexes[remainder] = i;
remainder += modulus;
}
mapping[hbounds[i]->index] = i;
pfree(hbounds[i]);
}
pfree(hbounds);
break;
}
case PARTITION_STRATEGY_LIST:
{
boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
/*
* Copy values. Indexes of individual values are mapped
* to canonical values so that they match for any two list
* partitioned tables with same number of partitions and
* same lists per partition. One way to canonicalize is
* to assign the index in all_values[] of the smallest
* value of each partition, as the index of all of the
* partition's values.
*/
for (i = 0; i < ndatums; i++)
{
boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum));
boundinfo->datums[i][0] = datumCopy(all_values[i]->value,
key->parttypbyval[0],
key->parttyplen[0]);
/* If the old index has no mapping, assign one */
if (mapping[all_values[i]->index] == -1)
mapping[all_values[i]->index] = next_index++;
boundinfo->indexes[i] = mapping[all_values[i]->index];
}
/*
* If null-accepting partition has no mapped index yet,
* assign one. This could happen if such partition
* accepts only null and hence not covered in the above
* loop which only handled non-null values.
*/
if (null_index != -1)
{
Assert(null_index >= 0);
if (mapping[null_index] == -1)
mapping[null_index] = next_index++;
boundinfo->null_index = mapping[null_index];
}
/* Assign mapped index for the default partition. */
if (default_index != -1)
{
/*
* The default partition accepts any value not
* specified in the lists of other partitions, hence
* it should not get mapped index while assigning
* those for non-null datums.
*/
Assert(default_index >= 0 &&
mapping[default_index] == -1);
mapping[default_index] = next_index++;
boundinfo->default_index = mapping[default_index];
}
/* All partition must now have a valid mapping */
Assert(next_index == nparts);
break;
}
case PARTITION_STRATEGY_RANGE:
{
boundinfo->kind = (PartitionRangeDatumKind **)
palloc(ndatums *
sizeof(PartitionRangeDatumKind *));
boundinfo->indexes = (int *) palloc((ndatums + 1) *
sizeof(int));
for (i = 0; i < ndatums; i++)
{
int j;
boundinfo->datums[i] = (Datum *) palloc(key->partnatts *
sizeof(Datum));
boundinfo->kind[i] = (PartitionRangeDatumKind *)
palloc(key->partnatts *
sizeof(PartitionRangeDatumKind));
for (j = 0; j < key->partnatts; j++)
{
if (rbounds[i]->kind[j] == PARTITION_RANGE_DATUM_VALUE)
boundinfo->datums[i][j] =
datumCopy(rbounds[i]->datums[j],
key->parttypbyval[j],
key->parttyplen[j]);
boundinfo->kind[i][j] = rbounds[i]->kind[j];
}
/*
* There is no mapping for invalid indexes.
*
* Any lower bounds in the rbounds array have invalid
* indexes assigned, because the values between the
* previous bound (if there is one) and this (lower)
* bound are not part of the range of any existing
* partition.
*/
if (rbounds[i]->lower)
boundinfo->indexes[i] = -1;
else
{
int orig_index = rbounds[i]->index;
/* If the old index has no mapping, assign one */
if (mapping[orig_index] == -1)
mapping[orig_index] = next_index++;
boundinfo->indexes[i] = mapping[orig_index];
}
}
/* Assign mapped index for the default partition. */
if (default_index != -1)
{
Assert(default_index >= 0 && mapping[default_index] == -1);
mapping[default_index] = next_index++;
boundinfo->default_index = mapping[default_index];
}
boundinfo->indexes[i] = -1;
break;
}
default:
elog(ERROR, "unexpected partition strategy: %d",
(int) key->strategy);
}
result->boundinfo = boundinfo;
/*
* Now assign OIDs from the original array into mapped indexes of the
* result array. Order of OIDs in the former is defined by the
* catalog scan that retrieved them, whereas that in the latter is
* defined by canonicalized representation of the partition bounds.
*/
for (i = 0; i < nparts; i++)
result->oids[mapping[i]] = oids[i];
pfree(mapping);
}
systable_endscan(scan);
MemoryContextSwitchTo(oldcxt);
rel->rd_partdesc = result;
return result;
}
/*
* Are two partition bound collections logically equal?
* get_partition_ancestors
* Obtain ancestors of given relation
*
* Used in the keep logic of relcache.c (ie, in RelationClearRelation()).
* This is also useful when b1 and b2 are bound collections of two separate
* relations, respectively, because PartitionBoundInfo is a canonical
* representation of partition bounds.
* Returns a list of ancestors of the given relation.
*
* Note: Because this function assumes that the relation whose OID is passed
* as an argument and each ancestor will have precisely one parent, it should
* only be called when it is known that the relation is a partition.
*/
bool
partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
PartitionBoundInfo b1, PartitionBoundInfo b2)
List *
get_partition_ancestors(Oid relid)
{
int i;
if (b1->strategy != b2->strategy)
return false;
if (b1->ndatums != b2->ndatums)
return false;
if (b1->null_index != b2->null_index)
return false;
if (b1->default_index != b2->default_index)
return false;
if (b1->strategy == PARTITION_STRATEGY_HASH)
{
int greatest_modulus = get_hash_partition_greatest_modulus(b1);
/*
* If two hash partitioned tables have different greatest moduli,
* their partition schemes don't match.
*/
if (greatest_modulus != get_hash_partition_greatest_modulus(b2))
return false;
/*
* We arrange the partitions in the ascending order of their modulus
* and remainders. Also every modulus is factor of next larger
* modulus. Therefore we can safely store index of a given partition
* in indexes array at remainder of that partition. Also entries at
* (remainder + N * modulus) positions in indexes array are all same
* for (modulus, remainder) specification for any partition. Thus
* datums array from both the given bounds are same, if and only if
* their indexes array will be same. So, it suffices to compare
* indexes array.
*/
for (i = 0; i < greatest_modulus; i++)
if (b1->indexes[i] != b2->indexes[i])
return false;
#ifdef USE_ASSERT_CHECKING
/*
* Nonetheless make sure that the bounds are indeed same when the
* indexes match. Hash partition bound stores modulus and remainder
* at b1->datums[i][0] and b1->datums[i][1] position respectively.
*/
for (i = 0; i < b1->ndatums; i++)
Assert((b1->datums[i][0] == b2->datums[i][0] &&
b1->datums[i][1] == b2->datums[i][1]));
#endif
}
else
{
for (i = 0; i < b1->ndatums; i++)
{
int j;
for (j = 0; j < partnatts; j++)
{
/* For range partitions, the bounds might not be finite. */
if (b1->kind != NULL)
{
/* The different kinds of bound all differ from each other */
if (b1->kind[i][j] != b2->kind[i][j])
return false;
List *result = NIL;
Relation inhRel;
/*
* Non-finite bounds are equal without further
* examination.
*/
if (b1->kind[i][j] != PARTITION_RANGE_DATUM_VALUE)
continue;
}
inhRel = heap_open(InheritsRelationId, AccessShareLock);
/*
* Compare the actual values. Note that it would be both
* incorrect and unsafe to invoke the comparison operator
* derived from the partitioning specification here. It would
* be incorrect because we want the relcache entry to be
* updated for ANY change to the partition bounds, not just
* those that the partitioning operator thinks are
* significant. It would be unsafe because we might reach
* this code in the context of an aborted transaction, and an
* arbitrary partitioning operator might not be safe in that
* context. datumIsEqual() should be simple enough to be
* safe.
*/
if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j],
parttypbyval[j], parttyplen[j]))
return false;
}
get_partition_ancestors_worker(inhRel, relid, &result);
if (b1->indexes[i] != b2->indexes[i])
return false;
}
heap_close(inhRel, AccessShareLock);
/* There are ndatums+1 indexes in case of range partitions */
if (b1->strategy == PARTITION_STRATEGY_RANGE &&
b1->indexes[i] != b2->indexes[i])
return false;
}
return true;
return result;
}
/*
* Return a copy of given PartitionBoundInfo structure. The data types of bounds
* are described by given partition key specification.
* get_partition_ancestors_worker
* recursive worker for get_partition_ancestors
*/
PartitionBoundInfo
partition_bounds_copy(PartitionBoundInfo src,
PartitionKey key)
static void
get_partition_ancestors_worker(Relation inhRel, Oid relid, List **ancestors)
{
PartitionBoundInfo dest;
int i;
int ndatums;
int partnatts;
int num_indexes;
dest = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData));
dest->strategy = src->strategy;
ndatums = dest->ndatums = src->ndatums;
partnatts = key->partnatts;
num_indexes = get_partition_bound_num_indexes(src);
/* List partitioned tables have only a single partition key. */
Assert(key->strategy != PARTITION_STRATEGY_LIST || partnatts == 1);
dest->datums = (Datum **) palloc(sizeof(Datum *) * ndatums);
if (src->kind != NULL)
{
dest->kind = (PartitionRangeDatumKind **) palloc(ndatums *
sizeof(PartitionRangeDatumKind *));
for (i = 0; i < ndatums; i++)
{
dest->kind[i] = (PartitionRangeDatumKind *) palloc(partnatts *
sizeof(PartitionRangeDatumKind));
memcpy(dest->kind[i], src->kind[i],
sizeof(PartitionRangeDatumKind) * key->partnatts);
}
}
else
dest->kind = NULL;
for (i = 0; i < ndatums; i++)
{
int j;
/*
* For a corresponding to hash partition, datums array will have two
* elements - modulus and remainder.
*/
bool hash_part = (key->strategy == PARTITION_STRATEGY_HASH);
int natts = hash_part ? 2 : partnatts;
dest->datums[i] = (Datum *) palloc(sizeof(Datum) * natts);
for (j = 0; j < natts; j++)
{
bool byval;
int typlen;
if (hash_part)
{
typlen = sizeof(int32); /* Always int4 */
byval = true; /* int4 is pass-by-value */
}
else
{
byval = key->parttypbyval[j];
typlen = key->parttyplen[j];
}
if (dest->kind == NULL ||
dest->kind[i][j] == PARTITION_RANGE_DATUM_VALUE)
dest->datums[i][j] = datumCopy(src->datums[i][j],
byval, typlen);
}
}
dest->indexes = (int *) palloc(sizeof(int) * num_indexes);
memcpy(dest->indexes, src->indexes, sizeof(int) * num_indexes);
Oid parentOid;
dest->null_index = src->null_index;
dest->default_index = src->default_index;
/* Recursion ends at the topmost level, ie., when there's no parent */
parentOid = get_partition_parent_worker(inhRel, relid);
if (parentOid == InvalidOid)
return;
return dest;
*ancestors = lappend_oid(*ancestors, parentOid);
get_partition_ancestors_worker(inhRel, parentOid, ancestors);
}
/*
* check_new_partition_bound
* map_partition_varattnos - maps varattno of any Vars in expr from the
* attno's of 'from_rel' to the attno's of 'to_rel' partition, each of which
* may be either a leaf partition or a partitioned table, but both of which
* must be from the same partitioning hierarchy.
*
* Even though all of the same column names must be present in all relations
* in the hierarchy, and they must also have the same types, the attnos may
* be different.
*
* Checks if the new partition's bound overlaps any of the existing partitions
* of parent. Also performs additional checks as necessary per strategy.
*/
void
check_new_partition_bound(char *relname, Relation parent,
PartitionBoundSpec *spec)
{
PartitionKey key = RelationGetPartitionKey(parent);
PartitionDesc partdesc = RelationGetPartitionDesc(parent);
PartitionBoundInfo boundinfo = partdesc->boundinfo;
ParseState *pstate = make_parsestate(NULL);
int with = -1;
bool overlap = false;
if (spec->is_default)
{
/*
* The default partition bound never conflicts with any other
* partition's; if that's what we're attaching, the only possible
* problem is that one already exists, so check for that and we're
* done.
*/
if (boundinfo == NULL || !partition_bound_has_default(boundinfo))
return;
/* Default partition already exists, error out. */
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("partition \"%s\" conflicts with existing default partition \"%s\"",
relname, get_rel_name(partdesc->oids[boundinfo->default_index])),
parser_errposition(pstate, spec->location)));
}
switch (key->strategy)
{
case PARTITION_STRATEGY_HASH:
{
Assert(spec->strategy == PARTITION_STRATEGY_HASH);
Assert(spec->remainder >= 0 && spec->remainder < spec->modulus);
if (partdesc->nparts > 0)
{
PartitionBoundInfo boundinfo = partdesc->boundinfo;
Datum **datums = boundinfo->datums;
int ndatums = boundinfo->ndatums;
int greatest_modulus;
int remainder;
int offset;
bool valid_modulus = true;
int prev_modulus, /* Previous largest modulus */
next_modulus; /* Next largest modulus */
/*
* Check rule that every modulus must be a factor of the
* next larger modulus. For example, if you have a bunch
* of partitions that all have modulus 5, you can add a
* new partition with modulus 10 or a new partition with
* modulus 15, but you cannot add both a partition with
* modulus 10 and a partition with modulus 15, because 10
* is not a factor of 15.
*
* Get the greatest (modulus, remainder) pair contained in
* boundinfo->datums that is less than or equal to the
* (spec->modulus, spec->remainder) pair.
*/
offset = partition_hash_bsearch(boundinfo,
spec->modulus,
spec->remainder);
if (offset < 0)
{
next_modulus = DatumGetInt32(datums[0][0]);
valid_modulus = (next_modulus % spec->modulus) == 0;
}
else
{
prev_modulus = DatumGetInt32(datums[offset][0]);
valid_modulus = (spec->modulus % prev_modulus) == 0;
if (valid_modulus && (offset + 1) < ndatums)
{
next_modulus = DatumGetInt32(datums[offset + 1][0]);
valid_modulus = (next_modulus % spec->modulus) == 0;
}
}
if (!valid_modulus)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("every hash partition modulus must be a factor of the next larger modulus")));
greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
remainder = spec->remainder;
/*
* Normally, the lowest remainder that could conflict with
* the new partition is equal to the remainder specified
* for the new partition, but when the new partition has a
* modulus higher than any used so far, we need to adjust.
*/
if (remainder >= greatest_modulus)
remainder = remainder % greatest_modulus;
/* Check every potentially-conflicting remainder. */
do
{
if (boundinfo->indexes[remainder] != -1)
{
overlap = true;
with = boundinfo->indexes[remainder];
break;
}
remainder += spec->modulus;
} while (remainder < greatest_modulus);
}
break;
}
case PARTITION_STRATEGY_LIST:
{
Assert(spec->strategy == PARTITION_STRATEGY_LIST);
if (partdesc->nparts > 0)
{
ListCell *cell;
Assert(boundinfo &&
boundinfo->strategy == PARTITION_STRATEGY_LIST &&
(boundinfo->ndatums > 0 ||
partition_bound_accepts_nulls(boundinfo) ||
partition_bound_has_default(boundinfo)));
foreach(cell, spec->listdatums)
{
Const *val = castNode(Const, lfirst(cell));
if (!val->constisnull)
{
int offset;
bool equal;
offset = partition_list_bsearch(key->partsupfunc,
key->partcollation,
boundinfo,
val->constvalue,
&equal);
if (offset >= 0 && equal)
{
overlap = true;
with = boundinfo->indexes[offset];
break;
}
}
else if (partition_bound_accepts_nulls(boundinfo))
{
overlap = true;
with = boundinfo->null_index;
break;
}
}
}
break;
}
case PARTITION_STRATEGY_RANGE:
{
PartitionRangeBound *lower,
*upper;
Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
lower = make_one_range_bound(key, -1, spec->lowerdatums, true);
upper = make_one_range_bound(key, -1, spec->upperdatums, false);
/*
* First check if the resulting range would be empty with
* specified lower and upper bounds
*/
if (partition_rbound_cmp(key->partnatts, key->partsupfunc,
key->partcollation, lower->datums,
lower->kind, true, upper) >= 0)
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("empty range bound specified for partition \"%s\"",
relname),
errdetail("Specified lower bound %s is greater than or equal to upper bound %s.",
get_range_partbound_string(spec->lowerdatums),
get_range_partbound_string(spec->upperdatums)),
parser_errposition(pstate, spec->location)));
}
if (partdesc->nparts > 0)
{
PartitionBoundInfo boundinfo = partdesc->boundinfo;
int offset;
bool equal;
Assert(boundinfo &&
boundinfo->strategy == PARTITION_STRATEGY_RANGE &&
(boundinfo->ndatums > 0 ||
partition_bound_has_default(boundinfo)));
/*
* Test whether the new lower bound (which is treated
* inclusively as part of the new partition) lies inside
* an existing partition, or in a gap.
*
* If it's inside an existing partition, the bound at
* offset + 1 will be the upper bound of that partition,
* and its index will be >= 0.
*
* If it's in a gap, the bound at offset + 1 will be the
* lower bound of the next partition, and its index will
* be -1. This is also true if there is no next partition,
* since the index array is initialised with an extra -1
* at the end.
*/
offset = partition_range_bsearch(key->partnatts,
key->partsupfunc,
key->partcollation,
boundinfo, lower,
&equal);
if (boundinfo->indexes[offset + 1] < 0)
{
/*
* Check that the new partition will fit in the gap.
* For it to fit, the new upper bound must be less
* than or equal to the lower bound of the next
* partition, if there is one.
*/
if (offset + 1 < boundinfo->ndatums)
{
int32 cmpval;
Datum *datums;
PartitionRangeDatumKind *kind;
bool is_lower;
datums = boundinfo->datums[offset + 1];
kind = boundinfo->kind[offset + 1];
is_lower = (boundinfo->indexes[offset + 1] == -1);
cmpval = partition_rbound_cmp(key->partnatts,
key->partsupfunc,
key->partcollation,
datums, kind,
is_lower, upper);
if (cmpval < 0)
{
/*
* The new partition overlaps with the
* existing partition between offset + 1 and
* offset + 2.
*/
overlap = true;
with = boundinfo->indexes[offset + 2];
}
}
}
else
{
/*
* The new partition overlaps with the existing
* partition between offset and offset + 1.
*/
overlap = true;
with = boundinfo->indexes[offset + 1];
}
}
break;
}
default:
elog(ERROR, "unexpected partition strategy: %d",
(int) key->strategy);
}
if (overlap)
{
Assert(with >= 0);
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("partition \"%s\" would overlap partition \"%s\"",
relname, get_rel_name(partdesc->oids[with])),
parser_errposition(pstate, spec->location)));
}
}
/*
* check_default_allows_bound
*
* This function checks if there exists a row in the default partition that
* would properly belong to the new partition being added. If it finds one,
* it throws an error.
*/
void
check_default_allows_bound(Relation parent, Relation default_rel,
PartitionBoundSpec *new_spec)
{
List *new_part_constraints;
List *def_part_constraints;
List *all_parts;
ListCell *lc;
new_part_constraints = (new_spec->strategy == PARTITION_STRATEGY_LIST)
? get_qual_for_list(parent, new_spec)
: get_qual_for_range(parent, new_spec, false);
def_part_constraints =
get_proposed_default_constraint(new_part_constraints);
/*
* If the existing constraints on the default partition imply that it will
* not contain any row that would belong to the new partition, we can
* avoid scanning the default partition.
*/
if (PartConstraintImpliedByRelConstraint(default_rel, def_part_constraints))
{
ereport(INFO,
(errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints",
RelationGetRelationName(default_rel))));
return;
}
/*
* Scan the default partition and its subpartitions, and check for rows
* that do not satisfy the revised partition constraints.
*/
if (default_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
all_parts = find_all_inheritors(RelationGetRelid(default_rel),
AccessExclusiveLock, NULL);
else
all_parts = list_make1_oid(RelationGetRelid(default_rel));
foreach(lc, all_parts)
{
Oid part_relid = lfirst_oid(lc);
Relation part_rel;
Expr *constr;
Expr *partition_constraint;
EState *estate;
HeapTuple tuple;
ExprState *partqualstate = NULL;
Snapshot snapshot;
TupleDesc tupdesc;
ExprContext *econtext;
HeapScanDesc scan;
MemoryContext oldCxt;
TupleTableSlot *tupslot;
/* Lock already taken above. */
if (part_relid != RelationGetRelid(default_rel))
{
part_rel = heap_open(part_relid, NoLock);
/*
* If the partition constraints on default partition child imply
* that it will not contain any row that would belong to the new
* partition, we can avoid scanning the child table.
*/
if (PartConstraintImpliedByRelConstraint(part_rel,
def_part_constraints))
{
ereport(INFO,
(errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints",
RelationGetRelationName(part_rel))));
heap_close(part_rel, NoLock);
continue;
}
}
else
part_rel = default_rel;
/*
* Only RELKIND_RELATION relations (i.e. leaf partitions) need to be
* scanned.
*/
if (part_rel->rd_rel->relkind != RELKIND_RELATION)
{
if (part_rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
ereport(WARNING,
(errcode(ERRCODE_CHECK_VIOLATION),
errmsg("skipped scanning foreign table \"%s\" which is a partition of default partition \"%s\"",
RelationGetRelationName(part_rel),
RelationGetRelationName(default_rel))));
if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
heap_close(part_rel, NoLock);
continue;
}
tupdesc = CreateTupleDescCopy(RelationGetDescr(part_rel));
constr = linitial(def_part_constraints);
partition_constraint = (Expr *)
map_partition_varattnos((List *) constr,
1, part_rel, parent, NULL);
estate = CreateExecutorState();
/* Build expression execution states for partition check quals */
partqualstate = ExecPrepareExpr(partition_constraint, estate);
econtext = GetPerTupleExprContext(estate);
snapshot = RegisterSnapshot(GetLatestSnapshot());
scan = heap_beginscan(part_rel, snapshot, 0, NULL);
tupslot = MakeSingleTupleTableSlot(tupdesc);
/*
* Switch to per-tuple memory context and reset it for each tuple
* produced, so we don't leak memory.
*/
oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
ExecStoreTuple(tuple, tupslot, InvalidBuffer, false);
econtext->ecxt_scantuple = tupslot;
if (!ExecCheck(partqualstate, econtext))
ereport(ERROR,
(errcode(ERRCODE_CHECK_VIOLATION),
errmsg("updated partition constraint for default partition \"%s\" would be violated by some row",
RelationGetRelationName(default_rel))));
ResetExprContext(econtext);
CHECK_FOR_INTERRUPTS();
}
MemoryContextSwitchTo(oldCxt);
heap_endscan(scan);
UnregisterSnapshot(snapshot);
ExecDropSingleTupleTableSlot(tupslot);
FreeExecutorState(estate);
if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
heap_close(part_rel, NoLock); /* keep the lock until commit */
}
}
/*
* get_partition_parent
* Obtain direct parent of given relation
*
* Returns inheritance parent of a partition by scanning pg_inherits
*
* Note: Because this function assumes that the relation whose OID is passed
* as an argument will have precisely one parent, it should only be called
* when it is known that the relation is a partition.
*/
Oid
get_partition_parent(Oid relid)
{
Relation catalogRelation;
Oid result;
catalogRelation = heap_open(InheritsRelationId, AccessShareLock);
result = get_partition_parent_worker(catalogRelation, relid);
if (!OidIsValid(result))
elog(ERROR, "could not find tuple for parent of relation %u", relid);
heap_close(catalogRelation, AccessShareLock);
return result;
}
/*
* get_partition_parent_worker
* Scan the pg_inherits relation to return the OID of the parent of the
* given relation
*/
static Oid
get_partition_parent_worker(Relation inhRel, Oid relid)
{
SysScanDesc scan;
ScanKeyData key[2];
Oid result = InvalidOid;
HeapTuple tuple;
ScanKeyInit(&key[0],
Anum_pg_inherits_inhrelid,
BTEqualStrategyNumber, F_OIDEQ,
ObjectIdGetDatum(relid));
ScanKeyInit(&key[1],
Anum_pg_inherits_inhseqno,
BTEqualStrategyNumber, F_INT4EQ,
Int32GetDatum(1));
scan = systable_beginscan(inhRel, InheritsRelidSeqnoIndexId, true,
NULL, 2, key);
tuple = systable_getnext(scan);
if (HeapTupleIsValid(tuple))
{
Form_pg_inherits form = (Form_pg_inherits) GETSTRUCT(tuple);
result = form->inhparent;
}
systable_endscan(scan);
return result;
}
/*
* get_partition_ancestors
* Obtain ancestors of given relation
*
* Returns a list of ancestors of the given relation.
*
* Note: Because this function assumes that the relation whose OID is passed
* as an argument and each ancestor will have precisely one parent, it should
* only be called when it is known that the relation is a partition.
*/
List *
get_partition_ancestors(Oid relid)
{
List *result = NIL;
Relation inhRel;
inhRel = heap_open(InheritsRelationId, AccessShareLock);
get_partition_ancestors_worker(inhRel, relid, &result);
heap_close(inhRel, AccessShareLock);
return result;
}
/*
* get_partition_ancestors_worker
* recursive worker for get_partition_ancestors
*/
static void
get_partition_ancestors_worker(Relation inhRel, Oid relid, List **ancestors)
{
Oid parentOid;
/* Recursion ends at the topmost level, ie., when there's no parent */
parentOid = get_partition_parent_worker(inhRel, relid);
if (parentOid == InvalidOid)
return;
*ancestors = lappend_oid(*ancestors, parentOid);
get_partition_ancestors_worker(inhRel, parentOid, ancestors);
}
/*
* get_qual_from_partbound
* Given a parser node for partition bound, return the list of executable
* expressions as partition constraint
*/
List *
get_qual_from_partbound(Relation rel, Relation parent,
PartitionBoundSpec *spec)
{
PartitionKey key = RelationGetPartitionKey(parent);
List *my_qual = NIL;
Assert(key != NULL);
switch (key->strategy)
{
case PARTITION_STRATEGY_HASH:
Assert(spec->strategy == PARTITION_STRATEGY_HASH);
my_qual = get_qual_for_hash(parent, spec);
break;
case PARTITION_STRATEGY_LIST:
Assert(spec->strategy == PARTITION_STRATEGY_LIST);
my_qual = get_qual_for_list(parent, spec);
break;
case PARTITION_STRATEGY_RANGE:
Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
my_qual = get_qual_for_range(parent, spec, false);
break;
default:
elog(ERROR, "unexpected partition strategy: %d",
(int) key->strategy);
}
return my_qual;
}
/*
* map_partition_varattnos - maps varattno of any Vars in expr from the
* attno's of 'from_rel' to the attno's of 'to_rel' partition, each of which
* may be either a leaf partition or a partitioned table, but both of which
* must be from the same partitioning hierarchy.
*
* Even though all of the same column names must be present in all relations
* in the hierarchy, and they must also have the same types, the attnos may
* be different.
*
* If found_whole_row is not NULL, *found_whole_row returns whether a
* whole-row variable was found in the input expression.
*
* Note: this will work on any node tree, so really the argument and result
* should be declared "Node *". But a substantial majority of the callers
* are working on Lists, so it's less messy to do the casts internally.
*/
List *
map_partition_varattnos(List *expr, int fromrel_varno,
Relation to_rel, Relation from_rel,
bool *found_whole_row)
{
bool my_found_whole_row = false;
if (expr != NIL)
{
AttrNumber *part_attnos;
part_attnos = convert_tuples_by_name_map(RelationGetDescr(to_rel),
RelationGetDescr(from_rel),
gettext_noop("could not convert row type"));
expr = (List *) map_variable_attnos((Node *) expr,
fromrel_varno, 0,
part_attnos,
RelationGetDescr(from_rel)->natts,
RelationGetForm(to_rel)->reltype,
&my_found_whole_row);
}
if (found_whole_row)
*found_whole_row = my_found_whole_row;
return expr;
}
/*
* RelationGetPartitionQual
*
* Returns a list of partition quals
* If found_whole_row is not NULL, *found_whole_row returns whether a
* whole-row variable was found in the input expression.
*
* Note: this will work on any node tree, so really the argument and result
* should be declared "Node *". But a substantial majority of the callers
* are working on Lists, so it's less messy to do the casts internally.
*/
List *
RelationGetPartitionQual(Relation rel)
{
/* Quick exit */
if (!rel->rd_rel->relispartition)
return NIL;
return generate_partition_qual(rel);
}
/*
* get_partition_qual_relid
*
* Returns an expression tree describing the passed-in relation's partition
* constraint. If there is no partition constraint returns NULL; this can
* happen if the default partition is the only partition.
*/
Expr *
get_partition_qual_relid(Oid relid)
{
Relation rel = heap_open(relid, AccessShareLock);
Expr *result = NULL;
List *and_args;
/* Do the work only if this relation is a partition. */
if (rel->rd_rel->relispartition)
{
and_args = generate_partition_qual(rel);
if (and_args == NIL)
result = NULL;
else if (list_length(and_args) > 1)
result = makeBoolExpr(AND_EXPR, and_args, -1);
else
result = linitial(and_args);
}
/* Keep the lock. */
heap_close(rel, NoLock);
return result;
}
/*
* get_partition_operator
*
* Return oid of the operator of given strategy for a given partition key
* column.
*/
static Oid
get_partition_operator(PartitionKey key, int col, StrategyNumber strategy,
bool *need_relabel)
{
Oid operoid;
/*
* First check if there exists an operator of the given strategy, with
* this column's type as both its lefttype and righttype, in the
* partitioning operator family specified for the column.
*/
operoid = get_opfamily_member(key->partopfamily[col],
key->parttypid[col],
key->parttypid[col],
strategy);
/*
* If one doesn't exist, we must resort to using an operator in the same
* operator family but with the operator class declared input type. It is
* OK to do so, because the column's type is known to be binary-coercible
* with the operator class input type (otherwise, the operator class in
* question would not have been accepted as the partitioning operator
* class). We must however inform the caller to wrap the non-Const
* expression with a RelabelType node to denote the implicit coercion. It
* ensures that the resulting expression structurally matches similarly
* processed expressions within the optimizer.
*/
if (!OidIsValid(operoid))
{
operoid = get_opfamily_member(key->partopfamily[col],
key->partopcintype[col],
key->partopcintype[col],
strategy);
if (!OidIsValid(operoid))
elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
strategy, key->partopcintype[col], key->partopcintype[col],
key->partopfamily[col]);
*need_relabel = true;
}
else
*need_relabel = false;
return operoid;
}
/*
* make_partition_op_expr
* Returns an Expr for the given partition key column with arg1 and
* arg2 as its leftop and rightop, respectively
*/
static Expr *
make_partition_op_expr(PartitionKey key, int keynum,
uint16 strategy, Expr *arg1, Expr *arg2)
{
Oid operoid;
bool need_relabel = false;
Expr *result = NULL;
/* Get the correct btree operator for this partitioning column */
operoid = get_partition_operator(key, keynum, strategy, &need_relabel);
/*
* Chosen operator may be such that the non-Const operand needs to be
* coerced, so apply the same; see the comment in
* get_partition_operator().
*/
if (!IsA(arg1, Const) &&
(need_relabel ||
key->partcollation[keynum] != key->parttypcoll[keynum]))
arg1 = (Expr *) makeRelabelType(arg1,
key->partopcintype[keynum],
-1,
key->partcollation[keynum],
COERCE_EXPLICIT_CAST);
/* Generate the actual expression */
switch (key->strategy)
{
case PARTITION_STRATEGY_LIST:
{
List *elems = (List *) arg2;
int nelems = list_length(elems);
Assert(nelems >= 1);
Assert(keynum == 0);
if (nelems > 1 &&
!type_is_array(key->parttypid[keynum]))
{
ArrayExpr *arrexpr;
ScalarArrayOpExpr *saopexpr;
/* Construct an ArrayExpr for the right-hand inputs */
arrexpr = makeNode(ArrayExpr);
arrexpr->array_typeid =
get_array_type(key->parttypid[keynum]);
arrexpr->array_collid = key->parttypcoll[keynum];
arrexpr->element_typeid = key->parttypid[keynum];
arrexpr->elements = elems;
arrexpr->multidims = false;
arrexpr->location = -1;
/* Build leftop = ANY (rightop) */
saopexpr = makeNode(ScalarArrayOpExpr);
saopexpr->opno = operoid;
saopexpr->opfuncid = get_opcode(operoid);
saopexpr->useOr = true;
saopexpr->inputcollid = key->partcollation[keynum];
saopexpr->args = list_make2(arg1, arrexpr);
saopexpr->location = -1;
result = (Expr *) saopexpr;
}
else
{
List *elemops = NIL;
ListCell *lc;
foreach (lc, elems)
{
Expr *elem = lfirst(lc),
*elemop;
elemop = make_opclause(operoid,
BOOLOID,
false,
arg1, elem,
InvalidOid,
key->partcollation[keynum]);
elemops = lappend(elemops, elemop);
}
result = nelems > 1 ? makeBoolExpr(OR_EXPR, elemops, -1) : linitial(elemops);
}
break;
}
case PARTITION_STRATEGY_RANGE:
result = make_opclause(operoid,
BOOLOID,
false,
arg1, arg2,
InvalidOid,
key->partcollation[keynum]);
break;
default:
elog(ERROR, "invalid partitioning strategy");
break;
}
return result;
}
/*
* get_qual_for_hash
*
* Returns a CHECK constraint expression to use as a hash partition's
* constraint, given the parent relation and partition bound structure.
*
* The partition constraint for a hash partition is always a call to the
* built-in function satisfies_hash_partition().
*/
static List *
get_qual_for_hash(Relation parent, PartitionBoundSpec *spec)
{
PartitionKey key = RelationGetPartitionKey(parent);
FuncExpr *fexpr;
Node *relidConst;
Node *modulusConst;
Node *remainderConst;
List *args;
ListCell *partexprs_item;
int i;
/* Fixed arguments. */
relidConst = (Node *) makeConst(OIDOID,
-1,
InvalidOid,
sizeof(Oid),
ObjectIdGetDatum(RelationGetRelid(parent)),
false,
true);
modulusConst = (Node *) makeConst(INT4OID,
-1,
InvalidOid,
sizeof(int32),
Int32GetDatum(spec->modulus),
false,
true);
remainderConst = (Node *) makeConst(INT4OID,
-1,
InvalidOid,
sizeof(int32),
Int32GetDatum(spec->remainder),
false,
true);
args = list_make3(relidConst, modulusConst, remainderConst);
partexprs_item = list_head(key->partexprs);
/* Add an argument for each key column. */
for (i = 0; i < key->partnatts; i++)
{
Node *keyCol;
/* Left operand */
if (key->partattrs[i] != 0)
{
keyCol = (Node *) makeVar(1,
key->partattrs[i],
key->parttypid[i],
key->parttypmod[i],
key->parttypcoll[i],
0);
}
else
{
keyCol = (Node *) copyObject(lfirst(partexprs_item));
partexprs_item = lnext(partexprs_item);
}
args = lappend(args, keyCol);
}
fexpr = makeFuncExpr(F_SATISFIES_HASH_PARTITION,
BOOLOID,
args,
InvalidOid,
InvalidOid,
COERCE_EXPLICIT_CALL);
return list_make1(fexpr);
}
/*
* get_qual_for_list
*
* Returns an implicit-AND list of expressions to use as a list partition's
* constraint, given the parent relation and partition bound structure.
*
* The function returns NIL for a default partition when it's the only
* partition since in that case there is no constraint.
*/
static List *
get_qual_for_list(Relation parent, PartitionBoundSpec *spec)
{
PartitionKey key = RelationGetPartitionKey(parent);
List *result;
Expr *keyCol;
Expr *opexpr;
NullTest *nulltest;
ListCell *cell;
List *elems = NIL;
bool list_has_null = false;
/*
* Only single-column list partitioning is supported, so we are worried
* only about the partition key with index 0.
*/
Assert(key->partnatts == 1);
/* Construct Var or expression representing the partition column */
if (key->partattrs[0] != 0)
keyCol = (Expr *) makeVar(1,
key->partattrs[0],
key->parttypid[0],
key->parttypmod[0],
key->parttypcoll[0],
0);
else
keyCol = (Expr *) copyObject(linitial(key->partexprs));
/*
* For default list partition, collect datums for all the partitions. The
* default partition constraint should check that the partition key is
* equal to none of those.
*/
if (spec->is_default)
{
int i;
int ndatums = 0;
PartitionDesc pdesc = RelationGetPartitionDesc(parent);
PartitionBoundInfo boundinfo = pdesc->boundinfo;
if (boundinfo)
{
ndatums = boundinfo->ndatums;
if (partition_bound_accepts_nulls(boundinfo))
list_has_null = true;
}
/*
* If default is the only partition, there need not be any partition
* constraint on it.
*/
if (ndatums == 0 && !list_has_null)
return NIL;
for (i = 0; i < ndatums; i++)
{
Const *val;
/*
* Construct Const from known-not-null datum. We must be careful
* to copy the value, because our result has to be able to outlive
* the relcache entry we're copying from.
*/
val = makeConst(key->parttypid[0],
key->parttypmod[0],
key->parttypcoll[0],
key->parttyplen[0],
datumCopy(*boundinfo->datums[i],
key->parttypbyval[0],
key->parttyplen[0]),
false, /* isnull */
key->parttypbyval[0]);
elems = lappend(elems, val);
}
}
else
{
/*
* Create list of Consts for the allowed values, excluding any nulls.
*/
foreach(cell, spec->listdatums)
{
Const *val = castNode(Const, lfirst(cell));
if (val->constisnull)
list_has_null = true;
else
elems = lappend(elems, copyObject(val));
}
}
if (elems)
{
/*
* Generate the operator expression from the non-null partition
* values.
*/
opexpr = make_partition_op_expr(key, 0, BTEqualStrategyNumber,
keyCol, (Expr *) elems);
}
else
{
/*
* If there are no partition values, we don't need an operator
* expression.
*/
opexpr = NULL;
}
if (!list_has_null)
{
/*
* Gin up a "col IS NOT NULL" test that will be AND'd with the main
* expression. This might seem redundant, but the partition routing
* machinery needs it.
*/
nulltest = makeNode(NullTest);
nulltest->arg = keyCol;
nulltest->nulltesttype = IS_NOT_NULL;
nulltest->argisrow = false;
nulltest->location = -1;
result = opexpr ? list_make2(nulltest, opexpr) : list_make1(nulltest);
}
else
{
/*
* Gin up a "col IS NULL" test that will be OR'd with the main
* expression.
*/
nulltest = makeNode(NullTest);
nulltest->arg = keyCol;
nulltest->nulltesttype = IS_NULL;
nulltest->argisrow = false;
nulltest->location = -1;
if (opexpr)
{
Expr *or;
or = makeBoolExpr(OR_EXPR, list_make2(nulltest, opexpr), -1);
result = list_make1(or);
}
else
result = list_make1(nulltest);
}
/*
* Note that, in general, applying NOT to a constraint expression doesn't
* necessarily invert the set of rows it accepts, because NOT (NULL) is
* NULL. However, the partition constraints we construct here never
* evaluate to NULL, so applying NOT works as intended.
*/
if (spec->is_default)
{
result = list_make1(make_ands_explicit(result));
result = list_make1(makeBoolExpr(NOT_EXPR, result, -1));
}
return result;
}
/*
* get_range_key_properties
* Returns range partition key information for a given column
*
* This is a subroutine for get_qual_for_range, and its API is pretty
* specialized to that caller.
*
* Constructs an Expr for the key column (returned in *keyCol) and Consts
* for the lower and upper range limits (returned in *lower_val and
* *upper_val). For MINVALUE/MAXVALUE limits, NULL is returned instead of
* a Const. All of these structures are freshly palloc'd.
*
* *partexprs_item points to the cell containing the next expression in
* the key->partexprs list, or NULL. It may be advanced upon return.
*/
static void
get_range_key_properties(PartitionKey key, int keynum,
PartitionRangeDatum *ldatum,
PartitionRangeDatum *udatum,
ListCell **partexprs_item,
Expr **keyCol,
Const **lower_val, Const **upper_val)
{
/* Get partition key expression for this column */
if (key->partattrs[keynum] != 0)
{
*keyCol = (Expr *) makeVar(1,
key->partattrs[keynum],
key->parttypid[keynum],
key->parttypmod[keynum],
key->parttypcoll[keynum],
0);
}
else
{
if (*partexprs_item == NULL)
elog(ERROR, "wrong number of partition key expressions");
*keyCol = copyObject(lfirst(*partexprs_item));
*partexprs_item = lnext(*partexprs_item);
}
/* Get appropriate Const nodes for the bounds */
if (ldatum->kind == PARTITION_RANGE_DATUM_VALUE)
*lower_val = castNode(Const, copyObject(ldatum->value));
else
*lower_val = NULL;
if (udatum->kind == PARTITION_RANGE_DATUM_VALUE)
*upper_val = castNode(Const, copyObject(udatum->value));
else
*upper_val = NULL;
}
/*
* get_range_nulltest
*
* A non-default range partition table does not currently allow partition
* keys to be null, so emit an IS NOT NULL expression for each key column.
*/
static List *
get_range_nulltest(PartitionKey key)
{
List *result = NIL;
NullTest *nulltest;
ListCell *partexprs_item;
int i;
partexprs_item = list_head(key->partexprs);
for (i = 0; i < key->partnatts; i++)
{
Expr *keyCol;
if (key->partattrs[i] != 0)
{
keyCol = (Expr *) makeVar(1,
key->partattrs[i],
key->parttypid[i],
key->parttypmod[i],
key->parttypcoll[i],
0);
}
else
{
if (partexprs_item == NULL)
elog(ERROR, "wrong number of partition key expressions");
keyCol = copyObject(lfirst(partexprs_item));
partexprs_item = lnext(partexprs_item);
}
nulltest = makeNode(NullTest);
nulltest->arg = keyCol;
nulltest->nulltesttype = IS_NOT_NULL;
nulltest->argisrow = false;
nulltest->location = -1;
result = lappend(result, nulltest);
}
return result;
}
/*
* get_qual_for_range
*
* Returns an implicit-AND list of expressions to use as a range partition's
* constraint, given the parent relation and partition bound structure.
*
* For a multi-column range partition key, say (a, b, c), with (al, bl, cl)
* as the lower bound tuple and (au, bu, cu) as the upper bound tuple, we
* generate an expression tree of the following form:
*
* (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
* AND
* (a > al OR (a = al AND b > bl) OR (a = al AND b = bl AND c >= cl))
* AND
* (a < au OR (a = au AND b < bu) OR (a = au AND b = bu AND c < cu))
*
* It is often the case that a prefix of lower and upper bound tuples contains
* the same values, for example, (al = au), in which case, we will emit an
* expression tree of the following form:
*
* (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
* AND
* (a = al)
* AND
* (b > bl OR (b = bl AND c >= cl))
* AND
* (b < bu) OR (b = bu AND c < cu))
*
* If a bound datum is either MINVALUE or MAXVALUE, these expressions are
* simplified using the fact that any value is greater than MINVALUE and less
* than MAXVALUE. So, for example, if cu = MAXVALUE, c < cu is automatically
* true, and we need not emit any expression for it, and the last line becomes
*
* (b < bu) OR (b = bu), which is simplified to (b <= bu)
*
* In most common cases with only one partition column, say a, the following
* expression tree will be generated: a IS NOT NULL AND a >= al AND a < au
*
* For default partition, it returns the negation of the constraints of all
* the other partitions.
*
* External callers should pass for_default as false; we set it to true only
* when recursing.
*/
static List *
get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
bool for_default)
{
List *result = NIL;
ListCell *cell1,
*cell2,
*partexprs_item,
*partexprs_item_saved;
int i,
j;
PartitionRangeDatum *ldatum,
*udatum;
PartitionKey key = RelationGetPartitionKey(parent);
Expr *keyCol;
Const *lower_val,
*upper_val;
List *lower_or_arms,
*upper_or_arms;
int num_or_arms,
current_or_arm;
ListCell *lower_or_start_datum,
*upper_or_start_datum;
bool need_next_lower_arm,
need_next_upper_arm;
if (spec->is_default)
{
List *or_expr_args = NIL;
PartitionDesc pdesc = RelationGetPartitionDesc(parent);
Oid *inhoids = pdesc->oids;
int nparts = pdesc->nparts,
i;
for (i = 0; i < nparts; i++)
{
Oid inhrelid = inhoids[i];
HeapTuple tuple;
Datum datum;
bool isnull;
PartitionBoundSpec *bspec;
tuple = SearchSysCache1(RELOID, inhrelid);
if (!HeapTupleIsValid(tuple))
elog(ERROR, "cache lookup failed for relation %u", inhrelid);
datum = SysCacheGetAttr(RELOID, tuple,
Anum_pg_class_relpartbound,
&isnull);
Assert(!isnull);
bspec = (PartitionBoundSpec *)
stringToNode(TextDatumGetCString(datum));
if (!IsA(bspec, PartitionBoundSpec))
elog(ERROR, "expected PartitionBoundSpec");
if (!bspec->is_default)
{
List *part_qual;
part_qual = get_qual_for_range(parent, bspec, true);
/*
* AND the constraints of the partition and add to
* or_expr_args
*/
or_expr_args = lappend(or_expr_args, list_length(part_qual) > 1
? makeBoolExpr(AND_EXPR, part_qual, -1)
: linitial(part_qual));
}
ReleaseSysCache(tuple);
}
if (or_expr_args != NIL)
{
Expr *other_parts_constr;
/*
* Combine the constraints obtained for non-default partitions
* using OR. As requested, each of the OR's args doesn't include
* the NOT NULL test for partition keys (which is to avoid its
* useless repetition). Add the same now.
*/
other_parts_constr =
makeBoolExpr(AND_EXPR,
lappend(get_range_nulltest(key),
list_length(or_expr_args) > 1
? makeBoolExpr(OR_EXPR, or_expr_args,
-1)
: linitial(or_expr_args)),
-1);
/*
* Finally, the default partition contains everything *NOT*
* contained in the non-default partitions.
*/
result = list_make1(makeBoolExpr(NOT_EXPR,
list_make1(other_parts_constr), -1));
}
return result;
}
lower_or_start_datum = list_head(spec->lowerdatums);
upper_or_start_datum = list_head(spec->upperdatums);
num_or_arms = key->partnatts;
/*
* If it is the recursive call for default, we skip the get_range_nulltest
* to avoid accumulating the NullTest on the same keys for each partition.
*/
if (!for_default)
result = get_range_nulltest(key);
/*
* Iterate over the key columns and check if the corresponding lower and
* upper datums are equal using the btree equality operator for the
* column's type. If equal, we emit single keyCol = common_value
* expression. Starting from the first column for which the corresponding
* lower and upper bound datums are not equal, we generate OR expressions
* as shown in the function's header comment.
*/
i = 0;
partexprs_item = list_head(key->partexprs);
partexprs_item_saved = partexprs_item; /* placate compiler */
forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
{
EState *estate;
MemoryContext oldcxt;
Expr *test_expr;
ExprState *test_exprstate;
Datum test_result;
bool isNull;
ldatum = castNode(PartitionRangeDatum, lfirst(cell1));
udatum = castNode(PartitionRangeDatum, lfirst(cell2));
/*
* Since get_range_key_properties() modifies partexprs_item, and we
* might need to start over from the previous expression in the later
* part of this function, save away the current value.
*/
partexprs_item_saved = partexprs_item;
get_range_key_properties(key, i, ldatum, udatum,
&partexprs_item,
&keyCol,
&lower_val, &upper_val);
/*
* If either value is NULL, the corresponding partition bound is
* either MINVALUE or MAXVALUE, and we treat them as unequal, because
* even if they're the same, there is no common value to equate the
* key column with.
*/
if (!lower_val || !upper_val)
break;
/* Create the test expression */
estate = CreateExecutorState();
oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
test_expr = make_partition_op_expr(key, i, BTEqualStrategyNumber,
(Expr *) lower_val,
(Expr *) upper_val);
fix_opfuncids((Node *) test_expr);
test_exprstate = ExecInitExpr(test_expr, NULL);
test_result = ExecEvalExprSwitchContext(test_exprstate,
GetPerTupleExprContext(estate),
&isNull);
MemoryContextSwitchTo(oldcxt);
FreeExecutorState(estate);
/* If not equal, go generate the OR expressions */
if (!DatumGetBool(test_result))
break;
/*
* The bounds for the last key column can't be equal, because such a
* range partition would never be allowed to be defined (it would have
* an empty range otherwise).
*/
if (i == key->partnatts - 1)
elog(ERROR, "invalid range bound specification");
/* Equal, so generate keyCol = lower_val expression */
result = lappend(result,
make_partition_op_expr(key, i, BTEqualStrategyNumber,
keyCol, (Expr *) lower_val));
i++;
}
/* First pair of lower_val and upper_val that are not equal. */
lower_or_start_datum = cell1;
upper_or_start_datum = cell2;
/* OR will have as many arms as there are key columns left. */
num_or_arms = key->partnatts - i;
current_or_arm = 0;
lower_or_arms = upper_or_arms = NIL;
need_next_lower_arm = need_next_upper_arm = true;
while (current_or_arm < num_or_arms)
{
List *lower_or_arm_args = NIL,
*upper_or_arm_args = NIL;
/* Restart scan of columns from the i'th one */
j = i;
partexprs_item = partexprs_item_saved;
for_both_cell(cell1, lower_or_start_datum, cell2, upper_or_start_datum)
{
PartitionRangeDatum *ldatum_next = NULL,
*udatum_next = NULL;
ldatum = castNode(PartitionRangeDatum, lfirst(cell1));
if (lnext(cell1))
ldatum_next = castNode(PartitionRangeDatum,
lfirst(lnext(cell1)));
udatum = castNode(PartitionRangeDatum, lfirst(cell2));
if (lnext(cell2))
udatum_next = castNode(PartitionRangeDatum,
lfirst(lnext(cell2)));
get_range_key_properties(key, j, ldatum, udatum,
&partexprs_item,
&keyCol,
&lower_val, &upper_val);
if (need_next_lower_arm && lower_val)
{
uint16 strategy;
/*
* For the non-last columns of this arm, use the EQ operator.
* For the last column of this arm, use GT, unless this is the
* last column of the whole bound check, or the next bound
* datum is MINVALUE, in which case use GE.
*/
if (j - i < current_or_arm)
strategy = BTEqualStrategyNumber;
else if (j == key->partnatts - 1 ||
(ldatum_next &&
ldatum_next->kind == PARTITION_RANGE_DATUM_MINVALUE))
strategy = BTGreaterEqualStrategyNumber;
else
strategy = BTGreaterStrategyNumber;
lower_or_arm_args = lappend(lower_or_arm_args,
make_partition_op_expr(key, j,
strategy,
keyCol,
(Expr *) lower_val));
}
if (need_next_upper_arm && upper_val)
{
uint16 strategy;
/*
* For the non-last columns of this arm, use the EQ operator.
* For the last column of this arm, use LT, unless the next
* bound datum is MAXVALUE, in which case use LE.
*/
if (j - i < current_or_arm)
strategy = BTEqualStrategyNumber;
else if (udatum_next &&
udatum_next->kind == PARTITION_RANGE_DATUM_MAXVALUE)
strategy = BTLessEqualStrategyNumber;
else
strategy = BTLessStrategyNumber;
upper_or_arm_args = lappend(upper_or_arm_args,
make_partition_op_expr(key, j,
strategy,
keyCol,
(Expr *) upper_val));
}
/*
* Did we generate enough of OR's arguments? First arm considers
* the first of the remaining columns, second arm considers first
* two of the remaining columns, and so on.
*/
++j;
if (j - i > current_or_arm)
{
/*
* We must not emit any more arms if the new column that will
* be considered is unbounded, or this one was.
*/
if (!lower_val || !ldatum_next ||
ldatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
need_next_lower_arm = false;
if (!upper_val || !udatum_next ||
udatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
need_next_upper_arm = false;
break;
}
}
if (lower_or_arm_args != NIL)
lower_or_arms = lappend(lower_or_arms,
list_length(lower_or_arm_args) > 1
? makeBoolExpr(AND_EXPR, lower_or_arm_args, -1)
: linitial(lower_or_arm_args));
if (upper_or_arm_args != NIL)
upper_or_arms = lappend(upper_or_arms,
list_length(upper_or_arm_args) > 1
? makeBoolExpr(AND_EXPR, upper_or_arm_args, -1)
: linitial(upper_or_arm_args));
/* If no work to do in the next iteration, break away. */
if (!need_next_lower_arm && !need_next_upper_arm)
break;
++current_or_arm;
}
/*
* Generate the OR expressions for each of lower and upper bounds (if
* required), and append to the list of implicitly ANDed list of
* expressions.
*/
if (lower_or_arms != NIL)
result = lappend(result,
list_length(lower_or_arms) > 1
? makeBoolExpr(OR_EXPR, lower_or_arms, -1)
: linitial(lower_or_arms));
if (upper_or_arms != NIL)
result = lappend(result,
list_length(upper_or_arms) > 1
? makeBoolExpr(OR_EXPR, upper_or_arms, -1)
: linitial(upper_or_arms));
/*
* As noted above, for non-default, we return list with constant TRUE. If
* the result is NIL during the recursive call for default, it implies
* this is the only other partition which can hold every value of the key
* except NULL. Hence we return the NullTest result skipped earlier.
*/
if (result == NIL)
result = for_default
? get_range_nulltest(key)
: list_make1(makeBoolConst(true, false));
return result;
}
/*
* generate_partition_qual
*
* Generate partition predicate from rel's partition bound expression. The
* function returns a NIL list if there is no predicate.
*
* Result expression tree is stored CacheMemoryContext to ensure it survives
* as long as the relcache entry. But we should be running in a less long-lived
* working context. To avoid leaking cache memory if this routine fails partway
* through, we build in working memory and then copy the completed structure
* into cache memory.
*/
static List *
generate_partition_qual(Relation rel)
{
HeapTuple tuple;
MemoryContext oldcxt;
Datum boundDatum;
bool isnull;
PartitionBoundSpec *bound;
List *my_qual = NIL,
*result = NIL;
Relation parent;
bool found_whole_row;
/* Guard against stack overflow due to overly deep partition tree */
check_stack_depth();
/* Quick copy */
if (rel->rd_partcheck != NIL)
return copyObject(rel->rd_partcheck);
/* Grab at least an AccessShareLock on the parent table */
parent = heap_open(get_partition_parent(RelationGetRelid(rel)),
AccessShareLock);
/* Get pg_class.relpartbound */
tuple = SearchSysCache1(RELOID, RelationGetRelid(rel));
if (!HeapTupleIsValid(tuple))
elog(ERROR, "cache lookup failed for relation %u",
RelationGetRelid(rel));
boundDatum = SysCacheGetAttr(RELOID, tuple,
Anum_pg_class_relpartbound,
&isnull);
if (isnull) /* should not happen */
elog(ERROR, "relation \"%s\" has relpartbound = null",
RelationGetRelationName(rel));
bound = castNode(PartitionBoundSpec,
stringToNode(TextDatumGetCString(boundDatum)));
ReleaseSysCache(tuple);
my_qual = get_qual_from_partbound(rel, parent, bound);
/* Add the parent's quals to the list (if any) */
if (parent->rd_rel->relispartition)
result = list_concat(generate_partition_qual(parent), my_qual);
else
result = my_qual;
/*
* Change Vars to have partition's attnos instead of the parent's. We do
* this after we concatenate the parent's quals, because we want every Var
* in it to bear this relation's attnos. It's safe to assume varno = 1
* here.
*/
result = map_partition_varattnos(result, 1, rel, parent,
&found_whole_row);
/* There can never be a whole-row reference here */
if (found_whole_row)
elog(ERROR, "unexpected whole-row reference found in partition key");
/* Save a copy in the relcache */
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
rel->rd_partcheck = copyObject(result);
MemoryContextSwitchTo(oldcxt);
/* Keep the parent locked until commit */
heap_close(parent, NoLock);
return result;
}
/*
* get_partition_for_tuple
* Finds partition of relation which accepts the partition key specified
* in values and isnull
*
* Return value is index of the partition (>= 0 and < partdesc->nparts) if one
* found or -1 if none found.
*/
int
get_partition_for_tuple(Relation relation, Datum *values, bool *isnull)
{
int bound_offset;
int part_index = -1;
PartitionKey key = RelationGetPartitionKey(relation);
PartitionDesc partdesc = RelationGetPartitionDesc(relation);
/* Route as appropriate based on partitioning strategy. */
switch (key->strategy)
{
case PARTITION_STRATEGY_HASH:
{
PartitionBoundInfo boundinfo = partdesc->boundinfo;
int greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
uint64 rowHash = compute_hash_value(key->partnatts,
key->partsupfunc,
values, isnull);
part_index = boundinfo->indexes[rowHash % greatest_modulus];
}
break;
case PARTITION_STRATEGY_LIST:
if (isnull[0])
{
if (partition_bound_accepts_nulls(partdesc->boundinfo))
part_index = partdesc->boundinfo->null_index;
}
else
{
bool equal = false;
bound_offset = partition_list_bsearch(key->partsupfunc,
key->partcollation,
partdesc->boundinfo,
values[0], &equal);
if (bound_offset >= 0 && equal)
part_index = partdesc->boundinfo->indexes[bound_offset];
}
break;
case PARTITION_STRATEGY_RANGE:
{
bool equal = false,
range_partkey_has_null = false;
int i;
/*
* No range includes NULL, so this will be accepted by the
* default partition if there is one, and otherwise rejected.
*/
for (i = 0; i < key->partnatts; i++)
{
if (isnull[i])
{
range_partkey_has_null = true;
break;
}
}
if (!range_partkey_has_null)
{
bound_offset = partition_range_datum_bsearch(key->partsupfunc,
key->partcollation,
partdesc->boundinfo,
key->partnatts,
values,
&equal);
map_partition_varattnos(List *expr, int fromrel_varno,
Relation to_rel, Relation from_rel,
bool *found_whole_row)
{
bool my_found_whole_row = false;
/*
* The bound at bound_offset is less than or equal to the
* tuple value, so the bound at offset+1 is the upper
* bound of the partition we're looking for, if there
* actually exists one.
*/
part_index = partdesc->boundinfo->indexes[bound_offset + 1];
}
}
break;
if (expr != NIL)
{
AttrNumber *part_attnos;
default:
elog(ERROR, "unexpected partition strategy: %d",
(int) key->strategy);
part_attnos = convert_tuples_by_name_map(RelationGetDescr(to_rel),
RelationGetDescr(from_rel),
gettext_noop("could not convert row type"));
expr = (List *) map_variable_attnos((Node *) expr,
fromrel_varno, 0,
part_attnos,
RelationGetDescr(from_rel)->natts,
RelationGetForm(to_rel)->reltype,
&my_found_whole_row);
}
/*
* part_index < 0 means we failed to find a partition of this parent. Use
* the default partition, if there is one.
*/
if (part_index < 0)
part_index = partdesc->boundinfo->default_index;
if (found_whole_row)
*found_whole_row = my_found_whole_row;
return part_index;
return expr;
}
/*
......@@ -2632,10 +203,9 @@ get_partition_for_tuple(Relation relation, Datum *values, bool *isnull)
* text.
*/
bool
has_partition_attrs(Relation rel, Bitmapset *attnums,
bool *used_in_expr)
has_partition_attrs(Relation rel, Bitmapset *attnums, bool *used_in_expr)
{
PartitionKey key;
PartitionKey key;
int partnatts;
List *partexprs;
ListCell *partexprs_item;
......@@ -2685,394 +255,6 @@ has_partition_attrs(Relation rel, Bitmapset *attnums,
return false;
}
/*
* qsort_partition_hbound_cmp
*
* We sort hash bounds by modulus, then by remainder.
*/
static int32
qsort_partition_hbound_cmp(const void *a, const void *b)
{
PartitionHashBound *h1 = (*(PartitionHashBound *const *) a);
PartitionHashBound *h2 = (*(PartitionHashBound *const *) b);
return partition_hbound_cmp(h1->modulus, h1->remainder,
h2->modulus, h2->remainder);
}
/*
* partition_hbound_cmp
*
* Compares modulus first, then remainder if modulus are equal.
*/
static int32
partition_hbound_cmp(int modulus1, int remainder1, int modulus2, int remainder2)
{
if (modulus1 < modulus2)
return -1;
if (modulus1 > modulus2)
return 1;
if (modulus1 == modulus2 && remainder1 != remainder2)
return (remainder1 > remainder2) ? 1 : -1;
return 0;
}
/*
* qsort_partition_list_value_cmp
*
* Compare two list partition bound datums
*/
static int32
qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
{
Datum val1 = (*(const PartitionListValue **) a)->value,
val2 = (*(const PartitionListValue **) b)->value;
PartitionKey key = (PartitionKey) arg;
return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
key->partcollation[0],
val1, val2));
}
/*
* make_one_range_bound
*
* Return a PartitionRangeBound given a list of PartitionRangeDatum elements
* and a flag telling whether the bound is lower or not. Made into a function
* because there are multiple sites that want to use this facility.
*/
static PartitionRangeBound *
make_one_range_bound(PartitionKey key, int index, List *datums, bool lower)
{
PartitionRangeBound *bound;
ListCell *lc;
int i;
Assert(datums != NIL);
bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound));
bound->index = index;
bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum));
bound->kind = (PartitionRangeDatumKind *) palloc0(key->partnatts *
sizeof(PartitionRangeDatumKind));
bound->lower = lower;
i = 0;
foreach(lc, datums)
{
PartitionRangeDatum *datum = castNode(PartitionRangeDatum, lfirst(lc));
/* What's contained in this range datum? */
bound->kind[i] = datum->kind;
if (datum->kind == PARTITION_RANGE_DATUM_VALUE)
{
Const *val = castNode(Const, datum->value);
if (val->constisnull)
elog(ERROR, "invalid range bound datum");
bound->datums[i] = val->constvalue;
}
i++;
}
return bound;
}
/* Used when sorting range bounds across all range partitions */
static int32
qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
{
PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a);
PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b);
PartitionKey key = (PartitionKey) arg;
return partition_rbound_cmp(key->partnatts, key->partsupfunc,
key->partcollation, b1->datums, b1->kind,
b1->lower, b2);
}
/*
* partition_rbound_cmp
*
* Return for two range bounds whether the 1st one (specified in datums1,
* kind1, and lower1) is <, =, or > the bound specified in *b2.
*
* partnatts, partsupfunc and partcollation give the number of attributes in the
* bounds to be compared, comparison function to be used and the collations of
* attributes, respectively.
*
* Note that if the values of the two range bounds compare equal, then we take
* into account whether they are upper or lower bounds, and an upper bound is
* considered to be smaller than a lower bound. This is important to the way
* that RelationBuildPartitionDesc() builds the PartitionBoundInfoData
* structure, which only stores the upper bound of a common boundary between
* two contiguous partitions.
*/
static int32
partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc, Oid *partcollation,
Datum *datums1, PartitionRangeDatumKind *kind1,
bool lower1, PartitionRangeBound *b2)
{
int32 cmpval = 0; /* placate compiler */
int i;
Datum *datums2 = b2->datums;
PartitionRangeDatumKind *kind2 = b2->kind;
bool lower2 = b2->lower;
for (i = 0; i < partnatts; i++)
{
/*
* First, handle cases where the column is unbounded, which should not
* invoke the comparison procedure, and should not consider any later
* columns. Note that the PartitionRangeDatumKind enum elements
* compare the same way as the values they represent.
*/
if (kind1[i] < kind2[i])
return -1;
else if (kind1[i] > kind2[i])
return 1;
else if (kind1[i] != PARTITION_RANGE_DATUM_VALUE)
/*
* The column bounds are both MINVALUE or both MAXVALUE. No later
* columns should be considered, but we still need to compare
* whether they are upper or lower bounds.
*/
break;
cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i],
partcollation[i],
datums1[i],
datums2[i]));
if (cmpval != 0)
break;
}
/*
* If the comparison is anything other than equal, we're done. If they
* compare equal though, we still have to consider whether the boundaries
* are inclusive or exclusive. Exclusive one is considered smaller of the
* two.
*/
if (cmpval == 0 && lower1 != lower2)
cmpval = lower1 ? 1 : -1;
return cmpval;
}
/*
* partition_rbound_datum_cmp
*
* Return whether range bound (specified in rb_datums, rb_kind, and rb_lower)
* is <, =, or > partition key of tuple (tuple_datums)
*
* n_tuple_datums, partsupfunc and partcollation give number of attributes in
* the bounds to be compared, comparison function to be used and the collations
* of attributes resp.
*
*/
int32
partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
Datum *tuple_datums, int n_tuple_datums)
{
int i;
int32 cmpval = -1;
for (i = 0; i < n_tuple_datums; i++)
{
if (rb_kind[i] == PARTITION_RANGE_DATUM_MINVALUE)
return -1;
else if (rb_kind[i] == PARTITION_RANGE_DATUM_MAXVALUE)
return 1;
cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i],
partcollation[i],
rb_datums[i],
tuple_datums[i]));
if (cmpval != 0)
break;
}
return cmpval;
}
/*
* partition_list_bsearch
* Returns the index of the greatest bound datum that is less than equal
* to the given value or -1 if all of the bound datums are greater
*
* *is_equal is set to true if the bound datum at the returned index is equal
* to the input value.
*/
int
partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
PartitionBoundInfo boundinfo,
Datum value, bool *is_equal)
{
int lo,
hi,
mid;
lo = -1;
hi = boundinfo->ndatums - 1;
while (lo < hi)
{
int32 cmpval;
mid = (lo + hi + 1) / 2;
cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[0],
partcollation[0],
boundinfo->datums[mid][0],
value));
if (cmpval <= 0)
{
lo = mid;
*is_equal = (cmpval == 0);
if (*is_equal)
break;
}
else
hi = mid - 1;
}
return lo;
}
/*
* partition_range_bsearch
* Returns the index of the greatest range bound that is less than or
* equal to the given range bound or -1 if all of the range bounds are
* greater
*
* *is_equal is set to true if the range bound at the returned index is equal
* to the input range bound
*/
int
partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
Oid *partcollation,
PartitionBoundInfo boundinfo,
PartitionRangeBound *probe, bool *is_equal)
{
int lo,
hi,
mid;
lo = -1;
hi = boundinfo->ndatums - 1;
while (lo < hi)
{
int32 cmpval;
mid = (lo + hi + 1) / 2;
cmpval = partition_rbound_cmp(partnatts, partsupfunc, partcollation,
boundinfo->datums[mid],
boundinfo->kind[mid],
(boundinfo->indexes[mid] == -1),
probe);
if (cmpval <= 0)
{
lo = mid;
*is_equal = (cmpval == 0);
if (*is_equal)
break;
}
else
hi = mid - 1;
}
return lo;
}
/*
* partition_range_bsearch
* Returns the index of the greatest range bound that is less than or
* equal to the given tuple or -1 if all of the range bounds are greater
*
* *is_equal is set to true if the range bound at the returned index is equal
* to the input tuple.
*/
int
partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
PartitionBoundInfo boundinfo,
int nvalues, Datum *values, bool *is_equal)
{
int lo,
hi,
mid;
lo = -1;
hi = boundinfo->ndatums - 1;
while (lo < hi)
{
int32 cmpval;
mid = (lo + hi + 1) / 2;
cmpval = partition_rbound_datum_cmp(partsupfunc,
partcollation,
boundinfo->datums[mid],
boundinfo->kind[mid],
values,
nvalues);
if (cmpval <= 0)
{
lo = mid;
*is_equal = (cmpval == 0);
if (*is_equal)
break;
}
else
hi = mid - 1;
}
return lo;
}
/*
* partition_hash_bsearch
* Returns the index of the greatest (modulus, remainder) pair that is
* less than or equal to the given (modulus, remainder) pair or -1 if
* all of them are greater
*/
int
partition_hash_bsearch(PartitionBoundInfo boundinfo,
int modulus, int remainder)
{
int lo,
hi,
mid;
lo = -1;
hi = boundinfo->ndatums - 1;
while (lo < hi)
{
int32 cmpval,
bound_modulus,
bound_remainder;
mid = (lo + hi + 1) / 2;
bound_modulus = DatumGetInt32(boundinfo->datums[mid][0]);
bound_remainder = DatumGetInt32(boundinfo->datums[mid][1]);
cmpval = partition_hbound_cmp(bound_modulus, bound_remainder,
modulus, remainder);
if (cmpval <= 0)
{
lo = mid;
if (cmpval == 0)
break;
}
else
hi = mid - 1;
}
return lo;
}
/*
* get_default_oid_from_partdesc
*
......@@ -3175,323 +357,3 @@ get_proposed_default_constraint(List *new_part_constraints)
return make_ands_implicit(defPartConstraint);
}
/*
* get_partition_bound_num_indexes
*
* Returns the number of the entries in the partition bound indexes array.
*/
static int
get_partition_bound_num_indexes(PartitionBoundInfo bound)
{
int num_indexes;
Assert(bound);
switch (bound->strategy)
{
case PARTITION_STRATEGY_HASH:
/*
* The number of the entries in the indexes array is same as the
* greatest modulus.
*/
num_indexes = get_hash_partition_greatest_modulus(bound);
break;
case PARTITION_STRATEGY_LIST:
num_indexes = bound->ndatums;
break;
case PARTITION_STRATEGY_RANGE:
/* Range partitioned table has an extra index. */
num_indexes = bound->ndatums + 1;
break;
default:
elog(ERROR, "unexpected partition strategy: %d",
(int) bound->strategy);
}
return num_indexes;
}
/*
* get_hash_partition_greatest_modulus
*
* Returns the greatest modulus of the hash partition bound. The greatest
* modulus will be at the end of the datums array because hash partitions are
* arranged in the ascending order of their modulus and remainders.
*/
int
get_hash_partition_greatest_modulus(PartitionBoundInfo bound)
{
Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH);
Assert(bound->datums && bound->ndatums > 0);
Assert(DatumGetInt32(bound->datums[bound->ndatums - 1][0]) > 0);
return DatumGetInt32(bound->datums[bound->ndatums - 1][0]);
}
/*
* compute_hash_value
*
* Compute the hash value for given not null partition key values.
*/
uint64
compute_hash_value(int partnatts, FmgrInfo *partsupfunc,
Datum *values, bool *isnull)
{
int i;
uint64 rowHash = 0;
Datum seed = UInt64GetDatum(HASH_PARTITION_SEED);
for (i = 0; i < partnatts; i++)
{
if (!isnull[i])
{
Datum hash;
Assert(OidIsValid(partsupfunc[i].fn_oid));
/*
* Compute hash for each datum value by calling respective
* datatype-specific hash functions of each partition key
* attribute.
*/
hash = FunctionCall2(&partsupfunc[i], values[i], seed);
/* Form a single 64-bit hash value */
rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
}
}
return rowHash;
}
/*
* satisfies_hash_partition
*
* This is an SQL-callable function for use in hash partition constraints.
* The first three arguments are the parent table OID, modulus, and remainder.
* The remaining arguments are the value of the partitioning columns (or
* expressions); these are hashed and the results are combined into a single
* hash value by calling hash_combine64.
*
* Returns true if remainder produced when this computed single hash value is
* divided by the given modulus is equal to given remainder, otherwise false.
*
* See get_qual_for_hash() for usage.
*/
Datum
satisfies_hash_partition(PG_FUNCTION_ARGS)
{
typedef struct ColumnsHashData
{
Oid relid;
int nkeys;
Oid variadic_type;
int16 variadic_typlen;
bool variadic_typbyval;
char variadic_typalign;
FmgrInfo partsupfunc[PARTITION_MAX_KEYS];
} ColumnsHashData;
Oid parentId;
int modulus;
int remainder;
Datum seed = UInt64GetDatum(HASH_PARTITION_SEED);
ColumnsHashData *my_extra;
uint64 rowHash = 0;
/* Return null if the parent OID, modulus, or remainder is NULL. */
if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2))
PG_RETURN_NULL();
parentId = PG_GETARG_OID(0);
modulus = PG_GETARG_INT32(1);
remainder = PG_GETARG_INT32(2);
/* Sanity check modulus and remainder. */
if (modulus <= 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("modulus for hash partition must be a positive integer")));
if (remainder < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("remainder for hash partition must be a non-negative integer")));
if (remainder >= modulus)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("remainder for hash partition must be less than modulus")));
/*
* Cache hash function information.
*/
my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
if (my_extra == NULL || my_extra->relid != parentId)
{
Relation parent;
PartitionKey key;
int j;
/* Open parent relation and fetch partition keyinfo */
parent = try_relation_open(parentId, AccessShareLock);
if (parent == NULL)
PG_RETURN_NULL();
key = RelationGetPartitionKey(parent);
/* Reject parent table that is not hash-partitioned. */
if (parent->rd_rel->relkind != RELKIND_PARTITIONED_TABLE ||
key->strategy != PARTITION_STRATEGY_HASH)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("\"%s\" is not a hash partitioned table",
get_rel_name(parentId))));
if (!get_fn_expr_variadic(fcinfo->flinfo))
{
int nargs = PG_NARGS() - 3;
/* complain if wrong number of column values */
if (key->partnatts != nargs)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
key->partnatts, nargs)));
/* allocate space for our cache */
fcinfo->flinfo->fn_extra =
MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
offsetof(ColumnsHashData, partsupfunc) +
sizeof(FmgrInfo) * nargs);
my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
my_extra->relid = parentId;
my_extra->nkeys = key->partnatts;
/* check argument types and save fmgr_infos */
for (j = 0; j < key->partnatts; ++j)
{
Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, j + 3);
if (argtype != key->parttypid[j] && !IsBinaryCoercible(argtype, key->parttypid[j]))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"",
j + 1, format_type_be(key->parttypid[j]), format_type_be(argtype))));
fmgr_info_copy(&my_extra->partsupfunc[j],
&key->partsupfunc[j],
fcinfo->flinfo->fn_mcxt);
}
}
else
{
ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
/* allocate space for our cache -- just one FmgrInfo in this case */
fcinfo->flinfo->fn_extra =
MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
offsetof(ColumnsHashData, partsupfunc) +
sizeof(FmgrInfo));
my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
my_extra->relid = parentId;
my_extra->nkeys = key->partnatts;
my_extra->variadic_type = ARR_ELEMTYPE(variadic_array);
get_typlenbyvalalign(my_extra->variadic_type,
&my_extra->variadic_typlen,
&my_extra->variadic_typbyval,
&my_extra->variadic_typalign);
/* check argument types */
for (j = 0; j < key->partnatts; ++j)
if (key->parttypid[j] != my_extra->variadic_type)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"",
j + 1,
format_type_be(key->parttypid[j]),
format_type_be(my_extra->variadic_type))));
fmgr_info_copy(&my_extra->partsupfunc[0],
&key->partsupfunc[0],
fcinfo->flinfo->fn_mcxt);
}
/* Hold lock until commit */
relation_close(parent, NoLock);
}
if (!OidIsValid(my_extra->variadic_type))
{
int nkeys = my_extra->nkeys;
int i;
/*
* For a non-variadic call, neither the number of arguments nor their
* types can change across calls, so avoid the expense of rechecking
* here.
*/
for (i = 0; i < nkeys; i++)
{
Datum hash;
/* keys start from fourth argument of function. */
int argno = i + 3;
if (PG_ARGISNULL(argno))
continue;
Assert(OidIsValid(my_extra->partsupfunc[i].fn_oid));
hash = FunctionCall2(&my_extra->partsupfunc[i],
PG_GETARG_DATUM(argno),
seed);
/* Form a single 64-bit hash value */
rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
}
}
else
{
ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
int i;
int nelems;
Datum *datum;
bool *isnull;
deconstruct_array(variadic_array,
my_extra->variadic_type,
my_extra->variadic_typlen,
my_extra->variadic_typbyval,
my_extra->variadic_typalign,
&datum, &isnull, &nelems);
/* complain if wrong number of column values */
if (nelems != my_extra->nkeys)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
my_extra->nkeys, nelems)));
for (i = 0; i < nelems; i++)
{
Datum hash;
if (isnull[i])
continue;
Assert(OidIsValid(my_extra->partsupfunc[0].fn_oid));
hash = FunctionCall2(&my_extra->partsupfunc[0],
datum[i],
seed);
/* Form a single 64-bit hash value */
rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
}
}
PG_RETURN_BOOL(rowHash % modulus == remainder);
}
......@@ -18,9 +18,11 @@
#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/sysattr.h"
#include "access/tupconvert.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/objectaccess.h"
#include "catalog/partition.h"
#include "catalog/pg_constraint.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
......
......@@ -56,6 +56,7 @@
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/partcache.h"
#include "utils/regproc.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
......
......@@ -77,6 +77,7 @@
#include "parser/parse_type.h"
#include "parser/parse_utilcmd.h"
#include "parser/parser.h"
#include "partitioning/partbounds.h"
#include "pgstat.h"
#include "rewrite/rewriteDefine.h"
#include "rewrite/rewriteHandler.h"
......@@ -92,6 +93,7 @@
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/partcache.h"
#include "utils/relcache.h"
#include "utils/ruleutils.h"
#include "utils/snapmgr.h"
......
......@@ -23,6 +23,7 @@
#include "catalog/index.h"
#include "catalog/indexing.h"
#include "catalog/objectaccess.h"
#include "catalog/partition.h"
#include "catalog/pg_constraint.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_proc.h"
......
......@@ -42,7 +42,6 @@
#include "access/transam.h"
#include "access/xact.h"
#include "catalog/namespace.h"
#include "catalog/partition.h"
#include "catalog/pg_publication.h"
#include "commands/matview.h"
#include "commands/trigger.h"
......@@ -60,6 +59,7 @@
#include "utils/acl.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/partcache.h"
#include "utils/rls.h"
#include "utils/ruleutils.h"
#include "utils/snapmgr.h"
......
......@@ -11,9 +11,9 @@
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "catalog/partition.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_type.h"
#include "executor/execPartition.h"
......@@ -22,10 +22,15 @@
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "partitioning/partbounds.h"
#include "partitioning/partprune.h"
#include "utils/lsyscache.h"
#include "utils/partcache.h"
#include "utils/rel.h"
#include "utils/rls.h"
#include "utils/ruleutils.h"
static PartitionDispatch *RelationGetPartitionDispatchInfo(Relation rel,
int *num_parted, List **leaf_part_oids);
static void get_partition_dispatch_recurse(Relation rel, Relation parent,
......@@ -35,6 +40,8 @@ static void FormPartitionKeyDatum(PartitionDispatch pd,
EState *estate,
Datum *values,
bool *isnull);
static int get_partition_for_tuple(Relation relation, Datum *values,
bool *isnull);
static char *ExecBuildSlotPartitionKeyDescription(Relation rel,
Datum *values,
bool *isnull,
......@@ -1022,6 +1029,110 @@ FormPartitionKeyDatum(PartitionDispatch pd,
elog(ERROR, "wrong number of partition key expressions");
}
/*
* get_partition_for_tuple
* Finds partition of relation which accepts the partition key specified
* in values and isnull
*
* Return value is index of the partition (>= 0 and < partdesc->nparts) if one
* found or -1 if none found.
*/
int
get_partition_for_tuple(Relation relation, Datum *values, bool *isnull)
{
int bound_offset;
int part_index = -1;
PartitionKey key = RelationGetPartitionKey(relation);
PartitionDesc partdesc = RelationGetPartitionDesc(relation);
/* Route as appropriate based on partitioning strategy. */
switch (key->strategy)
{
case PARTITION_STRATEGY_HASH:
{
PartitionBoundInfo boundinfo = partdesc->boundinfo;
int greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
uint64 rowHash = compute_hash_value(key->partnatts,
key->partsupfunc,
values, isnull);
part_index = boundinfo->indexes[rowHash % greatest_modulus];
}
break;
case PARTITION_STRATEGY_LIST:
if (isnull[0])
{
if (partition_bound_accepts_nulls(partdesc->boundinfo))
part_index = partdesc->boundinfo->null_index;
}
else
{
bool equal = false;
bound_offset = partition_list_bsearch(key->partsupfunc,
key->partcollation,
partdesc->boundinfo,
values[0], &equal);
if (bound_offset >= 0 && equal)
part_index = partdesc->boundinfo->indexes[bound_offset];
}
break;
case PARTITION_STRATEGY_RANGE:
{
bool equal = false,
range_partkey_has_null = false;
int i;
/*
* No range includes NULL, so this will be accepted by the
* default partition if there is one, and otherwise rejected.
*/
for (i = 0; i < key->partnatts; i++)
{
if (isnull[i])
{
range_partkey_has_null = true;
break;
}
}
if (!range_partkey_has_null)
{
bound_offset = partition_range_datum_bsearch(key->partsupfunc,
key->partcollation,
partdesc->boundinfo,
key->partnatts,
values,
&equal);
/*
* The bound at bound_offset is less than or equal to the
* tuple value, so the bound at offset+1 is the upper
* bound of the partition we're looking for, if there
* actually exists one.
*/
part_index = partdesc->boundinfo->indexes[bound_offset + 1];
}
}
break;
default:
elog(ERROR, "unexpected partition strategy: %d",
(int) key->strategy);
}
/*
* part_index < 0 means we failed to find a partition of this parent. Use
* the default partition, if there is one.
*/
if (part_index < 0)
part_index = partdesc->boundinfo->default_index;
return part_index;
}
/*
* ExecBuildSlotPartitionKeyDescription
*
......
......@@ -15,12 +15,12 @@
#include "postgres.h"
#include "miscadmin.h"
#include "catalog/partition.h"
#include "optimizer/clauses.h"
#include "optimizer/joininfo.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/prep.h"
#include "partitioning/partbounds.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
......
......@@ -38,6 +38,7 @@
#include "optimizer/plancat.h"
#include "optimizer/predtest.h"
#include "optimizer/prep.h"
#include "partitioning/partbounds.h"
#include "parser/parse_relation.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
......@@ -45,8 +46,9 @@
#include "storage/bufmgr.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
#include "utils/partcache.h"
#include "utils/rel.h"
#include "utils/syscache.h"
#include "utils/snapmgr.h"
......
......@@ -17,7 +17,6 @@
#include <limits.h>
#include "miscadmin.h"
#include "catalog/partition.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
......@@ -27,6 +26,7 @@
#include "optimizer/prep.h"
#include "optimizer/restrictinfo.h"
#include "optimizer/tlist.h"
#include "partitioning/partbounds.h"
#include "utils/hsearch.h"
......
......@@ -63,6 +63,7 @@
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/partcache.h"
#include "utils/rel.h"
#include "utils/ruleutils.h"
#include "utils/syscache.h"
......
......@@ -12,6 +12,6 @@ subdir = src/backend/partitioning
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
OBJS = partprune.o
OBJS = partprune.o partbounds.o
include $(top_srcdir)/src/backend/common.mk
/*-------------------------------------------------------------------------
*
* partbounds.c
* Support routines for manipulating partition bounds
*
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/partitioning/partbounds.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "catalog/partition.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_type.h"
#include "commands/tablecmds.h"
#include "executor/executor.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "parser/parse_coerce.h"
#include "partitioning/partprune.h"
#include "partitioning/partbounds.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/hashutils.h"
#include "utils/lsyscache.h"
#include "utils/partcache.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
#include "utils/ruleutils.h"
#include "utils/syscache.h"
static int get_partition_bound_num_indexes(PartitionBoundInfo b);
static Expr *make_partition_op_expr(PartitionKey key, int keynum,
uint16 strategy, Expr *arg1, Expr *arg2);
static Oid get_partition_operator(PartitionKey key, int col,
StrategyNumber strategy, bool *need_relabel);
static List *get_qual_for_hash(Relation parent, PartitionBoundSpec *spec);
static List *get_qual_for_list(Relation parent, PartitionBoundSpec *spec);
static List *get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
bool for_default);
static void get_range_key_properties(PartitionKey key, int keynum,
PartitionRangeDatum *ldatum,
PartitionRangeDatum *udatum,
ListCell **partexprs_item,
Expr **keyCol,
Const **lower_val, Const **upper_val);
static List *get_range_nulltest(PartitionKey key);
/*
* get_qual_from_partbound
* Given a parser node for partition bound, return the list of executable
* expressions as partition constraint
*/
List *
get_qual_from_partbound(Relation rel, Relation parent,
PartitionBoundSpec *spec)
{
PartitionKey key = RelationGetPartitionKey(parent);
List *my_qual = NIL;
Assert(key != NULL);
switch (key->strategy)
{
case PARTITION_STRATEGY_HASH:
Assert(spec->strategy == PARTITION_STRATEGY_HASH);
my_qual = get_qual_for_hash(parent, spec);
break;
case PARTITION_STRATEGY_LIST:
Assert(spec->strategy == PARTITION_STRATEGY_LIST);
my_qual = get_qual_for_list(parent, spec);
break;
case PARTITION_STRATEGY_RANGE:
Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
my_qual = get_qual_for_range(parent, spec, false);
break;
default:
elog(ERROR, "unexpected partition strategy: %d",
(int) key->strategy);
}
return my_qual;
}
/*
* Are two partition bound collections logically equal?
*
* Used in the keep logic of relcache.c (ie, in RelationClearRelation()).
* This is also useful when b1 and b2 are bound collections of two separate
* relations, respectively, because PartitionBoundInfo is a canonical
* representation of partition bounds.
*/
bool
partition_bounds_equal(int partnatts, int16 *parttyplen, bool *parttypbyval,
PartitionBoundInfo b1, PartitionBoundInfo b2)
{
int i;
if (b1->strategy != b2->strategy)
return false;
if (b1->ndatums != b2->ndatums)
return false;
if (b1->null_index != b2->null_index)
return false;
if (b1->default_index != b2->default_index)
return false;
if (b1->strategy == PARTITION_STRATEGY_HASH)
{
int greatest_modulus = get_hash_partition_greatest_modulus(b1);
/*
* If two hash partitioned tables have different greatest moduli,
* their partition schemes don't match.
*/
if (greatest_modulus != get_hash_partition_greatest_modulus(b2))
return false;
/*
* We arrange the partitions in the ascending order of their modulus
* and remainders. Also every modulus is factor of next larger
* modulus. Therefore we can safely store index of a given partition
* in indexes array at remainder of that partition. Also entries at
* (remainder + N * modulus) positions in indexes array are all same
* for (modulus, remainder) specification for any partition. Thus
* datums array from both the given bounds are same, if and only if
* their indexes array will be same. So, it suffices to compare
* indexes array.
*/
for (i = 0; i < greatest_modulus; i++)
if (b1->indexes[i] != b2->indexes[i])
return false;
#ifdef USE_ASSERT_CHECKING
/*
* Nonetheless make sure that the bounds are indeed same when the
* indexes match. Hash partition bound stores modulus and remainder
* at b1->datums[i][0] and b1->datums[i][1] position respectively.
*/
for (i = 0; i < b1->ndatums; i++)
Assert((b1->datums[i][0] == b2->datums[i][0] &&
b1->datums[i][1] == b2->datums[i][1]));
#endif
}
else
{
for (i = 0; i < b1->ndatums; i++)
{
int j;
for (j = 0; j < partnatts; j++)
{
/* For range partitions, the bounds might not be finite. */
if (b1->kind != NULL)
{
/* The different kinds of bound all differ from each other */
if (b1->kind[i][j] != b2->kind[i][j])
return false;
/*
* Non-finite bounds are equal without further
* examination.
*/
if (b1->kind[i][j] != PARTITION_RANGE_DATUM_VALUE)
continue;
}
/*
* Compare the actual values. Note that it would be both
* incorrect and unsafe to invoke the comparison operator
* derived from the partitioning specification here. It would
* be incorrect because we want the relcache entry to be
* updated for ANY change to the partition bounds, not just
* those that the partitioning operator thinks are
* significant. It would be unsafe because we might reach
* this code in the context of an aborted transaction, and an
* arbitrary partitioning operator might not be safe in that
* context. datumIsEqual() should be simple enough to be
* safe.
*/
if (!datumIsEqual(b1->datums[i][j], b2->datums[i][j],
parttypbyval[j], parttyplen[j]))
return false;
}
if (b1->indexes[i] != b2->indexes[i])
return false;
}
/* There are ndatums+1 indexes in case of range partitions */
if (b1->strategy == PARTITION_STRATEGY_RANGE &&
b1->indexes[i] != b2->indexes[i])
return false;
}
return true;
}
/*
* Return a copy of given PartitionBoundInfo structure. The data types of bounds
* are described by given partition key specification.
*/
PartitionBoundInfo
partition_bounds_copy(PartitionBoundInfo src,
PartitionKey key)
{
PartitionBoundInfo dest;
int i;
int ndatums;
int partnatts;
int num_indexes;
dest = (PartitionBoundInfo) palloc(sizeof(PartitionBoundInfoData));
dest->strategy = src->strategy;
ndatums = dest->ndatums = src->ndatums;
partnatts = key->partnatts;
num_indexes = get_partition_bound_num_indexes(src);
/* List partitioned tables have only a single partition key. */
Assert(key->strategy != PARTITION_STRATEGY_LIST || partnatts == 1);
dest->datums = (Datum **) palloc(sizeof(Datum *) * ndatums);
if (src->kind != NULL)
{
dest->kind = (PartitionRangeDatumKind **) palloc(ndatums *
sizeof(PartitionRangeDatumKind *));
for (i = 0; i < ndatums; i++)
{
dest->kind[i] = (PartitionRangeDatumKind *) palloc(partnatts *
sizeof(PartitionRangeDatumKind));
memcpy(dest->kind[i], src->kind[i],
sizeof(PartitionRangeDatumKind) * key->partnatts);
}
}
else
dest->kind = NULL;
for (i = 0; i < ndatums; i++)
{
int j;
/*
* For a corresponding to hash partition, datums array will have two
* elements - modulus and remainder.
*/
bool hash_part = (key->strategy == PARTITION_STRATEGY_HASH);
int natts = hash_part ? 2 : partnatts;
dest->datums[i] = (Datum *) palloc(sizeof(Datum) * natts);
for (j = 0; j < natts; j++)
{
bool byval;
int typlen;
if (hash_part)
{
typlen = sizeof(int32); /* Always int4 */
byval = true; /* int4 is pass-by-value */
}
else
{
byval = key->parttypbyval[j];
typlen = key->parttyplen[j];
}
if (dest->kind == NULL ||
dest->kind[i][j] == PARTITION_RANGE_DATUM_VALUE)
dest->datums[i][j] = datumCopy(src->datums[i][j],
byval, typlen);
}
}
dest->indexes = (int *) palloc(sizeof(int) * num_indexes);
memcpy(dest->indexes, src->indexes, sizeof(int) * num_indexes);
dest->null_index = src->null_index;
dest->default_index = src->default_index;
return dest;
}
/*
* check_new_partition_bound
*
* Checks if the new partition's bound overlaps any of the existing partitions
* of parent. Also performs additional checks as necessary per strategy.
*/
void
check_new_partition_bound(char *relname, Relation parent,
PartitionBoundSpec *spec)
{
PartitionKey key = RelationGetPartitionKey(parent);
PartitionDesc partdesc = RelationGetPartitionDesc(parent);
PartitionBoundInfo boundinfo = partdesc->boundinfo;
ParseState *pstate = make_parsestate(NULL);
int with = -1;
bool overlap = false;
if (spec->is_default)
{
/*
* The default partition bound never conflicts with any other
* partition's; if that's what we're attaching, the only possible
* problem is that one already exists, so check for that and we're
* done.
*/
if (boundinfo == NULL || !partition_bound_has_default(boundinfo))
return;
/* Default partition already exists, error out. */
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("partition \"%s\" conflicts with existing default partition \"%s\"",
relname, get_rel_name(partdesc->oids[boundinfo->default_index])),
parser_errposition(pstate, spec->location)));
}
switch (key->strategy)
{
case PARTITION_STRATEGY_HASH:
{
Assert(spec->strategy == PARTITION_STRATEGY_HASH);
Assert(spec->remainder >= 0 && spec->remainder < spec->modulus);
if (partdesc->nparts > 0)
{
Datum **datums = boundinfo->datums;
int ndatums = boundinfo->ndatums;
int greatest_modulus;
int remainder;
int offset;
bool valid_modulus = true;
int prev_modulus, /* Previous largest modulus */
next_modulus; /* Next largest modulus */
/*
* Check rule that every modulus must be a factor of the
* next larger modulus. For example, if you have a bunch
* of partitions that all have modulus 5, you can add a
* new partition with modulus 10 or a new partition with
* modulus 15, but you cannot add both a partition with
* modulus 10 and a partition with modulus 15, because 10
* is not a factor of 15.
*
* Get the greatest (modulus, remainder) pair contained in
* boundinfo->datums that is less than or equal to the
* (spec->modulus, spec->remainder) pair.
*/
offset = partition_hash_bsearch(boundinfo,
spec->modulus,
spec->remainder);
if (offset < 0)
{
next_modulus = DatumGetInt32(datums[0][0]);
valid_modulus = (next_modulus % spec->modulus) == 0;
}
else
{
prev_modulus = DatumGetInt32(datums[offset][0]);
valid_modulus = (spec->modulus % prev_modulus) == 0;
if (valid_modulus && (offset + 1) < ndatums)
{
next_modulus = DatumGetInt32(datums[offset + 1][0]);
valid_modulus = (next_modulus % spec->modulus) == 0;
}
}
if (!valid_modulus)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("every hash partition modulus must be a factor of the next larger modulus")));
greatest_modulus = get_hash_partition_greatest_modulus(boundinfo);
remainder = spec->remainder;
/*
* Normally, the lowest remainder that could conflict with
* the new partition is equal to the remainder specified
* for the new partition, but when the new partition has a
* modulus higher than any used so far, we need to adjust.
*/
if (remainder >= greatest_modulus)
remainder = remainder % greatest_modulus;
/* Check every potentially-conflicting remainder. */
do
{
if (boundinfo->indexes[remainder] != -1)
{
overlap = true;
with = boundinfo->indexes[remainder];
break;
}
remainder += spec->modulus;
} while (remainder < greatest_modulus);
}
break;
}
case PARTITION_STRATEGY_LIST:
{
Assert(spec->strategy == PARTITION_STRATEGY_LIST);
if (partdesc->nparts > 0)
{
ListCell *cell;
Assert(boundinfo &&
boundinfo->strategy == PARTITION_STRATEGY_LIST &&
(boundinfo->ndatums > 0 ||
partition_bound_accepts_nulls(boundinfo) ||
partition_bound_has_default(boundinfo)));
foreach(cell, spec->listdatums)
{
Const *val = castNode(Const, lfirst(cell));
if (!val->constisnull)
{
int offset;
bool equal;
offset = partition_list_bsearch(&key->partsupfunc[0],
key->partcollation,
boundinfo,
val->constvalue,
&equal);
if (offset >= 0 && equal)
{
overlap = true;
with = boundinfo->indexes[offset];
break;
}
}
else if (partition_bound_accepts_nulls(boundinfo))
{
overlap = true;
with = boundinfo->null_index;
break;
}
}
}
break;
}
case PARTITION_STRATEGY_RANGE:
{
PartitionRangeBound *lower,
*upper;
Assert(spec->strategy == PARTITION_STRATEGY_RANGE);
lower = make_one_range_bound(key, -1, spec->lowerdatums, true);
upper = make_one_range_bound(key, -1, spec->upperdatums, false);
/*
* First check if the resulting range would be empty with
* specified lower and upper bounds
*/
if (partition_rbound_cmp(key->partnatts, key->partsupfunc,
key->partcollation, lower->datums,
lower->kind, true, upper) >= 0)
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("empty range bound specified for partition \"%s\"",
relname),
errdetail("Specified lower bound %s is greater than or equal to upper bound %s.",
get_range_partbound_string(spec->lowerdatums),
get_range_partbound_string(spec->upperdatums)),
parser_errposition(pstate, spec->location)));
}
if (partdesc->nparts > 0)
{
int offset;
bool equal;
Assert(boundinfo &&
boundinfo->strategy == PARTITION_STRATEGY_RANGE &&
(boundinfo->ndatums > 0 ||
partition_bound_has_default(boundinfo)));
/*
* Test whether the new lower bound (which is treated
* inclusively as part of the new partition) lies inside
* an existing partition, or in a gap.
*
* If it's inside an existing partition, the bound at
* offset + 1 will be the upper bound of that partition,
* and its index will be >= 0.
*
* If it's in a gap, the bound at offset + 1 will be the
* lower bound of the next partition, and its index will
* be -1. This is also true if there is no next partition,
* since the index array is initialised with an extra -1
* at the end.
*/
offset = partition_range_bsearch(key->partnatts,
key->partsupfunc,
key->partcollation,
boundinfo, lower,
&equal);
if (boundinfo->indexes[offset + 1] < 0)
{
/*
* Check that the new partition will fit in the gap.
* For it to fit, the new upper bound must be less
* than or equal to the lower bound of the next
* partition, if there is one.
*/
if (offset + 1 < boundinfo->ndatums)
{
int32 cmpval;
Datum *datums;
PartitionRangeDatumKind *kind;
bool is_lower;
datums = boundinfo->datums[offset + 1];
kind = boundinfo->kind[offset + 1];
is_lower = (boundinfo->indexes[offset + 1] == -1);
cmpval = partition_rbound_cmp(key->partnatts,
key->partsupfunc,
key->partcollation,
datums, kind,
is_lower, upper);
if (cmpval < 0)
{
/*
* The new partition overlaps with the
* existing partition between offset + 1 and
* offset + 2.
*/
overlap = true;
with = boundinfo->indexes[offset + 2];
}
}
}
else
{
/*
* The new partition overlaps with the existing
* partition between offset and offset + 1.
*/
overlap = true;
with = boundinfo->indexes[offset + 1];
}
}
break;
}
default:
elog(ERROR, "unexpected partition strategy: %d",
(int) key->strategy);
}
if (overlap)
{
Assert(with >= 0);
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("partition \"%s\" would overlap partition \"%s\"",
relname, get_rel_name(partdesc->oids[with])),
parser_errposition(pstate, spec->location)));
}
}
/*
* check_default_allows_bound
*
* This function checks if there exists a row in the default partition that
* would properly belong to the new partition being added. If it finds one,
* it throws an error.
*/
void
check_default_allows_bound(Relation parent, Relation default_rel,
PartitionBoundSpec *new_spec)
{
List *new_part_constraints;
List *def_part_constraints;
List *all_parts;
ListCell *lc;
new_part_constraints = (new_spec->strategy == PARTITION_STRATEGY_LIST)
? get_qual_for_list(parent, new_spec)
: get_qual_for_range(parent, new_spec, false);
def_part_constraints =
get_proposed_default_constraint(new_part_constraints);
/*
* If the existing constraints on the default partition imply that it will
* not contain any row that would belong to the new partition, we can
* avoid scanning the default partition.
*/
if (PartConstraintImpliedByRelConstraint(default_rel, def_part_constraints))
{
ereport(INFO,
(errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints",
RelationGetRelationName(default_rel))));
return;
}
/*
* Scan the default partition and its subpartitions, and check for rows
* that do not satisfy the revised partition constraints.
*/
if (default_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
all_parts = find_all_inheritors(RelationGetRelid(default_rel),
AccessExclusiveLock, NULL);
else
all_parts = list_make1_oid(RelationGetRelid(default_rel));
foreach(lc, all_parts)
{
Oid part_relid = lfirst_oid(lc);
Relation part_rel;
Expr *constr;
Expr *partition_constraint;
EState *estate;
HeapTuple tuple;
ExprState *partqualstate = NULL;
Snapshot snapshot;
TupleDesc tupdesc;
ExprContext *econtext;
HeapScanDesc scan;
MemoryContext oldCxt;
TupleTableSlot *tupslot;
/* Lock already taken above. */
if (part_relid != RelationGetRelid(default_rel))
{
part_rel = heap_open(part_relid, NoLock);
/*
* If the partition constraints on default partition child imply
* that it will not contain any row that would belong to the new
* partition, we can avoid scanning the child table.
*/
if (PartConstraintImpliedByRelConstraint(part_rel,
def_part_constraints))
{
ereport(INFO,
(errmsg("updated partition constraint for default partition \"%s\" is implied by existing constraints",
RelationGetRelationName(part_rel))));
heap_close(part_rel, NoLock);
continue;
}
}
else
part_rel = default_rel;
/*
* Only RELKIND_RELATION relations (i.e. leaf partitions) need to be
* scanned.
*/
if (part_rel->rd_rel->relkind != RELKIND_RELATION)
{
if (part_rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
ereport(WARNING,
(errcode(ERRCODE_CHECK_VIOLATION),
errmsg("skipped scanning foreign table \"%s\" which is a partition of default partition \"%s\"",
RelationGetRelationName(part_rel),
RelationGetRelationName(default_rel))));
if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
heap_close(part_rel, NoLock);
continue;
}
tupdesc = CreateTupleDescCopy(RelationGetDescr(part_rel));
constr = linitial(def_part_constraints);
partition_constraint = (Expr *)
map_partition_varattnos((List *) constr,
1, part_rel, parent, NULL);
estate = CreateExecutorState();
/* Build expression execution states for partition check quals */
partqualstate = ExecPrepareExpr(partition_constraint, estate);
econtext = GetPerTupleExprContext(estate);
snapshot = RegisterSnapshot(GetLatestSnapshot());
scan = heap_beginscan(part_rel, snapshot, 0, NULL);
tupslot = MakeSingleTupleTableSlot(tupdesc);
/*
* Switch to per-tuple memory context and reset it for each tuple
* produced, so we don't leak memory.
*/
oldCxt = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
ExecStoreTuple(tuple, tupslot, InvalidBuffer, false);
econtext->ecxt_scantuple = tupslot;
if (!ExecCheck(partqualstate, econtext))
ereport(ERROR,
(errcode(ERRCODE_CHECK_VIOLATION),
errmsg("updated partition constraint for default partition \"%s\" would be violated by some row",
RelationGetRelationName(default_rel))));
ResetExprContext(econtext);
CHECK_FOR_INTERRUPTS();
}
MemoryContextSwitchTo(oldCxt);
heap_endscan(scan);
UnregisterSnapshot(snapshot);
ExecDropSingleTupleTableSlot(tupslot);
FreeExecutorState(estate);
if (RelationGetRelid(default_rel) != RelationGetRelid(part_rel))
heap_close(part_rel, NoLock); /* keep the lock until commit */
}
}
/*
* get_hash_partition_greatest_modulus
*
* Returns the greatest modulus of the hash partition bound. The greatest
* modulus will be at the end of the datums array because hash partitions are
* arranged in the ascending order of their modulus and remainders.
*/
int
get_hash_partition_greatest_modulus(PartitionBoundInfo bound)
{
Assert(bound && bound->strategy == PARTITION_STRATEGY_HASH);
Assert(bound->datums && bound->ndatums > 0);
Assert(DatumGetInt32(bound->datums[bound->ndatums - 1][0]) > 0);
return DatumGetInt32(bound->datums[bound->ndatums - 1][0]);
}
/*
* make_one_range_bound
*
* Return a PartitionRangeBound given a list of PartitionRangeDatum elements
* and a flag telling whether the bound is lower or not. Made into a function
* because there are multiple sites that want to use this facility.
*/
PartitionRangeBound *
make_one_range_bound(PartitionKey key, int index, List *datums, bool lower)
{
PartitionRangeBound *bound;
ListCell *lc;
int i;
Assert(datums != NIL);
bound = (PartitionRangeBound *) palloc0(sizeof(PartitionRangeBound));
bound->index = index;
bound->datums = (Datum *) palloc0(key->partnatts * sizeof(Datum));
bound->kind = (PartitionRangeDatumKind *) palloc0(key->partnatts *
sizeof(PartitionRangeDatumKind));
bound->lower = lower;
i = 0;
foreach(lc, datums)
{
PartitionRangeDatum *datum = castNode(PartitionRangeDatum, lfirst(lc));
/* What's contained in this range datum? */
bound->kind[i] = datum->kind;
if (datum->kind == PARTITION_RANGE_DATUM_VALUE)
{
Const *val = castNode(Const, datum->value);
if (val->constisnull)
elog(ERROR, "invalid range bound datum");
bound->datums[i] = val->constvalue;
}
i++;
}
return bound;
}
/*
* partition_rbound_cmp
*
* Return for two range bounds whether the 1st one (specified in datums1,
* kind1, and lower1) is <, =, or > the bound specified in *b2.
*
* partnatts, partsupfunc and partcollation give the number of attributes in the
* bounds to be compared, comparison function to be used and the collations of
* attributes, respectively.
*
* Note that if the values of the two range bounds compare equal, then we take
* into account whether they are upper or lower bounds, and an upper bound is
* considered to be smaller than a lower bound. This is important to the way
* that RelationBuildPartitionDesc() builds the PartitionBoundInfoData
* structure, which only stores the upper bound of a common boundary between
* two contiguous partitions.
*/
int32
partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
Oid *partcollation,
Datum *datums1, PartitionRangeDatumKind *kind1,
bool lower1, PartitionRangeBound *b2)
{
int32 cmpval = 0; /* placate compiler */
int i;
Datum *datums2 = b2->datums;
PartitionRangeDatumKind *kind2 = b2->kind;
bool lower2 = b2->lower;
for (i = 0; i < partnatts; i++)
{
/*
* First, handle cases where the column is unbounded, which should not
* invoke the comparison procedure, and should not consider any later
* columns. Note that the PartitionRangeDatumKind enum elements
* compare the same way as the values they represent.
*/
if (kind1[i] < kind2[i])
return -1;
else if (kind1[i] > kind2[i])
return 1;
else if (kind1[i] != PARTITION_RANGE_DATUM_VALUE)
/*
* The column bounds are both MINVALUE or both MAXVALUE. No later
* columns should be considered, but we still need to compare
* whether they are upper or lower bounds.
*/
break;
cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i],
partcollation[i],
datums1[i],
datums2[i]));
if (cmpval != 0)
break;
}
/*
* If the comparison is anything other than equal, we're done. If they
* compare equal though, we still have to consider whether the boundaries
* are inclusive or exclusive. Exclusive one is considered smaller of the
* two.
*/
if (cmpval == 0 && lower1 != lower2)
cmpval = lower1 ? 1 : -1;
return cmpval;
}
/*
* partition_rbound_datum_cmp
*
* Return whether range bound (specified in rb_datums, rb_kind, and rb_lower)
* is <, =, or > partition key of tuple (tuple_datums)
*
* n_tuple_datums, partsupfunc and partcollation give number of attributes in
* the bounds to be compared, comparison function to be used and the collations
* of attributes resp.
*
*/
int32
partition_rbound_datum_cmp(FmgrInfo *partsupfunc, Oid *partcollation,
Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
Datum *tuple_datums, int n_tuple_datums)
{
int i;
int32 cmpval = -1;
for (i = 0; i < n_tuple_datums; i++)
{
if (rb_kind[i] == PARTITION_RANGE_DATUM_MINVALUE)
return -1;
else if (rb_kind[i] == PARTITION_RANGE_DATUM_MAXVALUE)
return 1;
cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[i],
partcollation[i],
rb_datums[i],
tuple_datums[i]));
if (cmpval != 0)
break;
}
return cmpval;
}
/*
* partition_hbound_cmp
*
* Compares modulus first, then remainder if modulus are equal.
*/
int32
partition_hbound_cmp(int modulus1, int remainder1, int modulus2, int remainder2)
{
if (modulus1 < modulus2)
return -1;
if (modulus1 > modulus2)
return 1;
if (modulus1 == modulus2 && remainder1 != remainder2)
return (remainder1 > remainder2) ? 1 : -1;
return 0;
}
/*
* partition_list_bsearch
* Returns the index of the greatest bound datum that is less than equal
* to the given value or -1 if all of the bound datums are greater
*
* *is_equal is set to true if the bound datum at the returned index is equal
* to the input value.
*/
int
partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
PartitionBoundInfo boundinfo,
Datum value, bool *is_equal)
{
int lo,
hi,
mid;
lo = -1;
hi = boundinfo->ndatums - 1;
while (lo < hi)
{
int32 cmpval;
mid = (lo + hi + 1) / 2;
cmpval = DatumGetInt32(FunctionCall2Coll(&partsupfunc[0],
partcollation[0],
boundinfo->datums[mid][0],
value));
if (cmpval <= 0)
{
lo = mid;
*is_equal = (cmpval == 0);
if (*is_equal)
break;
}
else
hi = mid - 1;
}
return lo;
}
/*
* partition_range_bsearch
* Returns the index of the greatest range bound that is less than or
* equal to the given range bound or -1 if all of the range bounds are
* greater
*
* *is_equal is set to true if the range bound at the returned index is equal
* to the input range bound
*/
int
partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
Oid *partcollation,
PartitionBoundInfo boundinfo,
PartitionRangeBound *probe, bool *is_equal)
{
int lo,
hi,
mid;
lo = -1;
hi = boundinfo->ndatums - 1;
while (lo < hi)
{
int32 cmpval;
mid = (lo + hi + 1) / 2;
cmpval = partition_rbound_cmp(partnatts, partsupfunc,
partcollation,
boundinfo->datums[mid],
boundinfo->kind[mid],
(boundinfo->indexes[mid] == -1),
probe);
if (cmpval <= 0)
{
lo = mid;
*is_equal = (cmpval == 0);
if (*is_equal)
break;
}
else
hi = mid - 1;
}
return lo;
}
/*
* partition_range_bsearch
* Returns the index of the greatest range bound that is less than or
* equal to the given tuple or -1 if all of the range bounds are greater
*
* *is_equal is set to true if the range bound at the returned index is equal
* to the input tuple.
*/
int
partition_range_datum_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
PartitionBoundInfo boundinfo,
int nvalues, Datum *values, bool *is_equal)
{
int lo,
hi,
mid;
lo = -1;
hi = boundinfo->ndatums - 1;
while (lo < hi)
{
int32 cmpval;
mid = (lo + hi + 1) / 2;
cmpval = partition_rbound_datum_cmp(partsupfunc,
partcollation,
boundinfo->datums[mid],
boundinfo->kind[mid],
values,
nvalues);
if (cmpval <= 0)
{
lo = mid;
*is_equal = (cmpval == 0);
if (*is_equal)
break;
}
else
hi = mid - 1;
}
return lo;
}
/*
* partition_hash_bsearch
* Returns the index of the greatest (modulus, remainder) pair that is
* less than or equal to the given (modulus, remainder) pair or -1 if
* all of them are greater
*/
int
partition_hash_bsearch(PartitionBoundInfo boundinfo,
int modulus, int remainder)
{
int lo,
hi,
mid;
lo = -1;
hi = boundinfo->ndatums - 1;
while (lo < hi)
{
int32 cmpval,
bound_modulus,
bound_remainder;
mid = (lo + hi + 1) / 2;
bound_modulus = DatumGetInt32(boundinfo->datums[mid][0]);
bound_remainder = DatumGetInt32(boundinfo->datums[mid][1]);
cmpval = partition_hbound_cmp(bound_modulus, bound_remainder,
modulus, remainder);
if (cmpval <= 0)
{
lo = mid;
if (cmpval == 0)
break;
}
else
hi = mid - 1;
}
return lo;
}
/*
* get_partition_bound_num_indexes
*
* Returns the number of the entries in the partition bound indexes array.
*/
static int
get_partition_bound_num_indexes(PartitionBoundInfo bound)
{
int num_indexes;
Assert(bound);
switch (bound->strategy)
{
case PARTITION_STRATEGY_HASH:
/*
* The number of the entries in the indexes array is same as the
* greatest modulus.
*/
num_indexes = get_hash_partition_greatest_modulus(bound);
break;
case PARTITION_STRATEGY_LIST:
num_indexes = bound->ndatums;
break;
case PARTITION_STRATEGY_RANGE:
/* Range partitioned table has an extra index. */
num_indexes = bound->ndatums + 1;
break;
default:
elog(ERROR, "unexpected partition strategy: %d",
(int) bound->strategy);
}
return num_indexes;
}
/*
* get_partition_operator
*
* Return oid of the operator of given strategy for a given partition key
* column.
*/
static Oid
get_partition_operator(PartitionKey key, int col, StrategyNumber strategy,
bool *need_relabel)
{
Oid operoid;
/*
* First check if there exists an operator of the given strategy, with
* this column's type as both its lefttype and righttype, in the
* partitioning operator family specified for the column.
*/
operoid = get_opfamily_member(key->partopfamily[col],
key->parttypid[col],
key->parttypid[col],
strategy);
/*
* If one doesn't exist, we must resort to using an operator in the same
* operator family but with the operator class declared input type. It is
* OK to do so, because the column's type is known to be binary-coercible
* with the operator class input type (otherwise, the operator class in
* question would not have been accepted as the partitioning operator
* class). We must however inform the caller to wrap the non-Const
* expression with a RelabelType node to denote the implicit coercion. It
* ensures that the resulting expression structurally matches similarly
* processed expressions within the optimizer.
*/
if (!OidIsValid(operoid))
{
operoid = get_opfamily_member(key->partopfamily[col],
key->partopcintype[col],
key->partopcintype[col],
strategy);
if (!OidIsValid(operoid))
elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
strategy, key->partopcintype[col], key->partopcintype[col],
key->partopfamily[col]);
*need_relabel = true;
}
else
*need_relabel = false;
return operoid;
}
/*
* make_partition_op_expr
* Returns an Expr for the given partition key column with arg1 and
* arg2 as its leftop and rightop, respectively
*/
static Expr *
make_partition_op_expr(PartitionKey key, int keynum,
uint16 strategy, Expr *arg1, Expr *arg2)
{
Oid operoid;
bool need_relabel = false;
Expr *result = NULL;
/* Get the correct btree operator for this partitioning column */
operoid = get_partition_operator(key, keynum, strategy, &need_relabel);
/*
* Chosen operator may be such that the non-Const operand needs to be
* coerced, so apply the same; see the comment in
* get_partition_operator().
*/
if (!IsA(arg1, Const) &&
(need_relabel ||
key->partcollation[keynum] != key->parttypcoll[keynum]))
arg1 = (Expr *) makeRelabelType(arg1,
key->partopcintype[keynum],
-1,
key->partcollation[keynum],
COERCE_EXPLICIT_CAST);
/* Generate the actual expression */
switch (key->strategy)
{
case PARTITION_STRATEGY_LIST:
{
List *elems = (List *) arg2;
int nelems = list_length(elems);
Assert(nelems >= 1);
Assert(keynum == 0);
if (nelems > 1 &&
!type_is_array(key->parttypid[keynum]))
{
ArrayExpr *arrexpr;
ScalarArrayOpExpr *saopexpr;
/* Construct an ArrayExpr for the right-hand inputs */
arrexpr = makeNode(ArrayExpr);
arrexpr->array_typeid =
get_array_type(key->parttypid[keynum]);
arrexpr->array_collid = key->parttypcoll[keynum];
arrexpr->element_typeid = key->parttypid[keynum];
arrexpr->elements = elems;
arrexpr->multidims = false;
arrexpr->location = -1;
/* Build leftop = ANY (rightop) */
saopexpr = makeNode(ScalarArrayOpExpr);
saopexpr->opno = operoid;
saopexpr->opfuncid = get_opcode(operoid);
saopexpr->useOr = true;
saopexpr->inputcollid = key->partcollation[keynum];
saopexpr->args = list_make2(arg1, arrexpr);
saopexpr->location = -1;
result = (Expr *) saopexpr;
}
else
{
List *elemops = NIL;
ListCell *lc;
foreach(lc, elems)
{
Expr *elem = lfirst(lc),
*elemop;
elemop = make_opclause(operoid,
BOOLOID,
false,
arg1, elem,
InvalidOid,
key->partcollation[keynum]);
elemops = lappend(elemops, elemop);
}
result = nelems > 1 ? makeBoolExpr(OR_EXPR, elemops, -1) : linitial(elemops);
}
break;
}
case PARTITION_STRATEGY_RANGE:
result = make_opclause(operoid,
BOOLOID,
false,
arg1, arg2,
InvalidOid,
key->partcollation[keynum]);
break;
default:
elog(ERROR, "invalid partitioning strategy");
break;
}
return result;
}
/*
* get_qual_for_hash
*
* Returns a CHECK constraint expression to use as a hash partition's
* constraint, given the parent relation and partition bound structure.
*
* The partition constraint for a hash partition is always a call to the
* built-in function satisfies_hash_partition().
*/
static List *
get_qual_for_hash(Relation parent, PartitionBoundSpec *spec)
{
PartitionKey key = RelationGetPartitionKey(parent);
FuncExpr *fexpr;
Node *relidConst;
Node *modulusConst;
Node *remainderConst;
List *args;
ListCell *partexprs_item;
int i;
/* Fixed arguments. */
relidConst = (Node *) makeConst(OIDOID,
-1,
InvalidOid,
sizeof(Oid),
ObjectIdGetDatum(RelationGetRelid(parent)),
false,
true);
modulusConst = (Node *) makeConst(INT4OID,
-1,
InvalidOid,
sizeof(int32),
Int32GetDatum(spec->modulus),
false,
true);
remainderConst = (Node *) makeConst(INT4OID,
-1,
InvalidOid,
sizeof(int32),
Int32GetDatum(spec->remainder),
false,
true);
args = list_make3(relidConst, modulusConst, remainderConst);
partexprs_item = list_head(key->partexprs);
/* Add an argument for each key column. */
for (i = 0; i < key->partnatts; i++)
{
Node *keyCol;
/* Left operand */
if (key->partattrs[i] != 0)
{
keyCol = (Node *) makeVar(1,
key->partattrs[i],
key->parttypid[i],
key->parttypmod[i],
key->parttypcoll[i],
0);
}
else
{
keyCol = (Node *) copyObject(lfirst(partexprs_item));
partexprs_item = lnext(partexprs_item);
}
args = lappend(args, keyCol);
}
fexpr = makeFuncExpr(F_SATISFIES_HASH_PARTITION,
BOOLOID,
args,
InvalidOid,
InvalidOid,
COERCE_EXPLICIT_CALL);
return list_make1(fexpr);
}
/*
* get_qual_for_list
*
* Returns an implicit-AND list of expressions to use as a list partition's
* constraint, given the parent relation and partition bound structure.
*
* The function returns NIL for a default partition when it's the only
* partition since in that case there is no constraint.
*/
static List *
get_qual_for_list(Relation parent, PartitionBoundSpec *spec)
{
PartitionKey key = RelationGetPartitionKey(parent);
List *result;
Expr *keyCol;
Expr *opexpr;
NullTest *nulltest;
ListCell *cell;
List *elems = NIL;
bool list_has_null = false;
/*
* Only single-column list partitioning is supported, so we are worried
* only about the partition key with index 0.
*/
Assert(key->partnatts == 1);
/* Construct Var or expression representing the partition column */
if (key->partattrs[0] != 0)
keyCol = (Expr *) makeVar(1,
key->partattrs[0],
key->parttypid[0],
key->parttypmod[0],
key->parttypcoll[0],
0);
else
keyCol = (Expr *) copyObject(linitial(key->partexprs));
/*
* For default list partition, collect datums for all the partitions. The
* default partition constraint should check that the partition key is
* equal to none of those.
*/
if (spec->is_default)
{
int i;
int ndatums = 0;
PartitionDesc pdesc = RelationGetPartitionDesc(parent);
PartitionBoundInfo boundinfo = pdesc->boundinfo;
if (boundinfo)
{
ndatums = boundinfo->ndatums;
if (partition_bound_accepts_nulls(boundinfo))
list_has_null = true;
}
/*
* If default is the only partition, there need not be any partition
* constraint on it.
*/
if (ndatums == 0 && !list_has_null)
return NIL;
for (i = 0; i < ndatums; i++)
{
Const *val;
/*
* Construct Const from known-not-null datum. We must be careful
* to copy the value, because our result has to be able to outlive
* the relcache entry we're copying from.
*/
val = makeConst(key->parttypid[0],
key->parttypmod[0],
key->parttypcoll[0],
key->parttyplen[0],
datumCopy(*boundinfo->datums[i],
key->parttypbyval[0],
key->parttyplen[0]),
false, /* isnull */
key->parttypbyval[0]);
elems = lappend(elems, val);
}
}
else
{
/*
* Create list of Consts for the allowed values, excluding any nulls.
*/
foreach(cell, spec->listdatums)
{
Const *val = castNode(Const, lfirst(cell));
if (val->constisnull)
list_has_null = true;
else
elems = lappend(elems, copyObject(val));
}
}
if (elems)
{
/*
* Generate the operator expression from the non-null partition
* values.
*/
opexpr = make_partition_op_expr(key, 0, BTEqualStrategyNumber,
keyCol, (Expr *) elems);
}
else
{
/*
* If there are no partition values, we don't need an operator
* expression.
*/
opexpr = NULL;
}
if (!list_has_null)
{
/*
* Gin up a "col IS NOT NULL" test that will be AND'd with the main
* expression. This might seem redundant, but the partition routing
* machinery needs it.
*/
nulltest = makeNode(NullTest);
nulltest->arg = keyCol;
nulltest->nulltesttype = IS_NOT_NULL;
nulltest->argisrow = false;
nulltest->location = -1;
result = opexpr ? list_make2(nulltest, opexpr) : list_make1(nulltest);
}
else
{
/*
* Gin up a "col IS NULL" test that will be OR'd with the main
* expression.
*/
nulltest = makeNode(NullTest);
nulltest->arg = keyCol;
nulltest->nulltesttype = IS_NULL;
nulltest->argisrow = false;
nulltest->location = -1;
if (opexpr)
{
Expr *or;
or = makeBoolExpr(OR_EXPR, list_make2(nulltest, opexpr), -1);
result = list_make1(or);
}
else
result = list_make1(nulltest);
}
/*
* Note that, in general, applying NOT to a constraint expression doesn't
* necessarily invert the set of rows it accepts, because NOT (NULL) is
* NULL. However, the partition constraints we construct here never
* evaluate to NULL, so applying NOT works as intended.
*/
if (spec->is_default)
{
result = list_make1(make_ands_explicit(result));
result = list_make1(makeBoolExpr(NOT_EXPR, result, -1));
}
return result;
}
/*
* get_qual_for_range
*
* Returns an implicit-AND list of expressions to use as a range partition's
* constraint, given the parent relation and partition bound structure.
*
* For a multi-column range partition key, say (a, b, c), with (al, bl, cl)
* as the lower bound tuple and (au, bu, cu) as the upper bound tuple, we
* generate an expression tree of the following form:
*
* (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
* AND
* (a > al OR (a = al AND b > bl) OR (a = al AND b = bl AND c >= cl))
* AND
* (a < au OR (a = au AND b < bu) OR (a = au AND b = bu AND c < cu))
*
* It is often the case that a prefix of lower and upper bound tuples contains
* the same values, for example, (al = au), in which case, we will emit an
* expression tree of the following form:
*
* (a IS NOT NULL) and (b IS NOT NULL) and (c IS NOT NULL)
* AND
* (a = al)
* AND
* (b > bl OR (b = bl AND c >= cl))
* AND
* (b < bu) OR (b = bu AND c < cu))
*
* If a bound datum is either MINVALUE or MAXVALUE, these expressions are
* simplified using the fact that any value is greater than MINVALUE and less
* than MAXVALUE. So, for example, if cu = MAXVALUE, c < cu is automatically
* true, and we need not emit any expression for it, and the last line becomes
*
* (b < bu) OR (b = bu), which is simplified to (b <= bu)
*
* In most common cases with only one partition column, say a, the following
* expression tree will be generated: a IS NOT NULL AND a >= al AND a < au
*
* For default partition, it returns the negation of the constraints of all
* the other partitions.
*
* External callers should pass for_default as false; we set it to true only
* when recursing.
*/
static List *
get_qual_for_range(Relation parent, PartitionBoundSpec *spec,
bool for_default)
{
List *result = NIL;
ListCell *cell1,
*cell2,
*partexprs_item,
*partexprs_item_saved;
int i,
j;
PartitionRangeDatum *ldatum,
*udatum;
PartitionKey key = RelationGetPartitionKey(parent);
Expr *keyCol;
Const *lower_val,
*upper_val;
List *lower_or_arms,
*upper_or_arms;
int num_or_arms,
current_or_arm;
ListCell *lower_or_start_datum,
*upper_or_start_datum;
bool need_next_lower_arm,
need_next_upper_arm;
if (spec->is_default)
{
List *or_expr_args = NIL;
PartitionDesc pdesc = RelationGetPartitionDesc(parent);
Oid *inhoids = pdesc->oids;
int nparts = pdesc->nparts,
i;
for (i = 0; i < nparts; i++)
{
Oid inhrelid = inhoids[i];
HeapTuple tuple;
Datum datum;
bool isnull;
PartitionBoundSpec *bspec;
tuple = SearchSysCache1(RELOID, inhrelid);
if (!HeapTupleIsValid(tuple))
elog(ERROR, "cache lookup failed for relation %u", inhrelid);
datum = SysCacheGetAttr(RELOID, tuple,
Anum_pg_class_relpartbound,
&isnull);
Assert(!isnull);
bspec = (PartitionBoundSpec *)
stringToNode(TextDatumGetCString(datum));
if (!IsA(bspec, PartitionBoundSpec))
elog(ERROR, "expected PartitionBoundSpec");
if (!bspec->is_default)
{
List *part_qual;
part_qual = get_qual_for_range(parent, bspec, true);
/*
* AND the constraints of the partition and add to
* or_expr_args
*/
or_expr_args = lappend(or_expr_args, list_length(part_qual) > 1
? makeBoolExpr(AND_EXPR, part_qual, -1)
: linitial(part_qual));
}
ReleaseSysCache(tuple);
}
if (or_expr_args != NIL)
{
Expr *other_parts_constr;
/*
* Combine the constraints obtained for non-default partitions
* using OR. As requested, each of the OR's args doesn't include
* the NOT NULL test for partition keys (which is to avoid its
* useless repetition). Add the same now.
*/
other_parts_constr =
makeBoolExpr(AND_EXPR,
lappend(get_range_nulltest(key),
list_length(or_expr_args) > 1
? makeBoolExpr(OR_EXPR, or_expr_args,
-1)
: linitial(or_expr_args)),
-1);
/*
* Finally, the default partition contains everything *NOT*
* contained in the non-default partitions.
*/
result = list_make1(makeBoolExpr(NOT_EXPR,
list_make1(other_parts_constr), -1));
}
return result;
}
lower_or_start_datum = list_head(spec->lowerdatums);
upper_or_start_datum = list_head(spec->upperdatums);
num_or_arms = key->partnatts;
/*
* If it is the recursive call for default, we skip the get_range_nulltest
* to avoid accumulating the NullTest on the same keys for each partition.
*/
if (!for_default)
result = get_range_nulltest(key);
/*
* Iterate over the key columns and check if the corresponding lower and
* upper datums are equal using the btree equality operator for the
* column's type. If equal, we emit single keyCol = common_value
* expression. Starting from the first column for which the corresponding
* lower and upper bound datums are not equal, we generate OR expressions
* as shown in the function's header comment.
*/
i = 0;
partexprs_item = list_head(key->partexprs);
partexprs_item_saved = partexprs_item; /* placate compiler */
forboth(cell1, spec->lowerdatums, cell2, spec->upperdatums)
{
EState *estate;
MemoryContext oldcxt;
Expr *test_expr;
ExprState *test_exprstate;
Datum test_result;
bool isNull;
ldatum = castNode(PartitionRangeDatum, lfirst(cell1));
udatum = castNode(PartitionRangeDatum, lfirst(cell2));
/*
* Since get_range_key_properties() modifies partexprs_item, and we
* might need to start over from the previous expression in the later
* part of this function, save away the current value.
*/
partexprs_item_saved = partexprs_item;
get_range_key_properties(key, i, ldatum, udatum,
&partexprs_item,
&keyCol,
&lower_val, &upper_val);
/*
* If either value is NULL, the corresponding partition bound is
* either MINVALUE or MAXVALUE, and we treat them as unequal, because
* even if they're the same, there is no common value to equate the
* key column with.
*/
if (!lower_val || !upper_val)
break;
/* Create the test expression */
estate = CreateExecutorState();
oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
test_expr = make_partition_op_expr(key, i, BTEqualStrategyNumber,
(Expr *) lower_val,
(Expr *) upper_val);
fix_opfuncids((Node *) test_expr);
test_exprstate = ExecInitExpr(test_expr, NULL);
test_result = ExecEvalExprSwitchContext(test_exprstate,
GetPerTupleExprContext(estate),
&isNull);
MemoryContextSwitchTo(oldcxt);
FreeExecutorState(estate);
/* If not equal, go generate the OR expressions */
if (!DatumGetBool(test_result))
break;
/*
* The bounds for the last key column can't be equal, because such a
* range partition would never be allowed to be defined (it would have
* an empty range otherwise).
*/
if (i == key->partnatts - 1)
elog(ERROR, "invalid range bound specification");
/* Equal, so generate keyCol = lower_val expression */
result = lappend(result,
make_partition_op_expr(key, i, BTEqualStrategyNumber,
keyCol, (Expr *) lower_val));
i++;
}
/* First pair of lower_val and upper_val that are not equal. */
lower_or_start_datum = cell1;
upper_or_start_datum = cell2;
/* OR will have as many arms as there are key columns left. */
num_or_arms = key->partnatts - i;
current_or_arm = 0;
lower_or_arms = upper_or_arms = NIL;
need_next_lower_arm = need_next_upper_arm = true;
while (current_or_arm < num_or_arms)
{
List *lower_or_arm_args = NIL,
*upper_or_arm_args = NIL;
/* Restart scan of columns from the i'th one */
j = i;
partexprs_item = partexprs_item_saved;
for_both_cell(cell1, lower_or_start_datum, cell2, upper_or_start_datum)
{
PartitionRangeDatum *ldatum_next = NULL,
*udatum_next = NULL;
ldatum = castNode(PartitionRangeDatum, lfirst(cell1));
if (lnext(cell1))
ldatum_next = castNode(PartitionRangeDatum,
lfirst(lnext(cell1)));
udatum = castNode(PartitionRangeDatum, lfirst(cell2));
if (lnext(cell2))
udatum_next = castNode(PartitionRangeDatum,
lfirst(lnext(cell2)));
get_range_key_properties(key, j, ldatum, udatum,
&partexprs_item,
&keyCol,
&lower_val, &upper_val);
if (need_next_lower_arm && lower_val)
{
uint16 strategy;
/*
* For the non-last columns of this arm, use the EQ operator.
* For the last column of this arm, use GT, unless this is the
* last column of the whole bound check, or the next bound
* datum is MINVALUE, in which case use GE.
*/
if (j - i < current_or_arm)
strategy = BTEqualStrategyNumber;
else if (j == key->partnatts - 1 ||
(ldatum_next &&
ldatum_next->kind == PARTITION_RANGE_DATUM_MINVALUE))
strategy = BTGreaterEqualStrategyNumber;
else
strategy = BTGreaterStrategyNumber;
lower_or_arm_args = lappend(lower_or_arm_args,
make_partition_op_expr(key, j,
strategy,
keyCol,
(Expr *) lower_val));
}
if (need_next_upper_arm && upper_val)
{
uint16 strategy;
/*
* For the non-last columns of this arm, use the EQ operator.
* For the last column of this arm, use LT, unless the next
* bound datum is MAXVALUE, in which case use LE.
*/
if (j - i < current_or_arm)
strategy = BTEqualStrategyNumber;
else if (udatum_next &&
udatum_next->kind == PARTITION_RANGE_DATUM_MAXVALUE)
strategy = BTLessEqualStrategyNumber;
else
strategy = BTLessStrategyNumber;
upper_or_arm_args = lappend(upper_or_arm_args,
make_partition_op_expr(key, j,
strategy,
keyCol,
(Expr *) upper_val));
}
/*
* Did we generate enough of OR's arguments? First arm considers
* the first of the remaining columns, second arm considers first
* two of the remaining columns, and so on.
*/
++j;
if (j - i > current_or_arm)
{
/*
* We must not emit any more arms if the new column that will
* be considered is unbounded, or this one was.
*/
if (!lower_val || !ldatum_next ||
ldatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
need_next_lower_arm = false;
if (!upper_val || !udatum_next ||
udatum_next->kind != PARTITION_RANGE_DATUM_VALUE)
need_next_upper_arm = false;
break;
}
}
if (lower_or_arm_args != NIL)
lower_or_arms = lappend(lower_or_arms,
list_length(lower_or_arm_args) > 1
? makeBoolExpr(AND_EXPR, lower_or_arm_args, -1)
: linitial(lower_or_arm_args));
if (upper_or_arm_args != NIL)
upper_or_arms = lappend(upper_or_arms,
list_length(upper_or_arm_args) > 1
? makeBoolExpr(AND_EXPR, upper_or_arm_args, -1)
: linitial(upper_or_arm_args));
/* If no work to do in the next iteration, break away. */
if (!need_next_lower_arm && !need_next_upper_arm)
break;
++current_or_arm;
}
/*
* Generate the OR expressions for each of lower and upper bounds (if
* required), and append to the list of implicitly ANDed list of
* expressions.
*/
if (lower_or_arms != NIL)
result = lappend(result,
list_length(lower_or_arms) > 1
? makeBoolExpr(OR_EXPR, lower_or_arms, -1)
: linitial(lower_or_arms));
if (upper_or_arms != NIL)
result = lappend(result,
list_length(upper_or_arms) > 1
? makeBoolExpr(OR_EXPR, upper_or_arms, -1)
: linitial(upper_or_arms));
/*
* As noted above, for non-default, we return list with constant TRUE. If
* the result is NIL during the recursive call for default, it implies
* this is the only other partition which can hold every value of the key
* except NULL. Hence we return the NullTest result skipped earlier.
*/
if (result == NIL)
result = for_default
? get_range_nulltest(key)
: list_make1(makeBoolConst(true, false));
return result;
}
/*
* get_range_key_properties
* Returns range partition key information for a given column
*
* This is a subroutine for get_qual_for_range, and its API is pretty
* specialized to that caller.
*
* Constructs an Expr for the key column (returned in *keyCol) and Consts
* for the lower and upper range limits (returned in *lower_val and
* *upper_val). For MINVALUE/MAXVALUE limits, NULL is returned instead of
* a Const. All of these structures are freshly palloc'd.
*
* *partexprs_item points to the cell containing the next expression in
* the key->partexprs list, or NULL. It may be advanced upon return.
*/
static void
get_range_key_properties(PartitionKey key, int keynum,
PartitionRangeDatum *ldatum,
PartitionRangeDatum *udatum,
ListCell **partexprs_item,
Expr **keyCol,
Const **lower_val, Const **upper_val)
{
/* Get partition key expression for this column */
if (key->partattrs[keynum] != 0)
{
*keyCol = (Expr *) makeVar(1,
key->partattrs[keynum],
key->parttypid[keynum],
key->parttypmod[keynum],
key->parttypcoll[keynum],
0);
}
else
{
if (*partexprs_item == NULL)
elog(ERROR, "wrong number of partition key expressions");
*keyCol = copyObject(lfirst(*partexprs_item));
*partexprs_item = lnext(*partexprs_item);
}
/* Get appropriate Const nodes for the bounds */
if (ldatum->kind == PARTITION_RANGE_DATUM_VALUE)
*lower_val = castNode(Const, copyObject(ldatum->value));
else
*lower_val = NULL;
if (udatum->kind == PARTITION_RANGE_DATUM_VALUE)
*upper_val = castNode(Const, copyObject(udatum->value));
else
*upper_val = NULL;
}
/*
* get_range_nulltest
*
* A non-default range partition table does not currently allow partition
* keys to be null, so emit an IS NOT NULL expression for each key column.
*/
static List *
get_range_nulltest(PartitionKey key)
{
List *result = NIL;
NullTest *nulltest;
ListCell *partexprs_item;
int i;
partexprs_item = list_head(key->partexprs);
for (i = 0; i < key->partnatts; i++)
{
Expr *keyCol;
if (key->partattrs[i] != 0)
{
keyCol = (Expr *) makeVar(1,
key->partattrs[i],
key->parttypid[i],
key->parttypmod[i],
key->parttypcoll[i],
0);
}
else
{
if (partexprs_item == NULL)
elog(ERROR, "wrong number of partition key expressions");
keyCol = copyObject(lfirst(partexprs_item));
partexprs_item = lnext(partexprs_item);
}
nulltest = makeNode(NullTest);
nulltest->arg = keyCol;
nulltest->nulltesttype = IS_NOT_NULL;
nulltest->argisrow = false;
nulltest->location = -1;
result = lappend(result, nulltest);
}
return result;
}
/*
* compute_hash_value
*
* Compute the hash value for given not null partition key values.
*/
uint64
compute_hash_value(int partnatts, FmgrInfo *partsupfunc,
Datum *values, bool *isnull)
{
int i;
uint64 rowHash = 0;
Datum seed = UInt64GetDatum(HASH_PARTITION_SEED);
for (i = 0; i < partnatts; i++)
{
if (!isnull[i])
{
Datum hash;
Assert(OidIsValid(partsupfunc[i].fn_oid));
/*
* Compute hash for each datum value by calling respective
* datatype-specific hash functions of each partition key
* attribute.
*/
hash = FunctionCall2(&partsupfunc[i], values[i], seed);
/* Form a single 64-bit hash value */
rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
}
}
return rowHash;
}
/*
* satisfies_hash_partition
*
* This is an SQL-callable function for use in hash partition constraints.
* The first three arguments are the parent table OID, modulus, and remainder.
* The remaining arguments are the value of the partitioning columns (or
* expressions); these are hashed and the results are combined into a single
* hash value by calling hash_combine64.
*
* Returns true if remainder produced when this computed single hash value is
* divided by the given modulus is equal to given remainder, otherwise false.
*
* See get_qual_for_hash() for usage.
*/
Datum
satisfies_hash_partition(PG_FUNCTION_ARGS)
{
typedef struct ColumnsHashData
{
Oid relid;
int nkeys;
Oid variadic_type;
int16 variadic_typlen;
bool variadic_typbyval;
char variadic_typalign;
FmgrInfo partsupfunc[PARTITION_MAX_KEYS];
} ColumnsHashData;
Oid parentId;
int modulus;
int remainder;
Datum seed = UInt64GetDatum(HASH_PARTITION_SEED);
ColumnsHashData *my_extra;
uint64 rowHash = 0;
/* Return null if the parent OID, modulus, or remainder is NULL. */
if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(2))
PG_RETURN_NULL();
parentId = PG_GETARG_OID(0);
modulus = PG_GETARG_INT32(1);
remainder = PG_GETARG_INT32(2);
/* Sanity check modulus and remainder. */
if (modulus <= 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("modulus for hash partition must be a positive integer")));
if (remainder < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("remainder for hash partition must be a non-negative integer")));
if (remainder >= modulus)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("remainder for hash partition must be less than modulus")));
/*
* Cache hash function information.
*/
my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
if (my_extra == NULL || my_extra->relid != parentId)
{
Relation parent;
PartitionKey key;
int j;
/* Open parent relation and fetch partition keyinfo */
parent = try_relation_open(parentId, AccessShareLock);
if (parent == NULL)
PG_RETURN_NULL();
key = RelationGetPartitionKey(parent);
/* Reject parent table that is not hash-partitioned. */
if (parent->rd_rel->relkind != RELKIND_PARTITIONED_TABLE ||
key->strategy != PARTITION_STRATEGY_HASH)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("\"%s\" is not a hash partitioned table",
get_rel_name(parentId))));
if (!get_fn_expr_variadic(fcinfo->flinfo))
{
int nargs = PG_NARGS() - 3;
/* complain if wrong number of column values */
if (key->partnatts != nargs)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
key->partnatts, nargs)));
/* allocate space for our cache */
fcinfo->flinfo->fn_extra =
MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
offsetof(ColumnsHashData, partsupfunc) +
sizeof(FmgrInfo) * nargs);
my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
my_extra->relid = parentId;
my_extra->nkeys = key->partnatts;
/* check argument types and save fmgr_infos */
for (j = 0; j < key->partnatts; ++j)
{
Oid argtype = get_fn_expr_argtype(fcinfo->flinfo, j + 3);
if (argtype != key->parttypid[j] && !IsBinaryCoercible(argtype, key->parttypid[j]))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"",
j + 1, format_type_be(key->parttypid[j]), format_type_be(argtype))));
fmgr_info_copy(&my_extra->partsupfunc[j],
&key->partsupfunc[j],
fcinfo->flinfo->fn_mcxt);
}
}
else
{
ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
/* allocate space for our cache -- just one FmgrInfo in this case */
fcinfo->flinfo->fn_extra =
MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt,
offsetof(ColumnsHashData, partsupfunc) +
sizeof(FmgrInfo));
my_extra = (ColumnsHashData *) fcinfo->flinfo->fn_extra;
my_extra->relid = parentId;
my_extra->nkeys = key->partnatts;
my_extra->variadic_type = ARR_ELEMTYPE(variadic_array);
get_typlenbyvalalign(my_extra->variadic_type,
&my_extra->variadic_typlen,
&my_extra->variadic_typbyval,
&my_extra->variadic_typalign);
/* check argument types */
for (j = 0; j < key->partnatts; ++j)
if (key->parttypid[j] != my_extra->variadic_type)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("column %d of the partition key has type \"%s\", but supplied value is of type \"%s\"",
j + 1,
format_type_be(key->parttypid[j]),
format_type_be(my_extra->variadic_type))));
fmgr_info_copy(&my_extra->partsupfunc[0],
&key->partsupfunc[0],
fcinfo->flinfo->fn_mcxt);
}
/* Hold lock until commit */
relation_close(parent, NoLock);
}
if (!OidIsValid(my_extra->variadic_type))
{
int nkeys = my_extra->nkeys;
int i;
/*
* For a non-variadic call, neither the number of arguments nor their
* types can change across calls, so avoid the expense of rechecking
* here.
*/
for (i = 0; i < nkeys; i++)
{
Datum hash;
/* keys start from fourth argument of function. */
int argno = i + 3;
if (PG_ARGISNULL(argno))
continue;
Assert(OidIsValid(my_extra->partsupfunc[i].fn_oid));
hash = FunctionCall2(&my_extra->partsupfunc[i],
PG_GETARG_DATUM(argno),
seed);
/* Form a single 64-bit hash value */
rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
}
}
else
{
ArrayType *variadic_array = PG_GETARG_ARRAYTYPE_P(3);
int i;
int nelems;
Datum *datum;
bool *isnull;
deconstruct_array(variadic_array,
my_extra->variadic_type,
my_extra->variadic_typlen,
my_extra->variadic_typbyval,
my_extra->variadic_typalign,
&datum, &isnull, &nelems);
/* complain if wrong number of column values */
if (nelems != my_extra->nkeys)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("number of partitioning columns (%d) does not match number of partition keys provided (%d)",
my_extra->nkeys, nelems)));
for (i = 0; i < nelems; i++)
{
Datum hash;
if (isnull[i])
continue;
Assert(OidIsValid(my_extra->partsupfunc[0].fn_oid));
hash = FunctionCall2(&my_extra->partsupfunc[0],
datum[i],
seed);
/* Form a single 64-bit hash value */
rowHash = hash_combine64(rowHash, DatumGetUInt64(hash));
}
}
PG_RETURN_BOOL(rowHash % modulus == remainder);
}
......@@ -68,6 +68,7 @@
#include "utils/acl.h"
#include "utils/guc.h"
#include "utils/syscache.h"
#include "utils/rel.h"
/* Hook for plugins to get control in ProcessUtility() */
......
......@@ -24,7 +24,6 @@
#include "access/sysattr.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/partition.h"
#include "catalog/pg_aggregate.h"
#include "catalog/pg_am.h"
#include "catalog/pg_authid.h"
......@@ -64,6 +63,7 @@
#include "utils/guc.h"
#include "utils/hsearch.h"
#include "utils/lsyscache.h"
#include "utils/partcache.h"
#include "utils/rel.h"
#include "utils/ruleutils.h"
#include "utils/snapmgr.h"
......
......@@ -12,8 +12,8 @@ subdir = src/backend/utils/cache
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
OBJS = attoptcache.o catcache.o evtcache.o inval.o plancache.o relcache.o \
relmapper.o relfilenodemap.o spccache.o syscache.o lsyscache.o \
typcache.o ts_cache.o
OBJS = attoptcache.o catcache.o evtcache.o inval.o lsyscache.o \
partcache.o plancache.o relcache.o relmapper.o relfilenodemap.o \
spccache.o syscache.o ts_cache.o typcache.o
include $(top_srcdir)/src/backend/common.mk
/*-------------------------------------------------------------------------
*
* partcache.c
* Support routines for manipulating partition information cached in
* relcache
*
* Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* src/backend/utils/cache/partcache.c
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/hash.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/nbtree.h"
#include "catalog/partition.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_partitioned_table.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/planner.h"
#include "partitioning/partbounds.h"
#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/partcache.h"
#include "utils/rel.h"
#include "utils/syscache.h"
static List *generate_partition_qual(Relation rel);
static int32 qsort_partition_hbound_cmp(const void *a, const void *b);
static int32 qsort_partition_list_value_cmp(const void *a, const void *b,
void *arg);
static int32 qsort_partition_rbound_cmp(const void *a, const void *b,
void *arg);
/*
* RelationBuildPartitionKey
* Build and attach to relcache partition key data of relation
*
* Partitioning key data is a complex structure; to avoid complicated logic to
* free individual elements whenever the relcache entry is flushed, we give it
* its own memory context, child of CacheMemoryContext, which can easily be
* deleted on its own. To avoid leaking memory in that context in case of an
* error partway through this function, the context is initially created as a
* child of CurTransactionContext and only re-parented to CacheMemoryContext
* at the end, when no further errors are possible. Also, we don't make this
* context the current context except in very brief code sections, out of fear
* that some of our callees allocate memory on their own which would be leaked
* permanently.
*/
void
RelationBuildPartitionKey(Relation relation)
{
Form_pg_partitioned_table form;
HeapTuple tuple;
bool isnull;
int i;
PartitionKey key;
AttrNumber *attrs;
oidvector *opclass;
oidvector *collation;
ListCell *partexprs_item;
Datum datum;
MemoryContext partkeycxt,
oldcxt;
int16 procnum;
tuple = SearchSysCache1(PARTRELID,
ObjectIdGetDatum(RelationGetRelid(relation)));
/*
* The following happens when we have created our pg_class entry but not
* the pg_partitioned_table entry yet.
*/
if (!HeapTupleIsValid(tuple))
return;
partkeycxt = AllocSetContextCreate(CurTransactionContext,
"partition key",
ALLOCSET_SMALL_SIZES);
MemoryContextCopyAndSetIdentifier(partkeycxt,
RelationGetRelationName(relation));
key = (PartitionKey) MemoryContextAllocZero(partkeycxt,
sizeof(PartitionKeyData));
/* Fixed-length attributes */
form = (Form_pg_partitioned_table) GETSTRUCT(tuple);
key->strategy = form->partstrat;
key->partnatts = form->partnatts;
/*
* We can rely on the first variable-length attribute being mapped to the
* relevant field of the catalog's C struct, because all previous
* attributes are non-nullable and fixed-length.
*/
attrs = form->partattrs.values;
/* But use the hard way to retrieve further variable-length attributes */
/* Operator class */
datum = SysCacheGetAttr(PARTRELID, tuple,
Anum_pg_partitioned_table_partclass, &isnull);
Assert(!isnull);
opclass = (oidvector *) DatumGetPointer(datum);
/* Collation */
datum = SysCacheGetAttr(PARTRELID, tuple,
Anum_pg_partitioned_table_partcollation, &isnull);
Assert(!isnull);
collation = (oidvector *) DatumGetPointer(datum);
/* Expressions */
datum = SysCacheGetAttr(PARTRELID, tuple,
Anum_pg_partitioned_table_partexprs, &isnull);
if (!isnull)
{
char *exprString;
Node *expr;
exprString = TextDatumGetCString(datum);
expr = stringToNode(exprString);
pfree(exprString);
/*
* Run the expressions through const-simplification since the planner
* will be comparing them to similarly-processed qual clause operands,
* and may fail to detect valid matches without this step; fix
* opfuncids while at it. We don't need to bother with
* canonicalize_qual() though, because partition expressions should be
* in canonical form already (ie, no need for OR-merging or constant
* elimination).
*/
expr = eval_const_expressions(NULL, expr);
fix_opfuncids(expr);
oldcxt = MemoryContextSwitchTo(partkeycxt);
key->partexprs = (List *) copyObject(expr);
MemoryContextSwitchTo(oldcxt);
}
oldcxt = MemoryContextSwitchTo(partkeycxt);
key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber));
key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid));
key->partopcintype = (Oid *) palloc0(key->partnatts * sizeof(Oid));
key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo));
key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid));
/* Gather type and collation info as well */
key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid));
key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32));
key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16));
key->parttypbyval = (bool *) palloc0(key->partnatts * sizeof(bool));
key->parttypalign = (char *) palloc0(key->partnatts * sizeof(char));
key->parttypcoll = (Oid *) palloc0(key->partnatts * sizeof(Oid));
MemoryContextSwitchTo(oldcxt);
/* determine support function number to search for */
procnum = (key->strategy == PARTITION_STRATEGY_HASH) ?
HASHEXTENDED_PROC : BTORDER_PROC;
/* Copy partattrs and fill other per-attribute info */
memcpy(key->partattrs, attrs, key->partnatts * sizeof(int16));
partexprs_item = list_head(key->partexprs);
for (i = 0; i < key->partnatts; i++)
{
AttrNumber attno = key->partattrs[i];
HeapTuple opclasstup;
Form_pg_opclass opclassform;
Oid funcid;
/* Collect opfamily information */
opclasstup = SearchSysCache1(CLAOID,
ObjectIdGetDatum(opclass->values[i]));
if (!HeapTupleIsValid(opclasstup))
elog(ERROR, "cache lookup failed for opclass %u", opclass->values[i]);
opclassform = (Form_pg_opclass) GETSTRUCT(opclasstup);
key->partopfamily[i] = opclassform->opcfamily;
key->partopcintype[i] = opclassform->opcintype;
/* Get a support function for the specified opfamily and datatypes */
funcid = get_opfamily_proc(opclassform->opcfamily,
opclassform->opcintype,
opclassform->opcintype,
procnum);
if (!OidIsValid(funcid))
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("operator class \"%s\" of access method %s is missing support function %d for type %s",
NameStr(opclassform->opcname),
(key->strategy == PARTITION_STRATEGY_HASH) ?
"hash" : "btree",
procnum,
format_type_be(opclassform->opcintype))));
fmgr_info_cxt(funcid, &key->partsupfunc[i], partkeycxt);
/* Collation */
key->partcollation[i] = collation->values[i];
/* Collect type information */
if (attno != 0)
{
Form_pg_attribute att = TupleDescAttr(relation->rd_att, attno - 1);
key->parttypid[i] = att->atttypid;
key->parttypmod[i] = att->atttypmod;
key->parttypcoll[i] = att->attcollation;
}
else
{
if (partexprs_item == NULL)
elog(ERROR, "wrong number of partition key expressions");
key->parttypid[i] = exprType(lfirst(partexprs_item));
key->parttypmod[i] = exprTypmod(lfirst(partexprs_item));
key->parttypcoll[i] = exprCollation(lfirst(partexprs_item));
partexprs_item = lnext(partexprs_item);
}
get_typlenbyvalalign(key->parttypid[i],
&key->parttyplen[i],
&key->parttypbyval[i],
&key->parttypalign[i]);
ReleaseSysCache(opclasstup);
}
ReleaseSysCache(tuple);
/*
* Success --- reparent our context and make the relcache point to the
* newly constructed key
*/
MemoryContextSetParent(partkeycxt, CacheMemoryContext);
relation->rd_partkeycxt = partkeycxt;
relation->rd_partkey = key;
}
/*
* RelationBuildPartitionDesc
* Form rel's partition descriptor
*
* Not flushed from the cache by RelationClearRelation() unless changed because
* of addition or removal of partition.
*/
void
RelationBuildPartitionDesc(Relation rel)
{
List *inhoids,
*partoids;
Oid *oids = NULL;
List *boundspecs = NIL;
ListCell *cell;
int i,
nparts;
PartitionKey key = RelationGetPartitionKey(rel);
PartitionDesc result;
MemoryContext oldcxt;
int ndatums = 0;
int default_index = -1;
/* Hash partitioning specific */
PartitionHashBound **hbounds = NULL;
/* List partitioning specific */
PartitionListValue **all_values = NULL;
int null_index = -1;
/* Range partitioning specific */
PartitionRangeBound **rbounds = NULL;
/* Get partition oids from pg_inherits */
inhoids = find_inheritance_children(RelationGetRelid(rel), NoLock);
/* Collect bound spec nodes in a list */
i = 0;
partoids = NIL;
foreach(cell, inhoids)
{
Oid inhrelid = lfirst_oid(cell);
HeapTuple tuple;
Datum datum;
bool isnull;
Node *boundspec;
tuple = SearchSysCache1(RELOID, inhrelid);
if (!HeapTupleIsValid(tuple))
elog(ERROR, "cache lookup failed for relation %u", inhrelid);
/*
* It is possible that the pg_class tuple of a partition has not been
* updated yet to set its relpartbound field. The only case where
* this happens is when we open the parent relation to check using its
* partition descriptor that a new partition's bound does not overlap
* some existing partition.
*/
if (!((Form_pg_class) GETSTRUCT(tuple))->relispartition)
{
ReleaseSysCache(tuple);
continue;
}
datum = SysCacheGetAttr(RELOID, tuple,
Anum_pg_class_relpartbound,
&isnull);
Assert(!isnull);
boundspec = (Node *) stringToNode(TextDatumGetCString(datum));
/*
* Sanity check: If the PartitionBoundSpec says this is the default
* partition, its OID should correspond to whatever's stored in
* pg_partitioned_table.partdefid; if not, the catalog is corrupt.
*/
if (castNode(PartitionBoundSpec, boundspec)->is_default)
{
Oid partdefid;
partdefid = get_default_partition_oid(RelationGetRelid(rel));
if (partdefid != inhrelid)
elog(ERROR, "expected partdefid %u, but got %u",
inhrelid, partdefid);
}
boundspecs = lappend(boundspecs, boundspec);
partoids = lappend_oid(partoids, inhrelid);
ReleaseSysCache(tuple);
}
nparts = list_length(partoids);
if (nparts > 0)
{
oids = (Oid *) palloc(nparts * sizeof(Oid));
i = 0;
foreach(cell, partoids)
oids[i++] = lfirst_oid(cell);
/* Convert from node to the internal representation */
if (key->strategy == PARTITION_STRATEGY_HASH)
{
ndatums = nparts;
hbounds = (PartitionHashBound **)
palloc(nparts * sizeof(PartitionHashBound *));
i = 0;
foreach(cell, boundspecs)
{
PartitionBoundSpec *spec = castNode(PartitionBoundSpec,
lfirst(cell));
if (spec->strategy != PARTITION_STRATEGY_HASH)
elog(ERROR, "invalid strategy in partition bound spec");
hbounds[i] = (PartitionHashBound *)
palloc(sizeof(PartitionHashBound));
hbounds[i]->modulus = spec->modulus;
hbounds[i]->remainder = spec->remainder;
hbounds[i]->index = i;
i++;
}
/* Sort all the bounds in ascending order */
qsort(hbounds, nparts, sizeof(PartitionHashBound *),
qsort_partition_hbound_cmp);
}
else if (key->strategy == PARTITION_STRATEGY_LIST)
{
List *non_null_values = NIL;
/*
* Create a unified list of non-null values across all partitions.
*/
i = 0;
null_index = -1;
foreach(cell, boundspecs)
{
PartitionBoundSpec *spec = castNode(PartitionBoundSpec,
lfirst(cell));
ListCell *c;
if (spec->strategy != PARTITION_STRATEGY_LIST)
elog(ERROR, "invalid strategy in partition bound spec");
/*
* Note the index of the partition bound spec for the default
* partition. There's no datum to add to the list of non-null
* datums for this partition.
*/
if (spec->is_default)
{
default_index = i;
i++;
continue;
}
foreach(c, spec->listdatums)
{
Const *val = castNode(Const, lfirst(c));
PartitionListValue *list_value = NULL;
if (!val->constisnull)
{
list_value = (PartitionListValue *)
palloc0(sizeof(PartitionListValue));
list_value->index = i;
list_value->value = val->constvalue;
}
else
{
/*
* Never put a null into the values array, flag
* instead for the code further down below where we
* construct the actual relcache struct.
*/
if (null_index != -1)
elog(ERROR, "found null more than once");
null_index = i;
}
if (list_value)
non_null_values = lappend(non_null_values,
list_value);
}
i++;
}
ndatums = list_length(non_null_values);
/*
* Collect all list values in one array. Alongside the value, we
* also save the index of partition the value comes from.
*/
all_values = (PartitionListValue **) palloc(ndatums *
sizeof(PartitionListValue *));
i = 0;
foreach(cell, non_null_values)
{
PartitionListValue *src = lfirst(cell);
all_values[i] = (PartitionListValue *)
palloc(sizeof(PartitionListValue));
all_values[i]->value = src->value;
all_values[i]->index = src->index;
i++;
}
qsort_arg(all_values, ndatums, sizeof(PartitionListValue *),
qsort_partition_list_value_cmp, (void *) key);
}
else if (key->strategy == PARTITION_STRATEGY_RANGE)
{
int k;
PartitionRangeBound **all_bounds,
*prev;
all_bounds = (PartitionRangeBound **) palloc0(2 * nparts *
sizeof(PartitionRangeBound *));
/*
* Create a unified list of range bounds across all the
* partitions.
*/
i = ndatums = 0;
foreach(cell, boundspecs)
{
PartitionBoundSpec *spec = castNode(PartitionBoundSpec,
lfirst(cell));
PartitionRangeBound *lower,
*upper;
if (spec->strategy != PARTITION_STRATEGY_RANGE)
elog(ERROR, "invalid strategy in partition bound spec");
/*
* Note the index of the partition bound spec for the default
* partition. There's no datum to add to the allbounds array
* for this partition.
*/
if (spec->is_default)
{
default_index = i++;
continue;
}
lower = make_one_range_bound(key, i, spec->lowerdatums,
true);
upper = make_one_range_bound(key, i, spec->upperdatums,
false);
all_bounds[ndatums++] = lower;
all_bounds[ndatums++] = upper;
i++;
}
Assert(ndatums == nparts * 2 ||
(default_index != -1 && ndatums == (nparts - 1) * 2));
/* Sort all the bounds in ascending order */
qsort_arg(all_bounds, ndatums,
sizeof(PartitionRangeBound *),
qsort_partition_rbound_cmp,
(void *) key);
/* Save distinct bounds from all_bounds into rbounds. */
rbounds = (PartitionRangeBound **)
palloc(ndatums * sizeof(PartitionRangeBound *));
k = 0;
prev = NULL;
for (i = 0; i < ndatums; i++)
{
PartitionRangeBound *cur = all_bounds[i];
bool is_distinct = false;
int j;
/* Is the current bound distinct from the previous one? */
for (j = 0; j < key->partnatts; j++)
{
Datum cmpval;
if (prev == NULL || cur->kind[j] != prev->kind[j])
{
is_distinct = true;
break;
}
/*
* If the bounds are both MINVALUE or MAXVALUE, stop now
* and treat them as equal, since any values after this
* point must be ignored.
*/
if (cur->kind[j] != PARTITION_RANGE_DATUM_VALUE)
break;
cmpval = FunctionCall2Coll(&key->partsupfunc[j],
key->partcollation[j],
cur->datums[j],
prev->datums[j]);
if (DatumGetInt32(cmpval) != 0)
{
is_distinct = true;
break;
}
}
/*
* Only if the bound is distinct save it into a temporary
* array i.e. rbounds which is later copied into boundinfo
* datums array.
*/
if (is_distinct)
rbounds[k++] = all_bounds[i];
prev = cur;
}
/* Update ndatums to hold the count of distinct datums. */
ndatums = k;
}
else
elog(ERROR, "unexpected partition strategy: %d",
(int) key->strategy);
}
/* Now build the actual relcache partition descriptor */
rel->rd_pdcxt = AllocSetContextCreate(CacheMemoryContext,
"partition descriptor",
ALLOCSET_DEFAULT_SIZES);
MemoryContextCopyAndSetIdentifier(rel->rd_pdcxt, RelationGetRelationName(rel));
oldcxt = MemoryContextSwitchTo(rel->rd_pdcxt);
result = (PartitionDescData *) palloc0(sizeof(PartitionDescData));
result->nparts = nparts;
if (nparts > 0)
{
PartitionBoundInfo boundinfo;
int *mapping;
int next_index = 0;
result->oids = (Oid *) palloc0(nparts * sizeof(Oid));
boundinfo = (PartitionBoundInfoData *)
palloc0(sizeof(PartitionBoundInfoData));
boundinfo->strategy = key->strategy;
boundinfo->default_index = -1;
boundinfo->ndatums = ndatums;
boundinfo->null_index = -1;
boundinfo->datums = (Datum **) palloc0(ndatums * sizeof(Datum *));
/* Initialize mapping array with invalid values */
mapping = (int *) palloc(sizeof(int) * nparts);
for (i = 0; i < nparts; i++)
mapping[i] = -1;
switch (key->strategy)
{
case PARTITION_STRATEGY_HASH:
{
/* Modulus are stored in ascending order */
int greatest_modulus = hbounds[ndatums - 1]->modulus;
boundinfo->indexes = (int *) palloc(greatest_modulus *
sizeof(int));
for (i = 0; i < greatest_modulus; i++)
boundinfo->indexes[i] = -1;
for (i = 0; i < nparts; i++)
{
int modulus = hbounds[i]->modulus;
int remainder = hbounds[i]->remainder;
boundinfo->datums[i] = (Datum *) palloc(2 *
sizeof(Datum));
boundinfo->datums[i][0] = Int32GetDatum(modulus);
boundinfo->datums[i][1] = Int32GetDatum(remainder);
while (remainder < greatest_modulus)
{
/* overlap? */
Assert(boundinfo->indexes[remainder] == -1);
boundinfo->indexes[remainder] = i;
remainder += modulus;
}
mapping[hbounds[i]->index] = i;
pfree(hbounds[i]);
}
pfree(hbounds);
break;
}
case PARTITION_STRATEGY_LIST:
{
boundinfo->indexes = (int *) palloc(ndatums * sizeof(int));
/*
* Copy values. Indexes of individual values are mapped
* to canonical values so that they match for any two list
* partitioned tables with same number of partitions and
* same lists per partition. One way to canonicalize is
* to assign the index in all_values[] of the smallest
* value of each partition, as the index of all of the
* partition's values.
*/
for (i = 0; i < ndatums; i++)
{
boundinfo->datums[i] = (Datum *) palloc(sizeof(Datum));
boundinfo->datums[i][0] = datumCopy(all_values[i]->value,
key->parttypbyval[0],
key->parttyplen[0]);
/* If the old index has no mapping, assign one */
if (mapping[all_values[i]->index] == -1)
mapping[all_values[i]->index] = next_index++;
boundinfo->indexes[i] = mapping[all_values[i]->index];
}
/*
* If null-accepting partition has no mapped index yet,
* assign one. This could happen if such partition
* accepts only null and hence not covered in the above
* loop which only handled non-null values.
*/
if (null_index != -1)
{
Assert(null_index >= 0);
if (mapping[null_index] == -1)
mapping[null_index] = next_index++;
boundinfo->null_index = mapping[null_index];
}
/* Assign mapped index for the default partition. */
if (default_index != -1)
{
/*
* The default partition accepts any value not
* specified in the lists of other partitions, hence
* it should not get mapped index while assigning
* those for non-null datums.
*/
Assert(default_index >= 0 &&
mapping[default_index] == -1);
mapping[default_index] = next_index++;
boundinfo->default_index = mapping[default_index];
}
/* All partition must now have a valid mapping */
Assert(next_index == nparts);
break;
}
case PARTITION_STRATEGY_RANGE:
{
boundinfo->kind = (PartitionRangeDatumKind **)
palloc(ndatums *
sizeof(PartitionRangeDatumKind *));
boundinfo->indexes = (int *) palloc((ndatums + 1) *
sizeof(int));
for (i = 0; i < ndatums; i++)
{
int j;
boundinfo->datums[i] = (Datum *) palloc(key->partnatts *
sizeof(Datum));
boundinfo->kind[i] = (PartitionRangeDatumKind *)
palloc(key->partnatts *
sizeof(PartitionRangeDatumKind));
for (j = 0; j < key->partnatts; j++)
{
if (rbounds[i]->kind[j] == PARTITION_RANGE_DATUM_VALUE)
boundinfo->datums[i][j] =
datumCopy(rbounds[i]->datums[j],
key->parttypbyval[j],
key->parttyplen[j]);
boundinfo->kind[i][j] = rbounds[i]->kind[j];
}
/*
* There is no mapping for invalid indexes.
*
* Any lower bounds in the rbounds array have invalid
* indexes assigned, because the values between the
* previous bound (if there is one) and this (lower)
* bound are not part of the range of any existing
* partition.
*/
if (rbounds[i]->lower)
boundinfo->indexes[i] = -1;
else
{
int orig_index = rbounds[i]->index;
/* If the old index has no mapping, assign one */
if (mapping[orig_index] == -1)
mapping[orig_index] = next_index++;
boundinfo->indexes[i] = mapping[orig_index];
}
}
/* Assign mapped index for the default partition. */
if (default_index != -1)
{
Assert(default_index >= 0 && mapping[default_index] == -1);
mapping[default_index] = next_index++;
boundinfo->default_index = mapping[default_index];
}
boundinfo->indexes[i] = -1;
break;
}
default:
elog(ERROR, "unexpected partition strategy: %d",
(int) key->strategy);
}
result->boundinfo = boundinfo;
/*
* Now assign OIDs from the original array into mapped indexes of the
* result array. Order of OIDs in the former is defined by the
* catalog scan that retrieved them, whereas that in the latter is
* defined by canonicalized representation of the partition bounds.
*/
for (i = 0; i < nparts; i++)
result->oids[mapping[i]] = oids[i];
pfree(mapping);
}
MemoryContextSwitchTo(oldcxt);
rel->rd_partdesc = result;
}
/*
* RelationGetPartitionQual
*
* Returns a list of partition quals
*/
List *
RelationGetPartitionQual(Relation rel)
{
/* Quick exit */
if (!rel->rd_rel->relispartition)
return NIL;
return generate_partition_qual(rel);
}
/*
* get_partition_qual_relid
*
* Returns an expression tree describing the passed-in relation's partition
* constraint. If there is no partition constraint returns NULL; this can
* happen if the default partition is the only partition.
*/
Expr *
get_partition_qual_relid(Oid relid)
{
Relation rel = heap_open(relid, AccessShareLock);
Expr *result = NULL;
List *and_args;
/* Do the work only if this relation is a partition. */
if (rel->rd_rel->relispartition)
{
and_args = generate_partition_qual(rel);
if (and_args == NIL)
result = NULL;
else if (list_length(and_args) > 1)
result = makeBoolExpr(AND_EXPR, and_args, -1);
else
result = linitial(and_args);
}
/* Keep the lock. */
heap_close(rel, NoLock);
return result;
}
/*
* generate_partition_qual
*
* Generate partition predicate from rel's partition bound expression. The
* function returns a NIL list if there is no predicate.
*
* Result expression tree is stored CacheMemoryContext to ensure it survives
* as long as the relcache entry. But we should be running in a less long-lived
* working context. To avoid leaking cache memory if this routine fails partway
* through, we build in working memory and then copy the completed structure
* into cache memory.
*/
static List *
generate_partition_qual(Relation rel)
{
HeapTuple tuple;
MemoryContext oldcxt;
Datum boundDatum;
bool isnull;
PartitionBoundSpec *bound;
List *my_qual = NIL,
*result = NIL;
Relation parent;
bool found_whole_row;
/* Guard against stack overflow due to overly deep partition tree */
check_stack_depth();
/* Quick copy */
if (rel->rd_partcheck != NIL)
return copyObject(rel->rd_partcheck);
/* Grab at least an AccessShareLock on the parent table */
parent = heap_open(get_partition_parent(RelationGetRelid(rel)),
AccessShareLock);
/* Get pg_class.relpartbound */
tuple = SearchSysCache1(RELOID, RelationGetRelid(rel));
if (!HeapTupleIsValid(tuple))
elog(ERROR, "cache lookup failed for relation %u",
RelationGetRelid(rel));
boundDatum = SysCacheGetAttr(RELOID, tuple,
Anum_pg_class_relpartbound,
&isnull);
if (isnull) /* should not happen */
elog(ERROR, "relation \"%s\" has relpartbound = null",
RelationGetRelationName(rel));
bound = castNode(PartitionBoundSpec,
stringToNode(TextDatumGetCString(boundDatum)));
ReleaseSysCache(tuple);
my_qual = get_qual_from_partbound(rel, parent, bound);
/* Add the parent's quals to the list (if any) */
if (parent->rd_rel->relispartition)
result = list_concat(generate_partition_qual(parent), my_qual);
else
result = my_qual;
/*
* Change Vars to have partition's attnos instead of the parent's. We do
* this after we concatenate the parent's quals, because we want every Var
* in it to bear this relation's attnos. It's safe to assume varno = 1
* here.
*/
result = map_partition_varattnos(result, 1, rel, parent,
&found_whole_row);
/* There can never be a whole-row reference here */
if (found_whole_row)
elog(ERROR, "unexpected whole-row reference found in partition key");
/* Save a copy in the relcache */
oldcxt = MemoryContextSwitchTo(CacheMemoryContext);
rel->rd_partcheck = copyObject(result);
MemoryContextSwitchTo(oldcxt);
/* Keep the parent locked until commit */
heap_close(parent, NoLock);
return result;
}
/*
* qsort_partition_hbound_cmp
*
* We sort hash bounds by modulus, then by remainder.
*/
static int32
qsort_partition_hbound_cmp(const void *a, const void *b)
{
PartitionHashBound *h1 = (*(PartitionHashBound *const *) a);
PartitionHashBound *h2 = (*(PartitionHashBound *const *) b);
return partition_hbound_cmp(h1->modulus, h1->remainder,
h2->modulus, h2->remainder);
}
/*
* qsort_partition_list_value_cmp
*
* Compare two list partition bound datums
*/
static int32
qsort_partition_list_value_cmp(const void *a, const void *b, void *arg)
{
Datum val1 = (*(const PartitionListValue **) a)->value,
val2 = (*(const PartitionListValue **) b)->value;
PartitionKey key = (PartitionKey) arg;
return DatumGetInt32(FunctionCall2Coll(&key->partsupfunc[0],
key->partcollation[0],
val1, val2));
}
/* Used when sorting range bounds across all range partitions */
static int32
qsort_partition_rbound_cmp(const void *a, const void *b, void *arg)
{
PartitionRangeBound *b1 = (*(PartitionRangeBound *const *) a);
PartitionRangeBound *b2 = (*(PartitionRangeBound *const *) b);
PartitionKey key = (PartitionKey) arg;
return partition_rbound_cmp(key->partnatts, key->partsupfunc,
key->partcollation, b1->datums, b1->kind,
b1->lower, b2);
}
......@@ -73,6 +73,7 @@
#include "optimizer/cost.h"
#include "optimizer/prep.h"
#include "optimizer/var.h"
#include "partitioning/partbounds.h"
#include "pgstat.h"
#include "rewrite/rewriteDefine.h"
#include "rewrite/rowsecurity.h"
......@@ -85,6 +86,7 @@
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/partcache.h"
#include "utils/relmapper.h"
#include "utils/resowner_private.h"
#include "utils/snapmgr.h"
......@@ -265,7 +267,6 @@ static HeapTuple ScanPgRelation(Oid targetRelId, bool indexOK, bool force_non_hi
static Relation AllocateRelationDesc(Form_pg_class relp);
static void RelationParseRelOptions(Relation relation, HeapTuple tuple);
static void RelationBuildTupleDesc(Relation relation);
static void RelationBuildPartitionKey(Relation relation);
static Relation RelationBuildDesc(Oid targetRelId, bool insertIt);
static void RelationInitPhysicalAddr(Relation relation);
static void load_critical_index(Oid indexoid, Oid heapoid);
......@@ -873,211 +874,6 @@ RelationBuildRuleLock(Relation relation)
relation->rd_rules = rulelock;
}
/*
* RelationBuildPartitionKey
* Build and attach to relcache partition key data of relation
*
* Partitioning key data is a complex structure; to avoid complicated logic to
* free individual elements whenever the relcache entry is flushed, we give it
* its own memory context, child of CacheMemoryContext, which can easily be
* deleted on its own. To avoid leaking memory in that context in case of an
* error partway through this function, the context is initially created as a
* child of CurTransactionContext and only re-parented to CacheMemoryContext
* at the end, when no further errors are possible. Also, we don't make this
* context the current context except in very brief code sections, out of fear
* that some of our callees allocate memory on their own which would be leaked
* permanently.
*/
static void
RelationBuildPartitionKey(Relation relation)
{
Form_pg_partitioned_table form;
HeapTuple tuple;
bool isnull;
int i;
PartitionKey key;
AttrNumber *attrs;
oidvector *opclass;
oidvector *collation;
ListCell *partexprs_item;
Datum datum;
MemoryContext partkeycxt,
oldcxt;
int16 procnum;
tuple = SearchSysCache1(PARTRELID,
ObjectIdGetDatum(RelationGetRelid(relation)));
/*
* The following happens when we have created our pg_class entry but not
* the pg_partitioned_table entry yet.
*/
if (!HeapTupleIsValid(tuple))
return;
partkeycxt = AllocSetContextCreate(CurTransactionContext,
"partition key",
ALLOCSET_SMALL_SIZES);
MemoryContextCopyAndSetIdentifier(partkeycxt,
RelationGetRelationName(relation));
key = (PartitionKey) MemoryContextAllocZero(partkeycxt,
sizeof(PartitionKeyData));
/* Fixed-length attributes */
form = (Form_pg_partitioned_table) GETSTRUCT(tuple);
key->strategy = form->partstrat;
key->partnatts = form->partnatts;
/*
* We can rely on the first variable-length attribute being mapped to the
* relevant field of the catalog's C struct, because all previous
* attributes are non-nullable and fixed-length.
*/
attrs = form->partattrs.values;
/* But use the hard way to retrieve further variable-length attributes */
/* Operator class */
datum = SysCacheGetAttr(PARTRELID, tuple,
Anum_pg_partitioned_table_partclass, &isnull);
Assert(!isnull);
opclass = (oidvector *) DatumGetPointer(datum);
/* Collation */
datum = SysCacheGetAttr(PARTRELID, tuple,
Anum_pg_partitioned_table_partcollation, &isnull);
Assert(!isnull);
collation = (oidvector *) DatumGetPointer(datum);
/* Expressions */
datum = SysCacheGetAttr(PARTRELID, tuple,
Anum_pg_partitioned_table_partexprs, &isnull);
if (!isnull)
{
char *exprString;
Node *expr;
exprString = TextDatumGetCString(datum);
expr = stringToNode(exprString);
pfree(exprString);
/*
* Run the expressions through const-simplification since the planner
* will be comparing them to similarly-processed qual clause operands,
* and may fail to detect valid matches without this step; fix
* opfuncids while at it. We don't need to bother with
* canonicalize_qual() though, because partition expressions should be
* in canonical form already (ie, no need for OR-merging or constant
* elimination).
*/
expr = eval_const_expressions(NULL, expr);
fix_opfuncids(expr);
oldcxt = MemoryContextSwitchTo(partkeycxt);
key->partexprs = (List *) copyObject(expr);
MemoryContextSwitchTo(oldcxt);
}
oldcxt = MemoryContextSwitchTo(partkeycxt);
key->partattrs = (AttrNumber *) palloc0(key->partnatts * sizeof(AttrNumber));
key->partopfamily = (Oid *) palloc0(key->partnatts * sizeof(Oid));
key->partopcintype = (Oid *) palloc0(key->partnatts * sizeof(Oid));
key->partsupfunc = (FmgrInfo *) palloc0(key->partnatts * sizeof(FmgrInfo));
key->partcollation = (Oid *) palloc0(key->partnatts * sizeof(Oid));
/* Gather type and collation info as well */
key->parttypid = (Oid *) palloc0(key->partnatts * sizeof(Oid));
key->parttypmod = (int32 *) palloc0(key->partnatts * sizeof(int32));
key->parttyplen = (int16 *) palloc0(key->partnatts * sizeof(int16));
key->parttypbyval = (bool *) palloc0(key->partnatts * sizeof(bool));
key->parttypalign = (char *) palloc0(key->partnatts * sizeof(char));
key->parttypcoll = (Oid *) palloc0(key->partnatts * sizeof(Oid));
MemoryContextSwitchTo(oldcxt);
/* determine support function number to search for */
procnum = (key->strategy == PARTITION_STRATEGY_HASH) ?
HASHEXTENDED_PROC : BTORDER_PROC;
/* Copy partattrs and fill other per-attribute info */
memcpy(key->partattrs, attrs, key->partnatts * sizeof(int16));
partexprs_item = list_head(key->partexprs);
for (i = 0; i < key->partnatts; i++)
{
AttrNumber attno = key->partattrs[i];
HeapTuple opclasstup;
Form_pg_opclass opclassform;
Oid funcid;
/* Collect opfamily information */
opclasstup = SearchSysCache1(CLAOID,
ObjectIdGetDatum(opclass->values[i]));
if (!HeapTupleIsValid(opclasstup))
elog(ERROR, "cache lookup failed for opclass %u", opclass->values[i]);
opclassform = (Form_pg_opclass) GETSTRUCT(opclasstup);
key->partopfamily[i] = opclassform->opcfamily;
key->partopcintype[i] = opclassform->opcintype;
/* Get a support function for the specified opfamily and datatypes */
funcid = get_opfamily_proc(opclassform->opcfamily,
opclassform->opcintype,
opclassform->opcintype,
procnum);
if (!OidIsValid(funcid))
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("operator class \"%s\" of access method %s is missing support function %d for type %s",
NameStr(opclassform->opcname),
(key->strategy == PARTITION_STRATEGY_HASH) ?
"hash" : "btree",
procnum,
format_type_be(opclassform->opcintype))));
fmgr_info_cxt(funcid, &key->partsupfunc[i], partkeycxt);
/* Collation */
key->partcollation[i] = collation->values[i];
/* Collect type information */
if (attno != 0)
{
Form_pg_attribute att = TupleDescAttr(relation->rd_att, attno - 1);
key->parttypid[i] = att->atttypid;
key->parttypmod[i] = att->atttypmod;
key->parttypcoll[i] = att->attcollation;
}
else
{
if (partexprs_item == NULL)
elog(ERROR, "wrong number of partition key expressions");
key->parttypid[i] = exprType(lfirst(partexprs_item));
key->parttypmod[i] = exprTypmod(lfirst(partexprs_item));
key->parttypcoll[i] = exprCollation(lfirst(partexprs_item));
partexprs_item = lnext(partexprs_item);
}
get_typlenbyvalalign(key->parttypid[i],
&key->parttyplen[i],
&key->parttypbyval[i],
&key->parttypalign[i]);
ReleaseSysCache(opclasstup);
}
ReleaseSysCache(tuple);
/*
* Success --- reparent our context and make the relcache point to the
* newly constructed key
*/
MemoryContextSetParent(partkeycxt, CacheMemoryContext);
relation->rd_partkeycxt = partkeycxt;
relation->rd_partkey = key;
}
/*
* equalRuleLocks
*
......
......@@ -15,13 +15,14 @@
#include "postgres.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "access/htup_details.h"
#include "access/xlog_internal.h"
#include "access/xlog.h"
#include "catalog/pg_control.h"
#include "catalog/pg_type.h"
#include "common/controldata_utils.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "utils/builtins.h"
#include "utils/pg_lsn.h"
#include "utils/timestamp.h"
......
......@@ -14,22 +14,12 @@
#define PARTITION_H
#include "fmgr.h"
#include "executor/tuptable.h"
#include "nodes/execnodes.h"
#include "parser/parse_node.h"
#include "utils/rel.h"
#include "partitioning/partdefs.h"
#include "utils/relcache.h"
/* Seed for the extended hash function */
#define HASH_PARTITION_SEED UINT64CONST(0x7A5B22367996DCFD)
/*
* PartitionBoundInfo encapsulates a set of partition bounds. It is usually
* associated with partitioned tables as part of its partition descriptor.
*
* The internal structure appears in partbounds.h.
*/
typedef struct PartitionBoundInfoData *PartitionBoundInfo;
/*
* Information about partitions of a partitioned table.
*/
......@@ -40,37 +30,17 @@ typedef struct PartitionDescData
PartitionBoundInfo boundinfo; /* collection of partition bounds */
} PartitionDescData;
typedef struct PartitionDescData *PartitionDesc;
extern void RelationBuildPartitionDesc(Relation relation);
extern bool partition_bounds_equal(int partnatts, int16 *parttyplen,
bool *parttypbyval, PartitionBoundInfo b1,
PartitionBoundInfo b2);
extern PartitionBoundInfo partition_bounds_copy(PartitionBoundInfo src,
PartitionKey key);
extern void check_new_partition_bound(char *relname, Relation parent,
PartitionBoundSpec *spec);
extern Oid get_partition_parent(Oid relid);
extern List *get_partition_ancestors(Oid relid);
extern List *get_qual_from_partbound(Relation rel, Relation parent,
PartitionBoundSpec *spec);
extern List *map_partition_varattnos(List *expr, int fromrel_varno,
Relation to_rel, Relation from_rel,
bool *found_whole_row);
extern List *RelationGetPartitionQual(Relation rel);
extern Expr *get_partition_qual_relid(Oid relid);
extern bool has_partition_attrs(Relation rel, Bitmapset *attnums,
bool *used_in_expr);
extern Oid get_default_oid_from_partdesc(PartitionDesc partdesc);
extern Oid get_default_partition_oid(Oid parentId);
extern void update_default_partition_oid(Oid parentId, Oid defaultPartId);
extern void check_default_allows_bound(Relation parent, Relation defaultRel,
PartitionBoundSpec *new_spec);
extern List *get_proposed_default_constraint(List *new_part_constaints);
extern int get_partition_for_tuple(Relation relation, Datum *values,
bool *isnull);
#endif /* PARTITION_H */
......@@ -18,7 +18,6 @@
#include "catalog/dependency.h"
#include "catalog/objectaddress.h"
#include "nodes/parsenodes.h"
#include "catalog/partition.h"
#include "storage/lock.h"
#include "utils/relcache.h"
......
......@@ -13,7 +13,6 @@
#ifndef EXECPARTITION_H
#define EXECPARTITION_H
#include "catalog/partition.h"
#include "nodes/execnodes.h"
#include "nodes/parsenodes.h"
#include "nodes/plannodes.h"
......
......@@ -14,7 +14,6 @@
#ifndef EXECUTOR_H
#define EXECUTOR_H
#include "catalog/partition.h"
#include "executor/execdesc.h"
#include "nodes/parsenodes.h"
#include "utils/memutils.h"
......
......@@ -26,6 +26,8 @@
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "nodes/value.h"
#include "partitioning/partdefs.h"
typedef enum OverridingKind
{
......@@ -803,7 +805,7 @@ typedef struct PartitionSpec
* This represents the portion of the partition key space assigned to a
* particular partition. These are stored on disk in pg_class.relpartbound.
*/
typedef struct PartitionBoundSpec
struct PartitionBoundSpec
{
NodeTag type;
......@@ -822,7 +824,7 @@ typedef struct PartitionBoundSpec
List *upperdatums; /* List of PartitionRangeDatums */
int location; /* token location, or -1 if unknown */
} PartitionBoundSpec;
};
/*
* PartitionRangeDatum - one of the values in a range partition bound
......
......@@ -11,7 +11,11 @@
#ifndef PARTBOUNDS_H
#define PARTBOUNDS_H
#include "catalog/partition.h"
#include "fmgr.h"
#include "nodes/parsenodes.h"
#include "nodes/pg_list.h"
#include "partitioning/partdefs.h"
#include "utils/relcache.h"
/*
......@@ -101,7 +105,34 @@ typedef struct PartitionRangeBound
} PartitionRangeBound;
extern int get_hash_partition_greatest_modulus(PartitionBoundInfo b);
extern int partition_list_bsearch(FmgrInfo *partsupfunc, Oid *partcollation,
extern uint64 compute_hash_value(int partnatts, FmgrInfo *partsupfunc,
Datum *values, bool *isnull);
extern List *get_qual_from_partbound(Relation rel, Relation parent,
PartitionBoundSpec *spec);
extern bool partition_bounds_equal(int partnatts, int16 *parttyplen,
bool *parttypbyval, PartitionBoundInfo b1,
PartitionBoundInfo b2);
extern PartitionBoundInfo partition_bounds_copy(PartitionBoundInfo src,
PartitionKey key);
extern void check_new_partition_bound(char *relname, Relation parent,
PartitionBoundSpec *spec);
extern void check_default_allows_bound(Relation parent, Relation defaultRel,
PartitionBoundSpec *new_spec);
extern PartitionRangeBound *make_one_range_bound(PartitionKey key, int index,
List *datums, bool lower);
extern int32 partition_hbound_cmp(int modulus1, int remainder1, int modulus2,
int remainder2);
extern int32 partition_rbound_cmp(int partnatts, FmgrInfo *partsupfunc,
Oid *partcollation, Datum *datums1,
PartitionRangeDatumKind *kind1, bool lower1,
PartitionRangeBound *b2);
extern int32 partition_rbound_datum_cmp(FmgrInfo *partsupfunc,
Oid *partcollation,
Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
Datum *tuple_datums, int n_tuple_datums);
extern int partition_list_bsearch(FmgrInfo *partsupfunc,
Oid *partcollation,
PartitionBoundInfo boundinfo,
Datum value, bool *is_equal);
extern int partition_range_bsearch(int partnatts, FmgrInfo *partsupfunc,
......@@ -114,11 +145,5 @@ extern int partition_range_datum_bsearch(FmgrInfo *partsupfunc,
int nvalues, Datum *values, bool *is_equal);
extern int partition_hash_bsearch(PartitionBoundInfo boundinfo,
int modulus, int remainder);
extern uint64 compute_hash_value(int partnatts, FmgrInfo *partsupfunc,
Datum *values, bool *isnull);
extern int32 partition_rbound_datum_cmp(FmgrInfo *partsupfunc,
Oid *partcollation,
Datum *rb_datums, PartitionRangeDatumKind *rb_kind,
Datum *tuple_datums, int n_tuple_datums);
#endif /* PARTBOUNDS_H */
/*-------------------------------------------------------------------------
*
* partdefs.h
* Base definitions for partitioned table handling
*
* Copyright (c) 2007-2018, PostgreSQL Global Development Group
*
* src/include/partitioning/partdefs.h
*
*-------------------------------------------------------------------------
*/
#ifndef PARTDEFS_H
#define PARTDEFS_H
typedef struct PartitionBoundInfoData *PartitionBoundInfo;
typedef struct PartitionKeyData *PartitionKey;
typedef struct PartitionBoundSpec PartitionBoundSpec;
typedef struct PartitionDescData *PartitionDesc;
#endif /* PARTDEFS_H */
......@@ -14,9 +14,10 @@
#ifndef PARTPRUNE_H
#define PARTPRUNE_H
#include "catalog/partition.h"
#include "nodes/execnodes.h"
#include "nodes/relation.h"
/*
* PartitionPruneContext
*
......
/*-------------------------------------------------------------------------
*
* partcache.h
*
* Copyright (c) 1996-2018, PostgreSQL Global Development Group
*
* src/include/utils/partcache.h
*
*-------------------------------------------------------------------------
*/
#ifndef PARTCACHE_H
#define PARTCACHE_H
#include "access/attnum.h"
#include "fmgr.h"
#include "nodes/pg_list.h"
#include "nodes/primnodes.h"
#include "partitioning/partdefs.h"
#include "utils/relcache.h"
/*
* Information about the partition key of a relation
*/
typedef struct PartitionKeyData
{
char strategy; /* partitioning strategy */
int16 partnatts; /* number of columns in the partition key */
AttrNumber *partattrs; /* attribute numbers of columns in the
* partition key */
List *partexprs; /* list of expressions in the partitioning
* key, or NIL */
Oid *partopfamily; /* OIDs of operator families */
Oid *partopcintype; /* OIDs of opclass declared input data types */
FmgrInfo *partsupfunc; /* lookup info for support funcs */
/* Partitioning collation per attribute */
Oid *partcollation;
/* Type information per attribute */
Oid *parttypid;
int32 *parttypmod;
int16 *parttyplen;
bool *parttypbyval;
char *parttypalign;
Oid *parttypcoll;
} PartitionKeyData;
extern void RelationBuildPartitionKey(Relation relation);
extern void RelationBuildPartitionDesc(Relation rel);
extern List *RelationGetPartitionQual(Relation rel);
extern Expr *get_partition_qual_relid(Oid relid);
/*
* PartitionKey inquiry functions
*/
static inline int
get_partition_strategy(PartitionKey key)
{
return key->strategy;
}
static inline int
get_partition_natts(PartitionKey key)
{
return key->partnatts;
}
static inline List *
get_partition_exprs(PartitionKey key)
{
return key->partexprs;
}
/*
* PartitionKey inquiry functions - one column
*/
static inline int16
get_partition_col_attnum(PartitionKey key, int col)
{
return key->partattrs[col];
}
static inline Oid
get_partition_col_typid(PartitionKey key, int col)
{
return key->parttypid[col];
}
static inline int32
get_partition_col_typmod(PartitionKey key, int col)
{
return key->parttypmod[col];
}
#endif /* PARTCACHE_H */
......@@ -46,36 +46,6 @@ typedef struct LockInfoData
typedef LockInfoData *LockInfo;
/*
* Information about the partition key of a relation
*/
typedef struct PartitionKeyData
{
char strategy; /* partitioning strategy */
int16 partnatts; /* number of columns in the partition key */
AttrNumber *partattrs; /* attribute numbers of columns in the
* partition key */
List *partexprs; /* list of expressions in the partitioning
* key, or NIL */
Oid *partopfamily; /* OIDs of operator families */
Oid *partopcintype; /* OIDs of opclass declared input data types */
FmgrInfo *partsupfunc; /* lookup info for support funcs */
/* Partitioning collation per attribute */
Oid *partcollation;
/* Type information per attribute */
Oid *parttypid;
int32 *parttypmod;
int16 *parttyplen;
bool *parttypbyval;
char *parttypalign;
Oid *parttypcoll;
} PartitionKeyData;
typedef struct PartitionKeyData *PartitionKey;
/*
* Here are the contents of a relation cache entry.
*/
......@@ -618,48 +588,6 @@ typedef struct ViewOptions
*/
#define RelationGetPartitionKey(relation) ((relation)->rd_partkey)
/*
* PartitionKey inquiry functions
*/
static inline int
get_partition_strategy(PartitionKey key)
{
return key->strategy;
}
static inline int
get_partition_natts(PartitionKey key)
{
return key->partnatts;
}
static inline List *
get_partition_exprs(PartitionKey key)
{
return key->partexprs;
}
/*
* PartitionKey inquiry functions - one column
*/
static inline int16
get_partition_col_attnum(PartitionKey key, int col)
{
return key->partattrs[col];
}
static inline Oid
get_partition_col_typid(PartitionKey key, int col)
{
return key->parttypid[col];
}
static inline int32
get_partition_col_typmod(PartitionKey key, int col)
{
return key->parttypmod[col];
}
/*
* RelationGetPartitionDesc
* Returns partition descriptor for a relation.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment