Commit 81c5e46c authored by Robert Haas

Introduce 64-bit hash functions with a 64-bit seed.

This will be useful for hash partitioning, which needs a way to seed
the hash functions to avoid problems such as a hash index on a hash
partitioned table clumping all values into a small portion of the
bucket space; it's also useful for anything that wants a 64-bit hash
value rather than a 32-bit hash value.

Just in case somebody wants a 64-bit hash value that is compatible
with the existing 32-bit hash values, make the low 32-bits of the
64-bit hash value match the 32-bit hash value when the seed is 0.

Robert Haas and Amul Sul

Discussion: http://postgr.es/m/CA+Tgmoafx2yoJuhCQQOL5CocEi-w_uG4S2xT0EtgiJnPGcHW3g@mail.gmail.com
parent 2d44c58c
......@@ -436,7 +436,8 @@
</table>
<para>
Hash indexes require one support function, shown in <xref
Hash indexes require one support function, and allow a second one to be
supplied at the operator class author's option, as shown in <xref
linkend="xindex-hash-support-table">.
</para>
......@@ -451,9 +452,17 @@
</thead>
<tbody>
<row>
<entry>Compute the hash value for a key</entry>
<entry>Compute the 32-bit hash value for a key</entry>
<entry>1</entry>
</row>
<row>
<entry>
Compute the 64-bit hash value for a key given a 64-bit salt; if
the salt is 0, the low 32 bits will match the value that would
have been computed by function 1
</entry>
<entry>2</entry>
</row>
</tbody>
</tgroup>
</table>
......
This diff is collapsed.
......@@ -373,7 +373,7 @@ _hash_init(Relation rel, double num_tuples, ForkNumber forkNum)
if (ffactor < 10)
ffactor = 10;
procid = index_getprocid(rel, 1, HASHPROC);
procid = index_getprocid(rel, 1, HASHSTANDARD_PROC);
/*
* We initialize the metapage, the first N bucket pages, and the first
......
......@@ -85,7 +85,7 @@ _hash_datum2hashkey(Relation rel, Datum key)
Oid collation;
/* XXX assumes index has only one attribute */
procinfo = index_getprocinfo(rel, 1, HASHPROC);
procinfo = index_getprocinfo(rel, 1, HASHSTANDARD_PROC);
collation = rel->rd_indcollation[0];
return DatumGetUInt32(FunctionCall1Coll(procinfo, collation, key));
......@@ -108,10 +108,10 @@ _hash_datum2hashkey_type(Relation rel, Datum key, Oid keytype)
hash_proc = get_opfamily_proc(rel->rd_opfamily[0],
keytype,
keytype,
HASHPROC);
HASHSTANDARD_PROC);
if (!RegProcedureIsValid(hash_proc))
elog(ERROR, "missing support function %d(%u,%u) for index \"%s\"",
HASHPROC, keytype, keytype,
HASHSTANDARD_PROC, keytype, keytype,
RelationGetRelationName(rel));
collation = rel->rd_indcollation[0];
......
......@@ -29,7 +29,7 @@
#include "utils/syscache.h"
static bool check_hash_func_signature(Oid funcid, Oid restype, Oid argtype);
static bool check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype);
/*
......@@ -105,8 +105,9 @@ hashvalidate(Oid opclassoid)
/* Check procedure numbers and function signatures */
switch (procform->amprocnum)
{
case HASHPROC:
if (!check_hash_func_signature(procform->amproc, INT4OID,
case HASHSTANDARD_PROC:
case HASHEXTENDED_PROC:
if (!check_hash_func_signature(procform->amproc, procform->amprocnum,
procform->amproclefttype))
{
ereport(INFO,
......@@ -264,19 +265,37 @@ hashvalidate(Oid opclassoid)
* hacks in the core hash opclass definitions.
*/
static bool
check_hash_func_signature(Oid funcid, Oid restype, Oid argtype)
check_hash_func_signature(Oid funcid, int16 amprocnum, Oid argtype)
{
bool result = true;
Oid restype;
int16 nargs;
HeapTuple tp;
Form_pg_proc procform;
switch (amprocnum)
{
case HASHSTANDARD_PROC:
restype = INT4OID;
nargs = 1;
break;
case HASHEXTENDED_PROC:
restype = INT8OID;
nargs = 2;
break;
default:
elog(ERROR, "invalid amprocnum");
}
tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
if (!HeapTupleIsValid(tp))
elog(ERROR, "cache lookup failed for function %u", funcid);
procform = (Form_pg_proc) GETSTRUCT(tp);
if (procform->prorettype != restype || procform->proretset ||
procform->pronargs != 1)
procform->pronargs != nargs)
result = false;
if (!IsBinaryCoercible(argtype, procform->proargtypes.values[0]))
......@@ -290,24 +309,29 @@ check_hash_func_signature(Oid funcid, Oid restype, Oid argtype)
* identity, not just its input type, because hashvarlena() takes
* INTERNAL and allowing any such function seems too scary.
*/
if (funcid == F_HASHINT4 &&
if ((funcid == F_HASHINT4 || funcid == F_HASHINT4EXTENDED) &&
(argtype == DATEOID ||
argtype == ABSTIMEOID || argtype == RELTIMEOID ||
argtype == XIDOID || argtype == CIDOID))
/* okay, allowed use of hashint4() */ ;
else if (funcid == F_TIMESTAMP_HASH &&
else if ((funcid == F_TIMESTAMP_HASH ||
funcid == F_TIMESTAMP_HASH_EXTENDED) &&
argtype == TIMESTAMPTZOID)
/* okay, allowed use of timestamp_hash() */ ;
else if (funcid == F_HASHCHAR &&
else if ((funcid == F_HASHCHAR || funcid == F_HASHCHAREXTENDED) &&
argtype == BOOLOID)
/* okay, allowed use of hashchar() */ ;
else if (funcid == F_HASHVARLENA &&
else if ((funcid == F_HASHVARLENA || funcid == F_HASHVARLENAEXTENDED) &&
argtype == BYTEAOID)
/* okay, allowed use of hashvarlena() */ ;
else
result = false;
}
/* If function takes a second argument, it must be for a 64-bit salt. */
if (nargs == 2 && procform->proargtypes.values[1] != INT8OID)
result = false;
ReleaseSysCache(tp);
return result;
}
......@@ -18,6 +18,7 @@
#include <limits.h>
#include "access/genam.h"
#include "access/hash.h"
#include "access/heapam.h"
#include "access/nbtree.h"
#include "access/htup_details.h"
......@@ -1129,7 +1130,8 @@ assignProcTypes(OpFamilyMember *member, Oid amoid, Oid typeoid)
/*
* btree comparison procs must be 2-arg procs returning int4, while btree
* sortsupport procs must take internal and return void. hash support
* procs must be 1-arg procs returning int4. Otherwise we don't know.
* proc 1 must be a 1-arg proc returning int4, while proc 2 must be a
* 2-arg proc returning int8. Otherwise we don't know.
*/
if (amoid == BTREE_AM_OID)
{
......@@ -1172,14 +1174,28 @@ assignProcTypes(OpFamilyMember *member, Oid amoid, Oid typeoid)
}
else if (amoid == HASH_AM_OID)
{
if (procform->pronargs != 1)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("hash procedures must have one argument")));
if (procform->prorettype != INT4OID)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("hash procedures must return integer")));
if (member->number == HASHSTANDARD_PROC)
{
if (procform->pronargs != 1)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("hash procedure 1 must have one argument")));
if (procform->prorettype != INT4OID)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("hash procedure 1 must return integer")));
}
else if (member->number == HASHEXTENDED_PROC)
{
if (procform->pronargs != 2)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("hash procedure 2 must have two arguments")));
if (procform->prorettype != INT8OID)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("hash procedure 2 must return bigint")));
}
/*
* If lefttype/righttype isn't specified, use the proc's input type
......
......@@ -16,6 +16,7 @@
#include <ctype.h>
#include "access/hash.h"
#include "access/htup_details.h"
#include "catalog/catalog.h"
#include "catalog/namespace.h"
......@@ -717,6 +718,20 @@ hash_aclitem(PG_FUNCTION_ARGS)
PG_RETURN_UINT32((uint32) (a->ai_privs + a->ai_grantee + a->ai_grantor));
}
/*
 * hash_aclitem_extended
 *		Extended (seeded, 64-bit) hash support function for aclitem.
 *
 * Argument 0 is the aclitem, argument 1 the 64-bit seed.  The item is first
 * folded into the same 32-bit sum of privileges, grantee and grantor that
 * hash_aclitem returns.  With a zero seed that sum is returned unchanged
 * (widened to 64 bits); with any other seed the sum is run through
 * hash_uint32_extended together with the seed.
 */
Datum
hash_aclitem_extended(PG_FUNCTION_ARGS)
{
	AclItem    *item = PG_GETARG_ACLITEM_P(0);
	uint64		seed = PG_GETARG_INT64(1);
	uint32		folded;

	folded = (uint32) (item->ai_privs + item->ai_grantee + item->ai_grantor);

	/* Only a nonzero seed needs the real hashing step. */
	if (seed != 0)
		return hash_uint32_extended(folded, seed);

	return UInt64GetDatum(folded);
}
/*
* acldefault() --- create an ACL describing default access permissions
......
......@@ -20,6 +20,7 @@
#endif
#include <math.h>
#include "access/hash.h"
#include "access/htup_details.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
......@@ -4020,6 +4021,84 @@ hash_array(PG_FUNCTION_ARGS)
PG_RETURN_UINT32(result);
}
/*
 * hash_array_extended
 *		Extended (seeded, 64-bit) hash of an array; otherwise similar to
 *		hash_array.
 *
 * Argument 0 is the array, argument 1 the 64-bit seed.  Each non-null
 * element is hashed with the element type's extended hash function, using
 * the caller's seed; a null element contributes a hash of 0.  The element
 * hashes are combined as result = result * 31 + elthash, starting from 1.
 *
 * The element type's TypeCacheEntry is cached in fn_extra and looked up
 * again only when the element type changes.  Raises
 * ERRCODE_UNDEFINED_FUNCTION when the element type has no extended hash
 * function.
 */
Datum
hash_array_extended(PG_FUNCTION_ARGS)
{
	AnyArrayType *array = PG_GETARG_ANY_ARRAY(0);

	/*
	 * Keep the seed as the Datum we were handed.  It is only ever forwarded
	 * to the element hash function, and a Datum is not interchangeable with
	 * a uint64 on builds where 64-bit integers are passed by reference.
	 */
	Datum		seed = PG_GETARG_DATUM(1);
	int			ndims = AARR_NDIM(array);
	int		   *dims = AARR_DIMS(array);
	Oid			element_type = AARR_ELEMTYPE(array);
	uint64		result = 1;
	int			nitems;
	TypeCacheEntry *typentry;
	int			typlen;
	bool		typbyval;
	char		typalign;
	int			i;
	array_iter	iter;
	FunctionCallInfoData locfcinfo;

	/* Reuse the cached type entry unless the element type has changed */
	typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
	if (typentry == NULL ||
		typentry->type_id != element_type)
	{
		typentry = lookup_type_cache(element_type,
									 TYPECACHE_HASH_EXTENDED_PROC_FINFO);
		if (!OidIsValid(typentry->hash_extended_proc_finfo.fn_oid))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_FUNCTION),
					 errmsg("could not identify an extended hash function for type %s",
							format_type_be(element_type))));
		fcinfo->flinfo->fn_extra = (void *) typentry;
	}
	typlen = typentry->typlen;
	typbyval = typentry->typbyval;
	typalign = typentry->typalign;

	/* Set up a two-argument call frame: (element, seed) */
	InitFunctionCallInfoData(locfcinfo, &typentry->hash_extended_proc_finfo, 2,
							 InvalidOid, NULL, NULL);

	/* Loop over source data */
	nitems = ArrayGetNItems(ndims, dims);
	array_iter_setup(&iter, array);

	for (i = 0; i < nitems; i++)
	{
		Datum		elt;
		bool		isnull;
		uint64		elthash;

		/* Get element, checking for NULL */
		elt = array_iter_next(&iter, &isnull, i, typlen, typbyval, typalign);

		if (isnull)
		{
			elthash = 0;
		}
		else
		{
			/* Apply the hash function */
			locfcinfo.arg[0] = elt;
			locfcinfo.arg[1] = seed;
			locfcinfo.argnull[0] = false;
			locfcinfo.argnull[1] = false;
			locfcinfo.isnull = false;
			elthash = DatumGetUInt64(FunctionCallInvoke(&locfcinfo));
		}

		/* result = result * 31 + elthash */
		result = (result << 5) - result + elthash;
	}

	AARR_FREE_IF_COPY(array, 0);

	PG_RETURN_UINT64(result);
}
/*-----------------------------------------------------------------------------
* array overlap/containment comparisons
......
......@@ -1508,6 +1508,12 @@ time_hash(PG_FUNCTION_ARGS)
return hashint8(fcinfo);
}
/*
 * time_hash_extended
 *		Extended hash support function for time.
 *
 * Delegates to hashint8extended with the caller's fcinfo unchanged, in the
 * same way time_hash delegates to hashint8.
 */
Datum
time_hash_extended(PG_FUNCTION_ARGS)
{
	PG_RETURN_DATUM(hashint8extended(fcinfo));
}
Datum
time_larger(PG_FUNCTION_ARGS)
{
......@@ -2213,6 +2219,21 @@ timetz_hash(PG_FUNCTION_ARGS)
PG_RETURN_UINT32(thash);
}
/*
 * timetz_hash_extended
 *		Extended (seeded, 64-bit) hash support function for timetz.
 *
 * Argument 0 is the timetz value, argument 1 the 64-bit seed.  The time
 * field is hashed with hashint8extended and the zone field with
 * hash_uint32_extended, both under the same seed; the two hashes are XORed.
 */
Datum
timetz_hash_extended(PG_FUNCTION_ARGS)
{
	TimeTzADT  *key = PG_GETARG_TIMETZADT_P(0);

	/*
	 * Hold the seed as a Datum: it is forwarded as-is to hashint8extended,
	 * and converted explicitly where a plain integer is required.  Storing
	 * it in a uint64 is wrong wherever int64 is passed by reference.
	 */
	Datum		seed = PG_GETARG_DATUM(1);
	uint64		thash;

	/* Same approach as timetz_hash */
	thash = DatumGetUInt64(DirectFunctionCall2(hashint8extended,
											   Int64GetDatumFast(key->time),
											   seed));
	thash ^= DatumGetUInt64(hash_uint32_extended(key->zone,
												 DatumGetInt64(seed)));
	PG_RETURN_UINT64(thash);
}
Datum
timetz_larger(PG_FUNCTION_ARGS)
{
......
......@@ -291,3 +291,46 @@ jsonb_hash(PG_FUNCTION_ARGS)
PG_FREE_IF_COPY(jb, 0);
PG_RETURN_INT32(hash);
}
Datum
jsonb_hash_extended(PG_FUNCTION_ARGS)
{
Jsonb *jb = PG_GETARG_JSONB(0);
uint64 seed = PG_GETARG_INT64(1);
JsonbIterator *it;
JsonbValue v;
JsonbIteratorToken r;
uint64 hash = 0;
if (JB_ROOT_COUNT(jb) == 0)
PG_RETURN_UINT64(seed);
it = JsonbIteratorInit(&jb->root);
while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
{
switch (r)
{
/* Rotation is left to JsonbHashScalarValueExtended() */
case WJB_BEGIN_ARRAY:
hash ^= ((UINT64CONST(JB_FARRAY) << 32) | UINT64CONST(JB_FARRAY));
break;
case WJB_BEGIN_OBJECT:
hash ^= ((UINT64CONST(JB_FOBJECT) << 32) | UINT64CONST(JB_FOBJECT));
break;
case WJB_KEY:
case WJB_VALUE:
case WJB_ELEM:
JsonbHashScalarValueExtended(&v, &hash, seed);
break;
case WJB_END_ARRAY:
case WJB_END_OBJECT:
break;
default:
elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r);
}
}
PG_FREE_IF_COPY(jb, 0);
PG_RETURN_UINT64(hash);
}
......@@ -1249,6 +1249,49 @@ JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash)
*hash ^= tmp;
}
/*
 * JsonbHashScalarValueExtended
 *		Fold one scalar JsonbValue into a running 64-bit hash, with a seed.
 *
 * Seeded 64-bit analogue of JsonbHashScalarValue.  The scalar's own hash is
 * computed according to its type; then each 32-bit half of *hash is rotated
 * left by one bit and the scalar's hash is XORed into it.
 */
void
JsonbHashScalarValueExtended(const JsonbValue *scalarVal, uint64 *hash,
							 uint64 seed)
{
	uint64		scalar_hash;

	switch (scalarVal->type)
	{
		case jbvBool:
			/* With a zero seed booleans map to fixed constants */
			if (seed == 0)
				scalar_hash = scalarVal->val.boolean ? 0x02 : 0x04;
			else
				scalar_hash = DatumGetUInt64(DirectFunctionCall2(hashcharextended,
																 BoolGetDatum(scalarVal->val.boolean),
																 UInt64GetDatum(seed)));
			break;

		case jbvNumeric:
			scalar_hash = DatumGetUInt64(DirectFunctionCall2(hash_numeric_extended,
															 NumericGetDatum(scalarVal->val.numeric),
															 UInt64GetDatum(seed)));
			break;

		case jbvString:
			scalar_hash = DatumGetUInt64(hash_any_extended((const unsigned char *) scalarVal->val.string.val,
														   scalarVal->val.string.len,
														   seed));
			break;

		case jbvNull:
			scalar_hash = seed + 0x01;
			break;

		default:
			elog(ERROR, "invalid jsonb scalar type");
			break;
	}

	/* Rotate the accumulator, then mix in this scalar's hash */
	*hash = ROTATE_HIGH_AND_LOW_32BITS(*hash) ^ scalar_hash;
}
/*
* Are two scalar JsonbValues of the same type a and b equal?
*/
......
......@@ -271,6 +271,15 @@ hashmacaddr(PG_FUNCTION_ARGS)
return hash_any((unsigned char *) key, sizeof(macaddr));
}
Datum
hashmacaddrextended(PG_FUNCTION_ARGS)
{
macaddr *key = PG_GETARG_MACADDR_P(0);
return hash_any_extended((unsigned char *) key, sizeof(macaddr),
PG_GETARG_INT64(1));
}
/*
* Arithmetic functions: bitwise NOT, AND, OR.
*/
......
......@@ -407,6 +407,15 @@ hashmacaddr8(PG_FUNCTION_ARGS)
return hash_any((unsigned char *) key, sizeof(macaddr8));
}
Datum
hashmacaddr8extended(PG_FUNCTION_ARGS)
{
macaddr8 *key = PG_GETARG_MACADDR8_P(0);
return hash_any_extended((unsigned char *) key, sizeof(macaddr8),
PG_GETARG_INT64(1));
}
/*
* Arithmetic functions: bitwise NOT, AND, OR.
*/
......
......@@ -486,6 +486,16 @@ hashinet(PG_FUNCTION_ARGS)
return hash_any((unsigned char *) VARDATA_ANY(addr), addrsize + 2);
}
Datum
hashinetextended(PG_FUNCTION_ARGS)
{
inet *addr = PG_GETARG_INET_PP(0);
int addrsize = ip_addrsize(addr);
return hash_any_extended((unsigned char *) VARDATA_ANY(addr), addrsize + 2,
PG_GETARG_INT64(1));
}
/*
* Boolean network-inclusion tests.
*/
......
......@@ -2230,6 +2230,66 @@ hash_numeric(PG_FUNCTION_ARGS)
PG_RETURN_DATUM(result);
}
/*
 * hash_numeric_extended
 *		Extended (seeded, 64-bit) hash of a numeric; otherwise similar to
 *		hash_numeric.
 *
 * Argument 0 is the numeric, argument 1 the 64-bit seed.
 *
 * - NaN hashes to the seed itself.
 * - A value whose digits are all zero hashes to seed - 1.
 * - Otherwise leading and trailing zero digits are skipped, the remaining
 *	 digits are hashed with hash_any_extended, and the weight (reduced by one
 *	 for every leading zero digit skipped) is XORed into that hash.
 */
Datum
hash_numeric_extended(PG_FUNCTION_ARGS)
{
	Numeric		key = PG_GETARG_NUMERIC(0);
	uint64		seed = PG_GETARG_INT64(1);
	Datum		digit_hash;
	int			weight;
	int			start_offset;
	int			end_offset;
	int			i;
	int			hash_len;
	NumericDigit *digits;

	if (NUMERIC_IS_NAN(key))
		PG_RETURN_UINT64(seed);

	weight = NUMERIC_WEIGHT(key);
	start_offset = 0;
	end_offset = 0;

	/* Skip leading zero digits, adjusting the weight as we go */
	digits = NUMERIC_DIGITS(key);
	for (i = 0; i < NUMERIC_NDIGITS(key); i++)
	{
		if (digits[i] != (NumericDigit) 0)
			break;

		start_offset++;
		weight--;
	}

	/* Every digit was zero (or there were none at all) */
	if (NUMERIC_NDIGITS(key) == start_offset)
		PG_RETURN_UINT64(seed - 1);

	/* Skip trailing zero digits */
	for (i = NUMERIC_NDIGITS(key) - 1; i >= 0; i--)
	{
		if (digits[i] != (NumericDigit) 0)
			break;

		end_offset++;
	}

	/* At least one nonzero digit remains between the two offsets */
	Assert(start_offset + end_offset < NUMERIC_NDIGITS(key));

	hash_len = NUMERIC_NDIGITS(key) - start_offset - end_offset;
	digit_hash = hash_any_extended((unsigned char *) (NUMERIC_DIGITS(key)
													  + start_offset),
								   hash_len * sizeof(NumericDigit),
								   seed);

	/*
	 * Mix in the weight.  Convert the Datum to uint64 first: a Datum must
	 * not be treated as the integer itself, since 64-bit values may be
	 * passed by reference.
	 */
	PG_RETURN_UINT64(DatumGetUInt64(digit_hash) ^ weight);
}
/* ----------------------------------------------------------------------
*
......
......@@ -179,6 +179,12 @@ pg_lsn_hash(PG_FUNCTION_ARGS)
return hashint8(fcinfo);
}
/*
 * pg_lsn_hash_extended
 *		Extended hash support function for pg_lsn.
 *
 * Delegates to hashint8extended with the caller's fcinfo unchanged, in the
 * same way pg_lsn_hash delegates to hashint8.
 */
Datum
pg_lsn_hash_extended(PG_FUNCTION_ARGS)
{
	PG_RETURN_DATUM(hashint8extended(fcinfo));
}
/*----------------------------------------------------------
* Arithmetic operators on PostgreSQL LSNs.
......
......@@ -1280,6 +1280,69 @@ hash_range(PG_FUNCTION_ARGS)
PG_RETURN_INT32(result);
}
/*
* Returns 64-bit value by hashing a value to a 64-bit value, with a seed.
* Otherwise, similar to hash_range.
*/
Datum
hash_range_extended(PG_FUNCTION_ARGS)
{
RangeType *r = PG_GETARG_RANGE(0);
uint64 seed = PG_GETARG_INT64(1);
uint64 result;
TypeCacheEntry *typcache;
TypeCacheEntry *scache;
RangeBound lower;
RangeBound upper;
bool empty;
char flags;
uint64 lower_hash;
uint64 upper_hash;
check_stack_depth();
typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r));
range_deserialize(typcache, r, &lower, &upper, &empty);
flags = range_get_flags(r);
scache = typcache->rngelemtype;
if (!OidIsValid(scache->hash_extended_proc_finfo.fn_oid))
{
scache = lookup_type_cache(scache->type_id,
TYPECACHE_HASH_EXTENDED_PROC_FINFO);
if (!OidIsValid(scache->hash_extended_proc_finfo.fn_oid))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify a hash function for type %s",
format_type_be(scache->type_id))));
}
if (RANGE_HAS_LBOUND(flags))
lower_hash = DatumGetUInt64(FunctionCall2Coll(&scache->hash_extended_proc_finfo,
typcache->rng_collation,
lower.val,
seed));
else
lower_hash = 0;
if (RANGE_HAS_UBOUND(flags))
upper_hash = DatumGetUInt64(FunctionCall2Coll(&scache->hash_extended_proc_finfo,
typcache->rng_collation,
upper.val,
seed));
else
upper_hash = 0;
/* Merge hashes of flags and bounds */
result = hash_uint32_extended((uint32) flags, seed);
result ^= lower_hash;
result = ROTATE_HIGH_AND_LOW_32BITS(result);
result ^= upper_hash;
PG_RETURN_UINT64(result);
}
/*
*----------------------------------------------------------
* CANONICAL FUNCTIONS
......
......@@ -2113,6 +2113,11 @@ timestamp_hash(PG_FUNCTION_ARGS)
return hashint8(fcinfo);
}
/*
 * timestamp_hash_extended
 *		Extended hash support function for timestamp.
 *
 * Delegates to hashint8extended with the caller's fcinfo unchanged, in the
 * same way timestamp_hash delegates to hashint8.
 */
Datum
timestamp_hash_extended(PG_FUNCTION_ARGS)
{
	PG_RETURN_DATUM(hashint8extended(fcinfo));
}
/*
* Cross-type comparison functions for timestamp vs timestamptz
......@@ -2419,6 +2424,20 @@ interval_hash(PG_FUNCTION_ARGS)
return DirectFunctionCall1(hashint8, Int64GetDatumFast(span64));
}
/*
 * interval_hash_extended
 *		Extended hash support function for interval.
 *
 * Same approach as interval_hash: the interval's comparison value
 * (interval_cmp_value) is narrowed to 64 bits with int128_to_int64 and then
 * hashed as an int8, here with hashint8extended and the caller's seed
 * (argument 1, forwarded as-is).
 */
Datum
interval_hash_extended(PG_FUNCTION_ARGS)
{
	Interval   *key = PG_GETARG_INTERVAL_P(0);
	Datum		seed = PG_GETARG_DATUM(1);
	int64		folded;

	folded = int128_to_int64(interval_cmp_value(key));

	/* Int64GetDatumFast needs a variable, so "folded" must stay a local */
	PG_RETURN_DATUM(DirectFunctionCall2(hashint8extended,
										Int64GetDatumFast(folded),
										seed));
}
/* overlaps_timestamp() --- implements the SQL OVERLAPS operator.
*
* Algorithm is per SQL spec. This is much harder than you'd think
......
......@@ -408,3 +408,11 @@ uuid_hash(PG_FUNCTION_ARGS)
return hash_any(key->data, UUID_LEN);
}
Datum
uuid_hash_extended(PG_FUNCTION_ARGS)
{
pg_uuid_t *key = PG_GETARG_UUID_P(0);
return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
}
......@@ -947,6 +947,24 @@ hashbpchar(PG_FUNCTION_ARGS)
return result;
}
Datum
hashbpcharextended(PG_FUNCTION_ARGS)
{
BpChar *key = PG_GETARG_BPCHAR_PP(0);
char *keydata;
int keylen;
Datum result;
keydata = VARDATA_ANY(key);
keylen = bcTruelen(key);
result = hash_any_extended((unsigned char *) keydata, keylen,
PG_GETARG_INT64(1));
PG_FREE_IF_COPY(key, 0);
return result;
}
/*
* The following operators support character-by-character comparison
......
......@@ -490,8 +490,8 @@ get_compatible_hash_operators(Oid opno,
/*
* get_op_hash_functions
* Get the OID(s) of hash support function(s) compatible with the given
* operator, operating on its LHS and/or RHS datatype as required.
* Get the OID(s) of the standard hash support function(s) compatible with
* the given operator, operating on its LHS and/or RHS datatype as required.
*
* A function for the LHS type is sought and returned into *lhs_procno if
* lhs_procno isn't NULL. Similarly, a function for the RHS type is sought
......@@ -542,7 +542,7 @@ get_op_hash_functions(Oid opno,
*lhs_procno = get_opfamily_proc(aform->amopfamily,
aform->amoplefttype,
aform->amoplefttype,
HASHPROC);
HASHSTANDARD_PROC);
if (!OidIsValid(*lhs_procno))
continue;
/* Matching LHS found, done if caller doesn't want RHS */
......@@ -564,7 +564,7 @@ get_op_hash_functions(Oid opno,
*rhs_procno = get_opfamily_proc(aform->amopfamily,
aform->amoprighttype,
aform->amoprighttype,
HASHPROC);
HASHSTANDARD_PROC);
if (!OidIsValid(*rhs_procno))
{
/* Forget any LHS function from this opfamily */
......
......@@ -90,6 +90,7 @@ static TypeCacheEntry *firstDomainTypeEntry = NULL;
#define TCFLAGS_HAVE_FIELD_EQUALITY 0x1000
#define TCFLAGS_HAVE_FIELD_COMPARE 0x2000
#define TCFLAGS_CHECKED_DOMAIN_CONSTRAINTS 0x4000
#define TCFLAGS_CHECKED_HASH_EXTENDED_PROC 0x8000
/*
* Data stored about a domain type's constraints. Note that we do not create
......@@ -307,6 +308,8 @@ lookup_type_cache(Oid type_id, int flags)
flags |= TYPECACHE_HASH_OPFAMILY;
if ((flags & (TYPECACHE_HASH_PROC | TYPECACHE_HASH_PROC_FINFO |
TYPECACHE_HASH_EXTENDED_PROC |
TYPECACHE_HASH_EXTENDED_PROC_FINFO |
TYPECACHE_HASH_OPFAMILY)) &&
!(typentry->flags & TCFLAGS_CHECKED_HASH_OPCLASS))
{
......@@ -329,6 +332,7 @@ lookup_type_cache(Oid type_id, int flags)
* decision is still good.
*/
typentry->flags &= ~(TCFLAGS_CHECKED_HASH_PROC);
typentry->flags &= ~(TCFLAGS_CHECKED_HASH_EXTENDED_PROC);
typentry->flags |= TCFLAGS_CHECKED_HASH_OPCLASS;
}
......@@ -372,11 +376,12 @@ lookup_type_cache(Oid type_id, int flags)
typentry->eq_opr = eq_opr;
/*
* Reset info about hash function whenever we pick up new info about
* equality operator. This is so we can ensure that the hash function
* matches the operator.
* Reset info about hash functions whenever we pick up new info about
* equality operator. This is so we can ensure that the hash functions
* match the operator.
*/
typentry->flags &= ~(TCFLAGS_CHECKED_HASH_PROC);
typentry->flags &= ~(TCFLAGS_CHECKED_HASH_EXTENDED_PROC);
typentry->flags |= TCFLAGS_CHECKED_EQ_OPR;
}
if ((flags & TYPECACHE_LT_OPR) &&
......@@ -467,7 +472,7 @@ lookup_type_cache(Oid type_id, int flags)
hash_proc = get_opfamily_proc(typentry->hash_opf,
typentry->hash_opintype,
typentry->hash_opintype,
HASHPROC);
HASHSTANDARD_PROC);
/*
* As above, make sure hash_array will succeed. We don't currently
......@@ -485,6 +490,43 @@ lookup_type_cache(Oid type_id, int flags)
typentry->hash_proc = hash_proc;
typentry->flags |= TCFLAGS_CHECKED_HASH_PROC;
}
if ((flags & (TYPECACHE_HASH_EXTENDED_PROC |
TYPECACHE_HASH_EXTENDED_PROC_FINFO)) &&
!(typentry->flags & TCFLAGS_CHECKED_HASH_EXTENDED_PROC))
{
Oid hash_extended_proc = InvalidOid;
/*
* We insist that the eq_opr, if one has been determined, match the
* hash opclass; else report there is no hash function.
*/
if (typentry->hash_opf != InvalidOid &&
(!OidIsValid(typentry->eq_opr) ||
typentry->eq_opr == get_opfamily_member(typentry->hash_opf,
typentry->hash_opintype,
typentry->hash_opintype,
HTEqualStrategyNumber)))
hash_extended_proc = get_opfamily_proc(typentry->hash_opf,
typentry->hash_opintype,
typentry->hash_opintype,
HASHEXTENDED_PROC);
/*
* As above, make sure hash_array_extended will succeed. We don't
* currently support hashing for composite types, but when we do,
* we'll need more logic here to check that case too.
*/
if (hash_extended_proc == F_HASH_ARRAY_EXTENDED &&
!array_element_has_hashing(typentry))
hash_extended_proc = InvalidOid;
/* Force update of hash_extended_proc_finfo only if we're changing state */
if (typentry->hash_extended_proc != hash_extended_proc)
typentry->hash_extended_proc_finfo.fn_oid = InvalidOid;
typentry->hash_extended_proc = hash_extended_proc;
typentry->flags |= TCFLAGS_CHECKED_HASH_EXTENDED_PROC;
}
/*
* Set up fmgr lookup info as requested
......@@ -523,6 +565,14 @@ lookup_type_cache(Oid type_id, int flags)
fmgr_info_cxt(typentry->hash_proc, &typentry->hash_proc_finfo,
CacheMemoryContext);
}
if ((flags & TYPECACHE_HASH_EXTENDED_PROC_FINFO) &&
typentry->hash_extended_proc_finfo.fn_oid == InvalidOid &&
typentry->hash_extended_proc != InvalidOid)
{
fmgr_info_cxt(typentry->hash_extended_proc,
&typentry->hash_extended_proc_finfo,
CacheMemoryContext);
}
/*
* If it's a composite type (row type), get tupdesc if requested
......
......@@ -38,6 +38,17 @@ typedef uint32 Bucket;
#define BUCKET_TO_BLKNO(metap,B) \
((BlockNumber) ((B) + ((B) ? (metap)->hashm_spares[_hash_spareindex((B)+1)-1] : 0)) + 1)
/*
 * ROTATE_HIGH_AND_LOW_32BITS
 *		Rotate each 32-bit half of a 64-bit value left by one bit,
 *		independently of the other half.
 *
 * The standard hash function sometimes rotates the low 32 bits by one bit
 * when combining elements.  Extended hash functions must stay compatible
 * with that algorithm when the seed is 0, so a plain 64-bit rotation will
 * not do; rotating the two halves separately will.
 *
 * The first term moves bits 0..30 and 32..62 up by one position; the second
 * term wraps bit 31 around to bit 0 and bit 63 around to bit 32.  The
 * argument is evaluated twice and is expected to be a uint64.
 */
#define ROTATE_HIGH_AND_LOW_32BITS(v) \
	((((v) & UINT64CONST(0x7fffffff7fffffff)) << 1) | \
	 (((v) & UINT64CONST(0x8000000080000000)) >> 31))
/*
* Special space for hash index pages.
*
......@@ -289,12 +300,20 @@ typedef HashMetaPageData *HashMetaPage;
#define HTMaxStrategyNumber 1
/*
* When a new operator class is declared, we require that the user supply
* us with an amproc procedure for hashing a key of the new type.
* Since we only have one such proc in amproc, it's number 1.
* When a new operator class is declared, we require that the user supply
* us with an amproc procedure for hashing a key of the new type, returning
* a 32-bit hash value. We call this the "standard" hash procedure. We
* also allow an optional "extended" hash procedure which accepts a salt and
* returns a 64-bit hash value. This is highly recommended but, for reasons
* of backward compatibility, optional.
*
* When the salt is 0, the low 32 bits of the value returned by the extended
* hash procedure should match the value that would have been returned by the
* standard hash procedure.
*/
#define HASHPROC 1
#define HASHNProcs 1
#define HASHSTANDARD_PROC 1
#define HASHEXTENDED_PROC 2
#define HASHNProcs 2
/* public routines */
......@@ -322,7 +341,10 @@ extern bytea *hashoptions(Datum reloptions, bool validate);
extern bool hashvalidate(Oid opclassoid);
extern Datum hash_any(register const unsigned char *k, register int keylen);
extern Datum hash_any_extended(register const unsigned char *k,
register int keylen, uint64 seed);
extern Datum hash_uint32(uint32 k);
extern Datum hash_uint32_extended(uint32 k, uint64 seed);
/* private routines */
......
......@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201707211
#define CATALOG_VERSION_NO 201708311
#endif
......@@ -153,41 +153,77 @@ DATA(insert ( 4033 3802 3802 1 4044 ));
/* hash */
DATA(insert ( 427 1042 1042 1 1080 ));
DATA(insert ( 427 1042 1042 2 972 ));
DATA(insert ( 431 18 18 1 454 ));
DATA(insert ( 431 18 18 2 446 ));
DATA(insert ( 435 1082 1082 1 450 ));
DATA(insert ( 435 1082 1082 2 425 ));
DATA(insert ( 627 2277 2277 1 626 ));
DATA(insert ( 627 2277 2277 2 782 ));
DATA(insert ( 1971 700 700 1 451 ));
DATA(insert ( 1971 700 700 2 443 ));
DATA(insert ( 1971 701 701 1 452 ));
DATA(insert ( 1971 701 701 2 444 ));
DATA(insert ( 1975 869 869 1 422 ));
DATA(insert ( 1975 869 869 2 779 ));
DATA(insert ( 1977 21 21 1 449 ));
DATA(insert ( 1977 21 21 2 441 ));
DATA(insert ( 1977 23 23 1 450 ));
DATA(insert ( 1977 23 23 2 425 ));
DATA(insert ( 1977 20 20 1 949 ));
DATA(insert ( 1977 20 20 2 442 ));
DATA(insert ( 1983 1186 1186 1 1697 ));
DATA(insert ( 1983 1186 1186 2 3418 ));
DATA(insert ( 1985 829 829 1 399 ));
DATA(insert ( 1985 829 829 2 778 ));
DATA(insert ( 1987 19 19 1 455 ));
DATA(insert ( 1987 19 19 2 447 ));
DATA(insert ( 1990 26 26 1 453 ));
DATA(insert ( 1990 26 26 2 445 ));
DATA(insert ( 1992 30 30 1 457 ));
DATA(insert ( 1992 30 30 2 776 ));
DATA(insert ( 1995 25 25 1 400 ));
DATA(insert ( 1995 25 25 2 448));
DATA(insert ( 1997 1083 1083 1 1688 ));
DATA(insert ( 1997 1083 1083 2 3409 ));
DATA(insert ( 1998 1700 1700 1 432 ));
DATA(insert ( 1998 1700 1700 2 780 ));
DATA(insert ( 1999 1184 1184 1 2039 ));
DATA(insert ( 1999 1184 1184 2 3411 ));
DATA(insert ( 2001 1266 1266 1 1696 ));
DATA(insert ( 2001 1266 1266 2 3410 ));
DATA(insert ( 2040 1114 1114 1 2039 ));
DATA(insert ( 2040 1114 1114 2 3411 ));
DATA(insert ( 2222 16 16 1 454 ));
DATA(insert ( 2222 16 16 2 446 ));
DATA(insert ( 2223 17 17 1 456 ));
DATA(insert ( 2223 17 17 2 772 ));
DATA(insert ( 2225 28 28 1 450 ));
DATA(insert ( 2225 28 28 2 425));
DATA(insert ( 2226 29 29 1 450 ));
DATA(insert ( 2226 29 29 2 425 ));
DATA(insert ( 2227 702 702 1 450 ));
DATA(insert ( 2227 702 702 2 425 ));
DATA(insert ( 2228 703 703 1 450 ));
DATA(insert ( 2228 703 703 2 425 ));
DATA(insert ( 2229 25 25 1 400 ));
DATA(insert ( 2229 25 25 2 448 ));
DATA(insert ( 2231 1042 1042 1 1080 ));
DATA(insert ( 2231 1042 1042 2 972 ));
DATA(insert ( 2235 1033 1033 1 329 ));
DATA(insert ( 2235 1033 1033 2 777 ));
DATA(insert ( 2969 2950 2950 1 2963 ));
DATA(insert ( 2969 2950 2950 2 3412 ));
DATA(insert ( 3254 3220 3220 1 3252 ));
DATA(insert ( 3254 3220 3220 2 3413 ));
DATA(insert ( 3372 774 774 1 328 ));
DATA(insert ( 3372 774 774 2 781 ));
DATA(insert ( 3523 3500 3500 1 3515 ));
DATA(insert ( 3523 3500 3500 2 3414 ));
DATA(insert ( 3903 3831 3831 1 3902 ));
DATA(insert ( 3903 3831 3831 2 3417 ));
DATA(insert ( 4034 3802 3802 1 4045 ));
DATA(insert ( 4034 3802 3802 2 3416));
/* gist */
......
This diff is collapsed.
......@@ -325,6 +325,7 @@ extern struct varlena *pg_detoast_datum_packed(struct varlena *datum);
#define PG_RETURN_FLOAT4(x) return Float4GetDatum(x)
#define PG_RETURN_FLOAT8(x) return Float8GetDatum(x)
#define PG_RETURN_INT64(x) return Int64GetDatum(x)
#define PG_RETURN_UINT64(x) return UInt64GetDatum(x)
/* RETURN macros for other pass-by-ref types will typically look like this: */
#define PG_RETURN_BYTEA_P(x) PG_RETURN_POINTER(x)
#define PG_RETURN_TEXT_P(x) PG_RETURN_POINTER(x)
......
......@@ -370,6 +370,8 @@ extern Jsonb *JsonbValueToJsonb(JsonbValue *val);
extern bool JsonbDeepContains(JsonbIterator **val,
JsonbIterator **mContained);
extern void JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash);
extern void JsonbHashScalarValueExtended(const JsonbValue *scalarVal,
uint64 *hash, uint64 seed);
/* jsonb.c support functions */
extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
......
......@@ -56,6 +56,7 @@ typedef struct TypeCacheEntry
Oid gt_opr; /* the greater-than operator */
Oid cmp_proc; /* the btree comparison function */
Oid hash_proc; /* the hash calculation function */
Oid hash_extended_proc; /* the extended hash calculation function */
/*
* Pre-set-up fmgr call info for the equality operator, the btree
......@@ -67,6 +68,7 @@ typedef struct TypeCacheEntry
FmgrInfo eq_opr_finfo;
FmgrInfo cmp_proc_finfo;
FmgrInfo hash_proc_finfo;
FmgrInfo hash_extended_proc_finfo;
/*
* Tuple descriptor if it's a composite type (row type). NULL if not
......@@ -120,6 +122,8 @@ typedef struct TypeCacheEntry
#define TYPECACHE_HASH_OPFAMILY 0x0400
#define TYPECACHE_RANGE_INFO 0x0800
#define TYPECACHE_DOMAIN_INFO 0x1000
#define TYPECACHE_HASH_EXTENDED_PROC 0x2000
#define TYPECACHE_HASH_EXTENDED_PROC_FINFO 0x4000
/*
* Callers wishing to maintain a long-lived reference to a domain's constraint
......
......@@ -421,7 +421,7 @@ BEGIN TRANSACTION;
CREATE OPERATOR FAMILY alt_opf13 USING hash;
CREATE FUNCTION fn_opf13 (int4) RETURNS BIGINT AS 'SELECT NULL::BIGINT;' LANGUAGE SQL;
ALTER OPERATOR FAMILY alt_opf13 USING hash ADD FUNCTION 1 fn_opf13(int4);
ERROR: hash procedures must return integer
ERROR: hash procedure 1 must return integer
DROP OPERATOR FAMILY alt_opf13 USING hash;
ERROR: current transaction is aborted, commands ignored until end of transaction block
ROLLBACK;
......@@ -439,7 +439,7 @@ BEGIN TRANSACTION;
CREATE OPERATOR FAMILY alt_opf15 USING hash;
CREATE FUNCTION fn_opf15 (int4, int2) RETURNS BIGINT AS 'SELECT NULL::BIGINT;' LANGUAGE SQL;
ALTER OPERATOR FAMILY alt_opf15 USING hash ADD FUNCTION 1 fn_opf15(int4, int2);
ERROR: hash procedures must have one argument
ERROR: hash procedure 1 must have one argument
DROP OPERATOR FAMILY alt_opf15 USING hash;
ERROR: current transaction is aborted, commands ignored until end of transaction block
ROLLBACK;
......
This diff is collapsed.
......@@ -60,7 +60,7 @@ test: create_index create_view
# ----------
# Another group of parallel tests
# ----------
test: create_aggregate create_function_3 create_cast constraints triggers inherit create_table_like typed_table vacuum drop_if_exists updatable_views rolenames roleattributes create_am
test: create_aggregate create_function_3 create_cast constraints triggers inherit create_table_like typed_table vacuum drop_if_exists updatable_views rolenames roleattributes create_am hash_func
# ----------
# sanity_check does a vacuum, affecting the sort order of SELECT *
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment