Commit af871480 authored by Tom Lane

Fix some more hashjoin-related bugs in pg_operator.  Fix hashjoin's
hashFunc() so that it does the right thing with pass-by-value data types
(the old code would always return 0 for int2 or char values, which would
work but would slow things down a lot).  Extend the opr_sanity regression
test to catch more kinds of errors.
parent 1819e89a
......@@ -6,7 +6,7 @@
* Copyright (c) 1994, Regents of the University of California
*
*
* $Id: nodeHash.c,v 1.31 1999/02/13 23:15:22 momjian Exp $
* $Id: nodeHash.c,v 1.32 1999/04/07 23:33:30 tgl Exp $
*
*-------------------------------------------------------------------------
*/
......@@ -41,7 +41,7 @@ extern int NBuffers;
static int HashTBSize;
static void mk_hj_temp(char *tempname);
static int hashFunc(char *key, int len);
static int hashFunc(Datum key, int len, bool byVal);
static int ExecHashPartition(Hash *node);
static RelativeAddr hashTableAlloc(int size, HashJoinTable hashtable);
static void ExecHashOverflowInsert(HashJoinTable hashtable,
......@@ -580,10 +580,8 @@ ExecHashGetBucket(HashJoinTable hashtable,
* compute the hash function
* ------------------
*/
if (execConstByVal)
bucketno = hashFunc((char *) &keyval, execConstLen) % hashtable->totalbuckets;
else
bucketno = hashFunc((char *) keyval, execConstLen) % hashtable->totalbuckets;
bucketno = hashFunc(keyval, execConstLen, execConstByVal) % hashtable->totalbuckets;
#ifdef HJDEBUG
if (bucketno >= hashtable->nbuckets)
printf("hash(%d) = %d SAVED\n", keyval, bucketno);
......@@ -771,12 +769,23 @@ ExecScanHashBucket(HashJoinState *hjstate,
* ----------------------------------------------------------------
*/
static int
hashFunc(char *key, int len)
hashFunc(Datum key, int len, bool byVal)
{
unsigned int h;
int l;
unsigned int h = 0;
unsigned char *k;
if (byVal) {
/*
* If it's a by-value data type, use the 'len' least significant bytes
* of the Datum value. This should do the right thing on either
* bigendian or littleendian hardware --- see the Datum access
* macros in c.h.
*/
while (len-- > 0) {
h = (h * PRIME1) ^ (key & 0xFF);
key >>= 8;
}
} else {
/*
* If this is a variable length type, then 'k' points to a "struct
* varlena" and len == -1. NOTE: VARSIZE returns the "real" data
......@@ -787,25 +796,18 @@ hashFunc(char *key, int len)
*/
if (len == -1)
{
l = VARSIZE(key) - VARHDRSZ;
len = VARSIZE(key) - VARHDRSZ;
k = (unsigned char *) VARDATA(key);
}
else
{
l = len;
k = (unsigned char *) key;
}
while (len-- > 0)
h = (h * PRIME1) ^ (*k++);
}
h = 0;
/*
* Convert string to integer
*/
while (l--)
h = h * PRIME1 ^ (*k++);
h %= PRIME2;
return h;
return h % PRIME2;
}
/* ----------------------------------------------------------------
......
This diff is collapsed.
......@@ -132,17 +132,6 @@ oid|oprname
---+-------
(0 rows)
QUERY: SELECT p1.oid, p1.oprname
FROM pg_operator as p1
WHERE p1.oprcanhash AND NOT
(p1.oprkind = 'b' AND p1.oprresult = 16 AND p1.oprleft = p1.oprright AND
p1.oprname = '=' AND p1.oprcom = p1.oid);
oid|oprname
----+-------
1136|=
1137|=
(2 rows)
QUERY: SELECT p1.oid, p1.oprcode, p2.oid, p2.oprcode
FROM pg_operator AS p1, pg_operator AS p2
WHERE p1.oid != p2.oid AND
......@@ -236,6 +225,27 @@ oid|oprname
---+-------
(0 rows)
QUERY: SELECT p1.oid, p1.oprname
FROM pg_operator AS p1
WHERE p1.oprcanhash AND NOT
(p1.oprkind = 'b' AND p1.oprresult = 16 AND p1.oprleft = p1.oprright AND
p1.oprname = '=' AND p1.oprcom = p1.oid);
oid|oprname
----+-------
1136|=
1137|=
(2 rows)
QUERY: SELECT p1.oid, p1.oprcanhash, p2.oid, p2.oprcanhash, t1.typname, t2.typname
FROM pg_operator AS p1, pg_operator AS p2, pg_type AS t1, pg_type AS t2
WHERE p1.oprname = '=' AND p1.oprleft = p1.oprright AND
p2.oprname = '=' AND p2.oprleft = p2.oprright AND
p1.oprleft = t1.oid AND p2.oprleft = t2.oid AND t1.typelem = t2.oid AND
p1.oprcanhash != p2.oprcanhash;
oid|oprcanhash|oid|oprcanhash|typname|typname
---+----------+---+----------+-------+-------
(0 rows)
QUERY: SELECT p1.oid, p1.oprname, p2.oid, p2.proname
FROM pg_operator AS p1, pg_proc AS p2
WHERE p1.oprcode = p2.oid AND
......
......@@ -132,16 +132,6 @@ WHERE (p1.oprleft = 0 and p1.oprkind != 'l') OR
(p1.oprright = 0 and p1.oprkind != 'r') OR
(p1.oprright != 0 and p1.oprkind = 'r');
-- Hashing only works on simple equality operators "type = sametype",
-- since the hash itself depends on the bitwise representation of the type.
-- Check that allegedly hashable operators look like they might be "=".
SELECT p1.oid, p1.oprname
FROM pg_operator as p1
WHERE p1.oprcanhash AND NOT
(p1.oprkind = 'b' AND p1.oprresult = 16 AND p1.oprleft = p1.oprright AND
p1.oprname = '=' AND p1.oprcom = p1.oid);
-- Look for conflicting operator definitions (same names and input datatypes).
SELECT p1.oid, p1.oprcode, p2.oid, p2.oprcode
......@@ -236,6 +226,29 @@ WHERE p1.oprlsortop != 0 AND NOT
p2.oprright = p1.oprright AND
p2.oprkind = 'b');
-- Hashing only works on simple equality operators "type = sametype",
-- since the hash itself depends on the bitwise representation of the type.
-- Check that allegedly hashable operators look like they might be "=".
-- NOTE: in 6.5, this search finds int4eqoid and oideqint4. Until we have
-- some cleaner way of dealing with binary-equivalent types, just leave
-- those two tuples in the expected output.
SELECT p1.oid, p1.oprname
FROM pg_operator AS p1
WHERE p1.oprcanhash AND NOT
(p1.oprkind = 'b' AND p1.oprresult = 16 AND p1.oprleft = p1.oprright AND
p1.oprname = '=' AND p1.oprcom = p1.oid);
-- Look for array equality operators that are hashable when the underlying
-- type is not, or vice versa. This is presumably bogus.
SELECT p1.oid, p1.oprcanhash, p2.oid, p2.oprcanhash, t1.typname, t2.typname
FROM pg_operator AS p1, pg_operator AS p2, pg_type AS t1, pg_type AS t2
WHERE p1.oprname = '=' AND p1.oprleft = p1.oprright AND
p2.oprname = '=' AND p2.oprleft = p2.oprright AND
p1.oprleft = t1.oid AND p2.oprleft = t2.oid AND t1.typelem = t2.oid AND
p1.oprcanhash != p2.oprcanhash;
-- Check that each operator defined in pg_operator matches its oprcode entry
-- in pg_proc. Easiest to do this separately for each oprkind.
-- FIXME: want to check that argument/result types match, but how to do that
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment