Commit 054adca6 authored by Peter Eisentraut

Disable anonymous record hash support except in special cases

Commit 01e658fa added hash support for row types.  This also added
support for hashing anonymous record types, using the same approach
that the type cache uses for comparison support for record types: It
just reports that it works, but it might fail at run time if a
component type doesn't actually support the operation.  We get away
with that for comparison because most types support that.  But some
types don't support hashing, so the current state can result in
failures at run time where the planner chooses hashing over sorting,
whereas that previously worked if only sorting was an option.

We do, however, want the record hashing support for path tracking in
recursive unions, and the SEARCH and CYCLE clauses built on that.  In
that case, hashing is the only plan option.  To enable that, this
commit implements the following approach: The type cache does not
report that hashing is available for the record type.  This undoes
that part of 01e658fa.  Instead, callers that require hashing no
matter what can override that result themselves.  This patch only
touches the callers to make the aforementioned recursive query cases
work, namely the parse analysis of unions, as well as the hash_array()
function.
Reported-by: Sait Talha Nisanci <sait.nisanci@microsoft.com>
Bug: #17158
Discussion: https://www.postgresql.org/message-id/flat/17158-8a2ba823982537a4%40postgresql.org
parent 8db27fbc
...@@ -1852,9 +1852,12 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt) ...@@ -1852,9 +1852,12 @@ transformSetOperationStmt(ParseState *pstate, SelectStmt *stmt)
/* /*
* Make a SortGroupClause node for a SetOperationStmt's groupClauses * Make a SortGroupClause node for a SetOperationStmt's groupClauses
*
* If require_hash is true, the caller is indicating that they need hash
* support or they will fail. So look extra hard for hash support.
*/ */
SortGroupClause * SortGroupClause *
makeSortGroupClauseForSetOp(Oid rescoltype) makeSortGroupClauseForSetOp(Oid rescoltype, bool require_hash)
{ {
SortGroupClause *grpcl = makeNode(SortGroupClause); SortGroupClause *grpcl = makeNode(SortGroupClause);
Oid sortop; Oid sortop;
...@@ -1867,6 +1870,15 @@ makeSortGroupClauseForSetOp(Oid rescoltype) ...@@ -1867,6 +1870,15 @@ makeSortGroupClauseForSetOp(Oid rescoltype)
&sortop, &eqop, NULL, &sortop, &eqop, NULL,
&hashable); &hashable);
/*
* The type cache doesn't believe that record is hashable (see
* cache_record_field_properties()), but if the caller really needs hash
* support, we can assume it does. Worst case, if any components of the
* record don't support hashing, we will fail at execution.
*/
if (require_hash && (rescoltype == RECORDOID || rescoltype == RECORDARRAYOID))
hashable = true;
/* we don't have a tlist yet, so can't assign sortgrouprefs */ /* we don't have a tlist yet, so can't assign sortgrouprefs */
grpcl->tleSortGroupRef = 0; grpcl->tleSortGroupRef = 0;
grpcl->eqop = eqop; grpcl->eqop = eqop;
...@@ -2027,6 +2039,8 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, ...@@ -2027,6 +2039,8 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
ListCell *ltl; ListCell *ltl;
ListCell *rtl; ListCell *rtl;
const char *context; const char *context;
bool recursive = (pstate->p_parent_cte &&
pstate->p_parent_cte->cterecursive);
context = (stmt->op == SETOP_UNION ? "UNION" : context = (stmt->op == SETOP_UNION ? "UNION" :
(stmt->op == SETOP_INTERSECT ? "INTERSECT" : (stmt->op == SETOP_INTERSECT ? "INTERSECT" :
...@@ -2048,9 +2062,7 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, ...@@ -2048,9 +2062,7 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
* containing CTE as having those result columns. We should do this * containing CTE as having those result columns. We should do this
* only at the topmost setop of the CTE, of course. * only at the topmost setop of the CTE, of course.
*/ */
if (isTopLevel && if (isTopLevel && recursive)
pstate->p_parent_cte &&
pstate->p_parent_cte->cterecursive)
determineRecursiveColTypes(pstate, op->larg, ltargetlist); determineRecursiveColTypes(pstate, op->larg, ltargetlist);
/* /*
...@@ -2182,8 +2194,9 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt, ...@@ -2182,8 +2194,9 @@ transformSetOperationTree(ParseState *pstate, SelectStmt *stmt,
setup_parser_errposition_callback(&pcbstate, pstate, setup_parser_errposition_callback(&pcbstate, pstate,
bestlocation); bestlocation);
/* If it's a recursive union, we need to require hashing support. */
op->groupClauses = lappend(op->groupClauses, op->groupClauses = lappend(op->groupClauses,
makeSortGroupClauseForSetOp(rescoltype)); makeSortGroupClauseForSetOp(rescoltype, recursive));
cancel_parser_errposition_callback(&pcbstate); cancel_parser_errposition_callback(&pcbstate);
} }
......
...@@ -594,7 +594,7 @@ rewriteSearchAndCycle(CommonTableExpr *cte) ...@@ -594,7 +594,7 @@ rewriteSearchAndCycle(CommonTableExpr *cte)
sos->colCollations = lappend_oid(sos->colCollations, InvalidOid); sos->colCollations = lappend_oid(sos->colCollations, InvalidOid);
if (!sos->all) if (!sos->all)
sos->groupClauses = lappend(sos->groupClauses, sos->groupClauses = lappend(sos->groupClauses,
makeSortGroupClauseForSetOp(search_seq_type)); makeSortGroupClauseForSetOp(search_seq_type, true));
} }
if (cte->cycle_clause) if (cte->cycle_clause)
{ {
...@@ -603,14 +603,14 @@ rewriteSearchAndCycle(CommonTableExpr *cte) ...@@ -603,14 +603,14 @@ rewriteSearchAndCycle(CommonTableExpr *cte)
sos->colCollations = lappend_oid(sos->colCollations, cte->cycle_clause->cycle_mark_collation); sos->colCollations = lappend_oid(sos->colCollations, cte->cycle_clause->cycle_mark_collation);
if (!sos->all) if (!sos->all)
sos->groupClauses = lappend(sos->groupClauses, sos->groupClauses = lappend(sos->groupClauses,
makeSortGroupClauseForSetOp(cte->cycle_clause->cycle_mark_type)); makeSortGroupClauseForSetOp(cte->cycle_clause->cycle_mark_type, true));
sos->colTypes = lappend_oid(sos->colTypes, RECORDARRAYOID); sos->colTypes = lappend_oid(sos->colTypes, RECORDARRAYOID);
sos->colTypmods = lappend_int(sos->colTypmods, -1); sos->colTypmods = lappend_int(sos->colTypmods, -1);
sos->colCollations = lappend_oid(sos->colCollations, InvalidOid); sos->colCollations = lappend_oid(sos->colCollations, InvalidOid);
if (!sos->all) if (!sos->all)
sos->groupClauses = lappend(sos->groupClauses, sos->groupClauses = lappend(sos->groupClauses,
makeSortGroupClauseForSetOp(RECORDARRAYOID)); makeSortGroupClauseForSetOp(RECORDARRAYOID, true));
} }
/* /*
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "utils/arrayaccess.h" #include "utils/arrayaccess.h"
#include "utils/builtins.h" #include "utils/builtins.h"
#include "utils/datum.h" #include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/lsyscache.h" #include "utils/lsyscache.h"
#include "utils/memutils.h" #include "utils/memutils.h"
#include "utils/selfuncs.h" #include "utils/selfuncs.h"
...@@ -3973,13 +3974,46 @@ hash_array(PG_FUNCTION_ARGS) ...@@ -3973,13 +3974,46 @@ hash_array(PG_FUNCTION_ARGS)
{ {
typentry = lookup_type_cache(element_type, typentry = lookup_type_cache(element_type,
TYPECACHE_HASH_PROC_FINFO); TYPECACHE_HASH_PROC_FINFO);
if (!OidIsValid(typentry->hash_proc_finfo.fn_oid)) if (!OidIsValid(typentry->hash_proc_finfo.fn_oid) && element_type != RECORDOID)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_FUNCTION), (errcode(ERRCODE_UNDEFINED_FUNCTION),
errmsg("could not identify a hash function for type %s", errmsg("could not identify a hash function for type %s",
format_type_be(element_type)))); format_type_be(element_type))));
/*
* The type cache doesn't believe that record is hashable (see
* cache_record_field_properties()), but since we're here, we're
* committed to hashing, so we can assume it does. Worst case, if any
* components of the record don't support hashing, we will fail at
* execution.
*/
if (element_type == RECORDOID)
{
MemoryContext oldcontext;
TypeCacheEntry *record_typentry;
oldcontext = MemoryContextSwitchTo(CacheMemoryContext);
/*
* Make fake type cache entry structure. Note that we can't just
* modify typentry, since that points directly into the type cache.
*/
record_typentry = palloc(sizeof(*record_typentry));
/* fill in what we need below */
record_typentry->typlen = typentry->typlen;
record_typentry->typbyval = typentry->typbyval;
record_typentry->typalign = typentry->typalign;
fmgr_info(F_HASH_RECORD, &record_typentry->hash_proc_finfo);
MemoryContextSwitchTo(oldcontext);
typentry = record_typentry;
}
fcinfo->flinfo->fn_extra = (void *) typentry; fcinfo->flinfo->fn_extra = (void *) typentry;
} }
typlen = typentry->typlen; typlen = typentry->typlen;
typbyval = typentry->typbyval; typbyval = typentry->typbyval;
typalign = typentry->typalign; typalign = typentry->typalign;
......
...@@ -1514,14 +1514,17 @@ cache_record_field_properties(TypeCacheEntry *typentry) ...@@ -1514,14 +1514,17 @@ cache_record_field_properties(TypeCacheEntry *typentry)
/* /*
* For type RECORD, we can't really tell what will work, since we don't * For type RECORD, we can't really tell what will work, since we don't
* have access here to the specific anonymous type. Just assume that * have access here to the specific anonymous type. Just assume that
* everything will (we may get a failure at runtime ...) * equality and comparison will (we may get a failure at runtime). We
* could also claim that hashing works, but then if code that has the
* option between a comparison-based (sort-based) and a hash-based plan
* chooses hashing, stuff could fail that would otherwise work if it chose
* a comparison-based plan. In practice more types support comparison
* than hashing.
*/ */
if (typentry->type_id == RECORDOID) if (typentry->type_id == RECORDOID)
{ {
typentry->flags |= (TCFLAGS_HAVE_FIELD_EQUALITY | typentry->flags |= (TCFLAGS_HAVE_FIELD_EQUALITY |
TCFLAGS_HAVE_FIELD_COMPARE | TCFLAGS_HAVE_FIELD_COMPARE);
TCFLAGS_HAVE_FIELD_HASHING |
TCFLAGS_HAVE_FIELD_EXTENDED_HASHING);
} }
else if (typentry->typtype == TYPTYPE_COMPOSITE) else if (typentry->typtype == TYPTYPE_COMPOSITE)
{ {
......
...@@ -48,6 +48,6 @@ extern void applyLockingClause(Query *qry, Index rtindex, ...@@ -48,6 +48,6 @@ extern void applyLockingClause(Query *qry, Index rtindex,
extern List *BuildOnConflictExcludedTargetlist(Relation targetrel, extern List *BuildOnConflictExcludedTargetlist(Relation targetrel,
Index exclRelIndex); Index exclRelIndex);
extern SortGroupClause *makeSortGroupClauseForSetOp(Oid rescoltype); extern SortGroupClause *makeSortGroupClauseForSetOp(Oid rescoltype, bool require_hash);
#endif /* ANALYZE_H */ #endif /* ANALYZE_H */
...@@ -648,34 +648,37 @@ reset enable_hashagg; ...@@ -648,34 +648,37 @@ reset enable_hashagg;
set enable_hashagg to on; set enable_hashagg to on;
explain (costs off) explain (costs off)
select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x); select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
QUERY PLAN QUERY PLAN
----------------------------------------- -----------------------------------------------
HashAggregate Unique
Group Key: "*VALUES*".column1 -> Sort
-> Append Sort Key: "*VALUES*".column1
-> Values Scan on "*VALUES*" -> Append
-> Values Scan on "*VALUES*_1" -> Values Scan on "*VALUES*"
(5 rows) -> Values Scan on "*VALUES*_1"
(6 rows)
select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x); select x from (values (row(1, 2)), (row(1, 3))) _(x) union select x from (values (row(1, 2)), (row(1, 4))) _(x);
x x
------- -------
(1,4)
(1,3)
(1,2) (1,2)
(1,3)
(1,4)
(3 rows) (3 rows)
explain (costs off) explain (costs off)
select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x); select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
QUERY PLAN QUERY PLAN
----------------------------------------------- -----------------------------------------------------
HashSetOp Intersect SetOp Intersect
-> Append -> Sort
-> Subquery Scan on "*SELECT* 1" Sort Key: "*SELECT* 1".x
-> Values Scan on "*VALUES*" -> Append
-> Subquery Scan on "*SELECT* 2" -> Subquery Scan on "*SELECT* 1"
-> Values Scan on "*VALUES*_1" -> Values Scan on "*VALUES*"
(6 rows) -> Subquery Scan on "*SELECT* 2"
-> Values Scan on "*VALUES*_1"
(8 rows)
select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x); select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (values (row(1, 2)), (row(1, 4))) _(x);
x x
...@@ -685,15 +688,17 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (va ...@@ -685,15 +688,17 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) intersect select x from (va
explain (costs off) explain (costs off)
select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x); select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
QUERY PLAN QUERY PLAN
----------------------------------------------- -----------------------------------------------------
HashSetOp Except SetOp Except
-> Append -> Sort
-> Subquery Scan on "*SELECT* 1" Sort Key: "*SELECT* 1".x
-> Values Scan on "*VALUES*" -> Append
-> Subquery Scan on "*SELECT* 2" -> Subquery Scan on "*SELECT* 1"
-> Values Scan on "*VALUES*_1" -> Values Scan on "*VALUES*"
(6 rows) -> Subquery Scan on "*SELECT* 2"
-> Values Scan on "*VALUES*_1"
(8 rows)
select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x); select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (values (row(1, 2)), (row(1, 4))) _(x);
x x
...@@ -702,21 +707,28 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (value ...@@ -702,21 +707,28 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (value
(1 row) (1 row)
-- non-hashable type -- non-hashable type
-- With an anonymous row type, the typcache reports that the type is -- With an anonymous row type, the typcache does not report that the
-- hashable, but then it will fail at run time. -- type is hashable. (Otherwise, this would fail at execution time.)
explain (costs off) explain (costs off)
select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x); select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
QUERY PLAN QUERY PLAN
----------------------------------------- -----------------------------------------------
HashAggregate Unique
Group Key: "*VALUES*".column1 -> Sort
-> Append Sort Key: "*VALUES*".column1
-> Values Scan on "*VALUES*" -> Append
-> Values Scan on "*VALUES*_1" -> Values Scan on "*VALUES*"
(5 rows) -> Values Scan on "*VALUES*_1"
(6 rows)
select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x); select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
ERROR: could not identify a hash function for type money x
-----------
($100.00)
($200.00)
($300.00)
(3 rows)
-- With a defined row type, the typcache can inspect the type's fields -- With a defined row type, the typcache can inspect the type's fields
-- for hashability. -- for hashability.
create type ct1 as (f1 money); create type ct1 as (f1 money);
......
...@@ -218,8 +218,8 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (value ...@@ -218,8 +218,8 @@ select x from (values (row(1, 2)), (row(1, 3))) _(x) except select x from (value
-- non-hashable type -- non-hashable type
-- With an anonymous row type, the typcache reports that the type is -- With an anonymous row type, the typcache does not report that the
-- hashable, but then it will fail at run time. -- type is hashable. (Otherwise, this would fail at execution time.)
explain (costs off) explain (costs off)
select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x); select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x); select x from (values (row(100::money)), (row(200::money))) _(x) union select x from (values (row(100::money)), (row(300::money))) _(x);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment