Commit ef655663 authored by Tom Lane's avatar Tom Lane

Further hacking on ruleutils' new column-alias-assignment code.

After further thought about implicit coercions appearing in a joinaliasvars
list, I realized that they represent an additional reason why we might need
to reference the join output column directly instead of referencing an
underlying column.  Consider SELECT x FROM t1 LEFT JOIN t2 USING (x) where
t1.x is of type date while t2.x is of type timestamptz.  The merged output
variable is of type timestamptz, but it won't go to null when t2 does,
therefore neither t1.x nor t2.x is a valid substitute reference.

The code in get_variable() actually gets this case right, since it knows
it shouldn't look through a coercion, but we failed to ensure that the
unqualified output column name would be globally unique.  To fix, modify
the code that trawls for a dangerous situation so that it actually scans
through an unnamed join's joinaliasvars list to see if there are any
non-simple-Var entries.
parent bb686c9a
...@@ -164,10 +164,10 @@ typedef struct ...@@ -164,10 +164,10 @@ typedef struct
* since they just inherit column names from their input RTEs, and we can't * since they just inherit column names from their input RTEs, and we can't
* rename the columns at the join level. Most of the time this isn't an issue * rename the columns at the join level. Most of the time this isn't an issue
* because we don't need to reference the join's output columns as such; we * because we don't need to reference the join's output columns as such; we
* can reference the input columns instead. That approach fails for merged * can reference the input columns instead. That approach can fail for merged
* FULL JOIN USING columns, however, so when we have one of those in an * JOIN USING columns, however, so when we have one of those in an unnamed
* unnamed join, we have to make that column's alias globally unique across * join, we have to make that column's alias globally unique across the whole
* the whole query to ensure it can be referenced unambiguously. * query to ensure it can be referenced unambiguously.
* *
* Another problem is that a JOIN USING clause requires the columns to be * Another problem is that a JOIN USING clause requires the columns to be
* merged to have the same aliases in both input RTEs. To handle that, we do * merged to have the same aliases in both input RTEs. To handle that, we do
...@@ -301,7 +301,7 @@ static bool refname_is_unique(char *refname, deparse_namespace *dpns, ...@@ -301,7 +301,7 @@ static bool refname_is_unique(char *refname, deparse_namespace *dpns,
static void set_deparse_for_query(deparse_namespace *dpns, Query *query, static void set_deparse_for_query(deparse_namespace *dpns, Query *query,
List *parent_namespaces); List *parent_namespaces);
static void set_simple_column_names(deparse_namespace *dpns); static void set_simple_column_names(deparse_namespace *dpns);
static bool has_unnamed_full_join_using(Node *jtnode); static bool has_dangerous_join_using(deparse_namespace *dpns, Node *jtnode);
static void set_using_names(deparse_namespace *dpns, Node *jtnode); static void set_using_names(deparse_namespace *dpns, Node *jtnode);
static void set_relation_column_names(deparse_namespace *dpns, static void set_relation_column_names(deparse_namespace *dpns,
RangeTblEntry *rte, RangeTblEntry *rte,
...@@ -2570,7 +2570,7 @@ set_deparse_for_query(deparse_namespace *dpns, Query *query, ...@@ -2570,7 +2570,7 @@ set_deparse_for_query(deparse_namespace *dpns, Query *query,
{ {
/* Detect whether global uniqueness of USING names is needed */ /* Detect whether global uniqueness of USING names is needed */
dpns->unique_using = dpns->unique_using =
has_unnamed_full_join_using((Node *) query->jointree); has_dangerous_join_using(dpns, (Node *) query->jointree);
/* /*
* Select names for columns merged by USING, via a recursive pass over * Select names for columns merged by USING, via a recursive pass over
...@@ -2630,25 +2630,26 @@ set_simple_column_names(deparse_namespace *dpns) ...@@ -2630,25 +2630,26 @@ set_simple_column_names(deparse_namespace *dpns)
} }
/* /*
* has_unnamed_full_join_using: search jointree for unnamed FULL JOIN USING * has_dangerous_join_using: search jointree for unnamed JOIN USING
* *
* Merged columns of a FULL JOIN USING act differently from either of the * Merged columns of a JOIN USING may act differently from either of the input
* input columns, so they have to be referenced as columns of the JOIN not * columns, either because they are merged with COALESCE (in a FULL JOIN) or
* as columns of either input. And this is problematic if the join is * because an implicit coercion of the underlying input column is required.
* unnamed (alias-less): we cannot qualify the column's name with an RTE * In such a case the column must be referenced as a column of the JOIN not as
* name, since there is none. (Forcibly assigning an alias to the join is * a column of either input. And this is problematic if the join is unnamed
* not a solution, since that will prevent legal references to tables below * (alias-less): we cannot qualify the column's name with an RTE name, since
* the join.) To ensure that every column in the query is unambiguously * there is none. (Forcibly assigning an alias to the join is not a solution,
* referenceable, we must assign such merged columns names that are globally * since that will prevent legal references to tables below the join.)
* unique across the whole query, aliasing other columns out of the way as * To ensure that every column in the query is unambiguously referenceable,
* necessary. * we must assign such merged columns names that are globally unique across
* the whole query, aliasing other columns out of the way as necessary.
* *
* Because the ensuing re-aliasing is fairly damaging to the readability of * Because the ensuing re-aliasing is fairly damaging to the readability of
* the query, we don't do this unless we have to. So, we must pre-scan * the query, we don't do this unless we have to. So, we must pre-scan
* the join tree to see if we have to, before starting set_using_names(). * the join tree to see if we have to, before starting set_using_names().
*/ */
static bool static bool
has_unnamed_full_join_using(Node *jtnode) has_dangerous_join_using(deparse_namespace *dpns, Node *jtnode)
{ {
if (IsA(jtnode, RangeTblRef)) if (IsA(jtnode, RangeTblRef))
{ {
...@@ -2661,7 +2662,7 @@ has_unnamed_full_join_using(Node *jtnode) ...@@ -2661,7 +2662,7 @@ has_unnamed_full_join_using(Node *jtnode)
foreach(lc, f->fromlist) foreach(lc, f->fromlist)
{ {
if (has_unnamed_full_join_using((Node *) lfirst(lc))) if (has_dangerous_join_using(dpns, (Node *) lfirst(lc)))
return true; return true;
} }
} }
...@@ -2669,16 +2670,30 @@ has_unnamed_full_join_using(Node *jtnode) ...@@ -2669,16 +2670,30 @@ has_unnamed_full_join_using(Node *jtnode)
{ {
JoinExpr *j = (JoinExpr *) jtnode; JoinExpr *j = (JoinExpr *) jtnode;
/* Is it an unnamed FULL JOIN with USING? */ /* Is it an unnamed JOIN with USING? */
if (j->alias == NULL && if (j->alias == NULL && j->usingClause)
j->jointype == JOIN_FULL && {
j->usingClause) /*
* Yes, so check each join alias var to see if any of them are not
* simple references to underlying columns. If so, we have a
* dangerous situation and must pick unique aliases.
*/
RangeTblEntry *jrte = rt_fetch(j->rtindex, dpns->rtable);
ListCell *lc;
foreach(lc, jrte->joinaliasvars)
{
Var *aliasvar = (Var *) lfirst(lc);
if (aliasvar != NULL && !IsA(aliasvar, Var))
return true; return true;
}
}
/* Nope, but inspect children */ /* Nope, but inspect children */
if (has_unnamed_full_join_using(j->larg)) if (has_dangerous_join_using(dpns, j->larg))
return true; return true;
if (has_unnamed_full_join_using(j->rarg)) if (has_dangerous_join_using(dpns, j->rarg))
return true; return true;
} }
else else
...@@ -2768,16 +2783,16 @@ set_using_names(deparse_namespace *dpns, Node *jtnode) ...@@ -2768,16 +2783,16 @@ set_using_names(deparse_namespace *dpns, Node *jtnode)
* *
* If dpns->unique_using is TRUE, we force all USING names to be * If dpns->unique_using is TRUE, we force all USING names to be
* unique across the whole query level. In principle we'd only need * unique across the whole query level. In principle we'd only need
* the names of USING columns in unnamed full joins to be globally * the names of dangerous USING columns to be globally unique, but to
* unique, but to safely assign all USING names in a single pass, we * safely assign all USING names in a single pass, we have to enforce
* have to enforce the same uniqueness rule for all of them. However, * the same uniqueness rule for all of them. However, if a USING
* if a USING column's name has been pushed down from the parent, we * column's name has been pushed down from the parent, we should use
* should use it as-is rather than making a uniqueness adjustment. * it as-is rather than making a uniqueness adjustment. This is
* This is necessary when we're at an unnamed join, and it creates no * necessary when we're at an unnamed join, and it creates no risk of
* risk of ambiguity. Also, if there's a user-written output alias * ambiguity. Also, if there's a user-written output alias for a
* for a merged column, we prefer to use that rather than the input * merged column, we prefer to use that rather than the input name;
* name; this simplifies the logic and seems likely to lead to less * this simplifies the logic and seems likely to lead to less aliasing
* aliasing overall. * overall.
* *
* If dpns->unique_using is FALSE, we only need USING names to be * If dpns->unique_using is FALSE, we only need USING names to be
* unique within their own join RTE. We still need to honor * unique within their own join RTE. We still need to honor
...@@ -5344,9 +5359,8 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context) ...@@ -5344,9 +5359,8 @@ get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context)
* If it's a simple reference to one of the input vars, then recursively * If it's a simple reference to one of the input vars, then recursively
* print the name of that var instead. When it's not a simple reference, * print the name of that var instead. When it's not a simple reference,
* we have to just print the unqualified join column name. (This can only * we have to just print the unqualified join column name. (This can only
* happen with columns that were merged by USING or NATURAL clauses in a * happen with "dangerous" merged columns in a JOIN USING; we took pains
* FULL JOIN; we took pains previously to make the unqualified column name * previously to make the unqualified column name unique in such cases.)
* unique in such cases.)
* *
* This wouldn't work in decompiling plan trees, because we don't store * This wouldn't work in decompiling plan trees, because we don't store
* joinaliasvars lists after planning; but a plan tree should never * joinaliasvars lists after planning; but a plan tree should never
......
...@@ -1243,6 +1243,34 @@ select pg_get_viewdef('vv4', true); ...@@ -1243,6 +1243,34 @@ select pg_get_viewdef('vv4', true);
FULL JOIN tt8 tt8y(x_1, z, z2) USING (x_1); FULL JOIN tt8 tt8y(x_1, z, z2) USING (x_1);
(1 row) (1 row)
-- Implicit coercions in a JOIN USING create issues similar to FULL JOIN
create table tt7a (x date, xx int, y int);
alter table tt7a drop column xx;
create table tt8a (x timestamptz, z int);
create view vv2a as
select * from (values(now(),2,3,now(),5)) v(a,b,c,d,e)
union all
select * from tt7a left join tt8a using (x), tt8a tt8ax;
select pg_get_viewdef('vv2a', true);
pg_get_viewdef
----------------------------------------------------------------
SELECT v.a, +
v.b, +
v.c, +
v.d, +
v.e +
FROM ( VALUES (now(),2,3,now(),5)) v(a, b, c, d, e)+
UNION ALL +
SELECT x AS a, +
tt7a.y AS b, +
tt8a.z AS c, +
tt8ax.x_1 AS d, +
tt8ax.z AS e +
FROM tt7a +
LEFT JOIN tt8a USING (x), +
tt8a tt8ax(x_1, z);
(1 row)
-- --
-- Also check dropping a column that existed when the view was made -- Also check dropping a column that existed when the view was made
-- --
......
...@@ -389,6 +389,19 @@ select pg_get_viewdef('vv2', true); ...@@ -389,6 +389,19 @@ select pg_get_viewdef('vv2', true);
select pg_get_viewdef('vv3', true); select pg_get_viewdef('vv3', true);
select pg_get_viewdef('vv4', true); select pg_get_viewdef('vv4', true);
-- Implicit coercions in a JOIN USING create issues similar to FULL JOIN
create table tt7a (x date, xx int, y int);
alter table tt7a drop column xx;
create table tt8a (x timestamptz, z int);
create view vv2a as
select * from (values(now(),2,3,now(),5)) v(a,b,c,d,e)
union all
select * from tt7a left join tt8a using (x), tt8a tt8ax;
select pg_get_viewdef('vv2a', true);
-- --
-- Also check dropping a column that existed when the view was made -- Also check dropping a column that existed when the view was made
-- --
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment