Revise SELECT reference page for outer joins, subselect in FROM,

ISO-compliant UNION/INTERSECT/EXCEPT. Revise discussion of rule rewriter to reflect new subselect-in-FROM implementation of views. Miscellaneous other cleanups.

Revise SELECT reference page for outer joins, subselect in FROM,
ISO-compliant UNION/INTERSECT/EXCEPT. Revise discussion of rule rewriter to reflect new subselect-in-FROM implementation of views. Miscellaneous other cleanups.
1045304a · Tom Lane · c4e3b0c3 · 1045304a · 1045304a · 1045304a
Commit 1045304a authored Dec 12, 2000 by Tom Lane
4 changed files
--- a/doc/src/sgml/ref/select.sgml
+++ b/doc/src/sgml/ref/select.sgml
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/ref/select.sgml,v 1.34 2000/12/08 20:11:11 momjian Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/ref/select.sgml,v 1.35 2000/12/12 05:07:59 tgl Exp $
 Postgres documentation
 -->

@@ -19,25 +19,36 @@ Postgres documentation
  </refpurpose></refnamediv>
 <refsynopsisdiv>
  <refsynopsisdivinfo>
-   <date>1999-07-20</date>
+   <date>2000-12-11</date>
  </refsynopsisdivinfo>
  <synopsis>
 SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replaceable> [, ...] ) ] ]
-    <replaceable class="PARAMETER">expression</replaceable> [ AS <replaceable class="PARAMETER">name</replaceable> ] [, ...]
+    * | <replaceable class="PARAMETER">expression</replaceable> [ AS <replaceable class="PARAMETER">output_name</replaceable> ] [, ...]
    [ INTO [ TEMPORARY | TEMP ] [ TABLE ] <replaceable class="PARAMETER">new_table</replaceable> ]
-    [ FROM [ ONLY ]<replaceable class="PARAMETER">table</replaceable> [ <replaceable class="PARAMETER">alias</replaceable> ] [, ...] ]
+    [ FROM <replaceable class="PARAMETER">from_item</replaceable> [, ...] ]
    [ WHERE <replaceable class="PARAMETER">condition</replaceable> ]
-    [ GROUP BY <replaceable class="PARAMETER">column</replaceable> [, ...] ]
+    [ GROUP BY <replaceable class="PARAMETER">expression</replaceable> [, ...] ]
    [ HAVING <replaceable class="PARAMETER">condition</replaceable> [, ...] ]
-    [ { UNION [ ALL ] | INTERSECT | EXCEPT } <replaceable class="PARAMETER">select</replaceable> ]
-    [ ORDER BY <replaceable class="PARAMETER">column</replaceable> [ ASC | DESC | USING <replaceable class="PARAMETER">operator</replaceable> ] [, ...] ]
+    [ { UNION | INTERSECT | EXCEPT [ ALL ] } <replaceable class="PARAMETER">select</replaceable> ]
+    [ ORDER BY <replaceable class="PARAMETER">expression</replaceable> [ ASC | DESC | USING <replaceable class="PARAMETER">operator</replaceable> ] [, ...] ]
    [ FOR UPDATE [ OF <replaceable class="PARAMETER">class_name</replaceable> [, ...] ] ]
    [ LIMIT { <replaceable class="PARAMETER">count</replaceable> | ALL } [ { OFFSET | , } <replaceable class="PARAMETER">start</replaceable> ]]
+
+where <replaceable class="PARAMETER">from_item</replaceable> can be:
+
+[ ONLY ] <replaceable class="PARAMETER">table_name</replaceable> [ * ]
+    [ [ AS ] <replaceable class="PARAMETER">alias</replaceable> [ ( <replaceable class="PARAMETER">column_alias_list</replaceable> ) ] ]
+|
+( <replaceable class="PARAMETER">select</replaceable> )
+    [ AS ] <replaceable class="PARAMETER">alias</replaceable> [ ( <replaceable class="PARAMETER">column_alias_list</replaceable> ) ]
+|
+<replaceable class="PARAMETER">from_item</replaceable> [ NATURAL ] <replaceable class="PARAMETER">join_type</replaceable> <replaceable class="PARAMETER">from_item</replaceable>
+    [ ON <replaceable class="PARAMETER">join_condition</replaceable> | USING ( <replaceable class="PARAMETER">join_column_list</replaceable> ) ]
  </synopsis>
  
  <refsect2 id="R2-SQL-SELECT-1">
   <refsect2info>
-    <date>2000-03-15</date>
+    <date>2000-12-11</date>
   </refsect2info>
   <title>
    Inputs
@@ -55,14 +66,14 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
     </varlistentry>
     
     <varlistentry>
-      <term><replaceable class="PARAMETER">name</replaceable></term>
+      <term><replaceable class="PARAMETER">output_name</replaceable></term>
      <listitem>
       <para>
-	Specifies another name for a column or an expression using
+	Specifies another name for an output column using
 	the AS clause.  This name is primarily used to label the column
 	for display.  It can also be used to refer to the column's value in
 	ORDER BY and GROUP BY clauses.  But the
-	<replaceable class="PARAMETER">name</replaceable>
+	<replaceable class="PARAMETER">output_name</replaceable>
 	cannot be used in the WHERE or HAVING clauses; write out the
 	expression instead.
       </para>
@@ -75,7 +86,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
     <listitem>
      <para>
 	If TEMPORARY or TEMP is specified,
-	the table is created unique to this session, and is
+	the output table is created unique to this session, and is
 	automatically dropped on session exit.
      </para>
     </listitem>
@@ -87,7 +98,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
       <para>
 	If the INTO TABLE clause is specified, the result of the
 	query will be stored in a new table with the indicated
-	name.
+	name, rather than being returned to the client.
 	The target table (<replaceable class="PARAMETER">new_table</replaceable>) will
 	be created automatically and must not exist before this command.
        Refer to <command>SELECT INTO</command> for more information.
@@ -95,7 +106,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
 	<note>
 	 <para>
 	  The <command>CREATE TABLE AS</command> statement will also
-	  create a new  table from a select query.
+	  create a new  table from a SELECT query.
 	 </para>
 	</note>
       </para>
@@ -103,41 +114,66 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
     </varlistentry>
     
     <varlistentry>
-      <term><replaceable class="PARAMETER">table</replaceable></term>
+      <term><replaceable class="PARAMETER">from_item</replaceable></term>
      <listitem>
       <para>
-	The name of an existing table referenced by the FROM clause.
+        A table reference, sub-SELECT, or JOIN clause.  See below for details.
       </para>
      </listitem>
     </varlistentry>
     
     <varlistentry>
-      <term><replaceable class="PARAMETER">alias</replaceable></term>
+      <term><replaceable class="PARAMETER">condition</replaceable></term>
      <listitem>
       <para>
-	An alternate name for the preceding
-	<replaceable class="PARAMETER">table</replaceable>.
-	It is used for brevity or to eliminate ambiguity for joins
-	within a single table.
+	A boolean expression giving a result of true or false.
+	See the WHERE and HAVING clause descriptions below.
       </para>
      </listitem>
     </varlistentry>
     
     <varlistentry>
-      <term><replaceable class="PARAMETER">condition</replaceable></term>
+      <term><replaceable class="PARAMETER">select</replaceable></term>
      <listitem>
       <para>
-	A boolean expression giving a result of true or false.
-	See the WHERE clause.
+	A select statement with all features except the ORDER BY, FOR UPDATE,
+	and LIMIT clauses (even those can be used when the select is
+	parenthesized).
       </para>
      </listitem>
     </varlistentry>
     
+    </variablelist>
+   </para>
+
+   <para>
+    FROM items can contain:
+    <variablelist>
+     
     <varlistentry>
-      <term><replaceable class="PARAMETER">column</replaceable></term>
+      <term><replaceable class="PARAMETER">table_name</replaceable></term>
      <listitem>
       <para>
-	The name of a table's column.
+	The name of an existing table or view.  If ONLY is specified, only that
+	table is scanned.  If ONLY is not specified, the table and all its
+	descendant tables (if any) are scanned.  * can be appended to the
+	table name to indicate that descendant tables are to be scanned,
+	but as of <Productname>Postgres</Productname> 7.1 this is the default
+	behavior.  (In releases before 7.1, ONLY was the default behavior.)
+       </para>
+      </listitem>
+     </varlistentry>
+     
+     <varlistentry>
+      <term><replaceable class="PARAMETER">alias</replaceable></term>
+      <listitem>
+       <para>
+	A substitute name for the preceding
+	<replaceable class="PARAMETER">table_name</replaceable>.
+	An alias is used for brevity or to eliminate ambiguity for self-joins
+	(where the same table is scanned multiple times).  If an alias is
+	written, a column alias list can also be written to provide
+	substitute names for one or more columns of the table.
       </para>
      </listitem>
     </varlistentry>
@@ -146,12 +182,54 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
      <term><replaceable class="PARAMETER">select</replaceable></term>
      <listitem>
       <para>
-	A select statement with all features except the ORDER BY and
-	LIMIT clauses.
+	A sub-SELECT can appear in the FROM clause.  This acts as though
+	its output were created as a temporary table for the duration of
+	this single SELECT command.  Note that the sub-SELECT must be
+	surrounded by parentheses, and an alias <emphasis>must</emphasis>
+	be provided for it.
+       </para>
+      </listitem>
+     </varlistentry>
+     
+     <varlistentry>
+      <term><replaceable class="PARAMETER">join_type</replaceable></term>
+      <listitem>
+       <para>
+       One of
+       <command>[ INNER ] JOIN</command>,
+       <command>LEFT [ OUTER ] JOIN</command>,
+       <command>RIGHT [ OUTER ] JOIN</command>,
+       <command>FULL [ OUTER ] JOIN</command>, or
+       <command>CROSS JOIN</command>.
+       For INNER and OUTER join types, exactly one of NATURAL,
+       ON <replaceable class="PARAMETER">join_condition</replaceable>, or
+       USING ( <replaceable class="PARAMETER">join_column_list</replaceable> )
+       must appear.  For CROSS JOIN, none of these items may appear.
+       </para>
+      </listitem>
+     </varlistentry>
+     
+     <varlistentry>
+      <term><replaceable class="PARAMETER">join_condition</replaceable></term>
+      <listitem>
+       <para>
+       A qualification condition.  This is similar to the WHERE condition
+       except that it only applies to the two from_items being joined in
+       this JOIN clause.
       </para>
      </listitem>
     </varlistentry>
     
+     <varlistentry>
+      <term><replaceable class="PARAMETER">join_column_list</replaceable></term>
+      <listitem>
+       <para>
+       A USING column list ( a, b, ... ) is shorthand for the ON condition
+       left_table.a = right_table.a AND left_table.b = right_table.b ...
+       </para>
+      </listitem>
+     </varlistentry>
+
    </variablelist>
   </para>
  </refsect2>
@@ -192,7 +270,7 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac

 <refsect1 id="R1-SQL-SELECT-1">
  <refsect1info>
-   <date>2000-03-15</date>
+   <date>2000-12-11</date>
  </refsect1info>
  <title>
   Description
@@ -203,12 +281,15 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
   if WHERE is omitted, all rows are candidates.
   (See <xref linkend="sql-where" endterm="sql-where-title">.)
  </para>
+
  <para>
-  <command>ONLY</command> will eliminate rows from subclasses of the table.
-  This was previously the default result, and getting subclasses was
-  obtained by appending <command>*</command> to the table name.
-  The old behaviour is available via the command 
-  <command>SET SQL_Inheritance TO OFF;</command>
+   Actually, the returned rows are not directly the rows produced by the
+   FROM/WHERE/GROUP BY/HAVING clauses; rather, the output rows are formed
+   by computing the SELECT output expressions for each selected row.
+   <command>*</command> can be written in the output list as a shorthand
+   for all the columns of the selected rows.  Also, one can write
+   <replaceable class="PARAMETER">table_name</replaceable><command>.*</command>
+   as a shorthand for the columns coming from just that table.
  </para>

  <para>
@@ -257,19 +338,28 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
  </para>
   
  <para>
-   The UNION operator allows the result to be the collection of rows
+   SELECT queries can be combined using UNION, INTERSECT, and EXCEPT
+   operators.  Use parentheses if necessary to determine the ordering
+   of these operators.
+  </para>
+   
+  <para>
+   The UNION operator computes the collection of rows
   returned by the queries involved.
+   Duplicate rows are eliminated unless ALL is specified.
   (See <xref linkend="sql-union" endterm="sql-union-title">.)
  </para>
   
  <para>
-   The INTERSECT operator gives you the rows that are common to both queries.
+   The INTERSECT operator computes the rows that are common to both queries.
+   Duplicate rows are eliminated unless ALL is specified.
   (See <xref linkend="sql-intersect" endterm="sql-intersect-title">.)
  </para>
   
  <para>
-   The EXCEPT operator gives you the rows returned by the first query but
+   The EXCEPT operator computes the rows returned by the first query but
   not the second query.
+   Duplicate rows are eliminated unless ALL is specified.
   (See <xref linkend="sql-except" endterm="sql-except-title">.)
  </para>
   
@@ -289,6 +379,95 @@ SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replac
   (See the <command>GRANT</command>/<command>REVOKE</command> statements).
  </para>
   
+  <refsect2 id="SQL-FROM">
+   <refsect2info>
+    <date>2000-12-11</date>
+   </refsect2info>
+   <title id="sql-from-title">
+    FROM Clause
+   </title>
+
+   <para>
+    The FROM clause specifies one or more source tables for the SELECT.
+    If multiple sources are specified, the result is conceptually the
+    Cartesian product of all the rows in all the sources --- but usually
+    qualification conditions are added to restrict the returned rows to
+    a small subset of the Cartesian product.
+   </para>
+
+   <para>
+    When a FROM item is a simple table name, it implicitly includes rows
+    from subclasses (inheritance children) of the table.
+    <command>ONLY</command> will
+    suppress rows from subclasses of the table.  Before
+    <Productname>Postgres</Productname> 7.1,
+    this was the default result, and adding subclasses was done
+    by appending <command>*</command> to the table name.
+    This old behaviour is available via the command 
+    <command>SET SQL_Inheritance TO OFF;</command>
+   </para>
+
+   <para>
+    A FROM item can also be a parenthesized sub-SELECT (note that an
+    alias clause is required for a sub-SELECT!).  This is an extremely
+    handy feature since it's the only way to get multiple levels of
+    grouping, aggregation, or sorting in a single query.
+   </para>
+
+   <para>
+    Finally, a FROM item can be a JOIN clause, which combines two simpler
+    FROM items.  (Use parentheses if necessary to determine the order
+    of nesting.)
+   </para>
+
+   <para>
+    A CROSS JOIN or INNER JOIN is a simple Cartesian product,
+    the same as you get from listing the two items at the top level of FROM.
+    CROSS JOIN is equivalent to INNER JOIN ON (TRUE), that is, no rows are
+    removed by qualification.  These join types are just a notational
+    convenience, since they do nothing you couldn't do with plain FROM and
+    WHERE.
+   </para>
+
+   <para>
+    LEFT OUTER JOIN returns all rows in the qualified Cartesian product
+    (ie, all combined rows that pass its ON condition), plus one copy of each
+    row in the left-hand table for which there was no right-hand row that
+    passed the ON condition.  This left-hand row is extended to the full
+    width of the joined table by inserting NULLs for the right-hand columns.
+    Note that only the JOIN's own ON or USING condition is considered while
+    deciding which rows have matches.  Outer ON or WHERE conditions are
+    applied afterwards.
+   </para>
+
+   <para>
+    Conversely, RIGHT OUTER JOIN returns all the joined rows, plus one row
+    for each unmatched right-hand row (extended with nulls on the left).
+    This is just a notational
+    convenience, since you could convert it to a LEFT OUTER JOIN by switching
+    the left and right inputs.
+   </para>
+
+   <para>
+    FULL OUTER JOIN returns all the joined rows, plus one row for each
+    unmatched left-hand row (extended with nulls on the right), plus one row
+    for each unmatched right-hand row (extended with nulls on the left).
+   </para>
+
+   <para>
+    For all the JOIN types except CROSS JOIN, you must write exactly one of
+    ON <replaceable class="PARAMETER">join_condition</replaceable>,
+    USING ( <replaceable class="PARAMETER">join_column_list</replaceable> ),
+    or NATURAL.  ON is the most general case: you can write any qualification
+    expression involving the two tables to be joined.
+    A USING column list ( a, b, ... ) is shorthand for the ON condition
+    left_table.a = right_table.a AND left_table.b = right_table.b ...
+    Also, USING implies that only one of each pair of equivalent columns will
+    be included in the JOIN output, not both.  NATURAL is shorthand for
+    a USING list that mentions all similarly-named columns in the tables.
+   </para>
+  </refsect2>
+   
  <refsect2 id="SQL-WHERE">
   <refsect2info>
    <date>2000-03-15</date>
@@ -340,7 +519,7 @@ WHERE <replaceable class="PARAMETER">boolean_expr</replaceable>
    GROUP BY specifies a grouped table derived by the application
    of this clause:
    <synopsis>
-GROUP BY <replaceable class="PARAMETER">column</replaceable> [, ...]
+GROUP BY <replaceable class="PARAMETER">expression</replaceable> [, ...]
    </synopsis>
   </para>

@@ -357,9 +536,10 @@ GROUP BY <replaceable class="PARAMETER">column</replaceable> [, ...]
   </para>

   <para>
-    An item in GROUP BY can also be the name or ordinal number of an output
-    column (SELECT expression), or it can be an arbitrary expression formed
-    from input-column values.  In case of ambiguity, a GROUP BY name will
+    A GROUP BY item can be an input column name, or the name or ordinal
+    number of an output column (SELECT expression), or it can be an arbitrary
+    expression formed from input-column values.  In case of ambiguity, a GROUP
+    BY name will 
    be interpreted as an input-column name rather than an output column name.
   </para>
  </refsect2>
@@ -375,17 +555,17 @@ GROUP BY <replaceable class="PARAMETER">column</replaceable> [, ...]
    The optional HAVING condition has the general form:
    
    <synopsis>
-HAVING <replaceable class="PARAMETER">cond_expr</replaceable>
+HAVING <replaceable class="PARAMETER">boolean_expr</replaceable>
    </synopsis>
    
-    where <replaceable class="PARAMETER">cond_expr</replaceable> is the same
+    where <replaceable class="PARAMETER">boolean_expr</replaceable> is the same
    as specified for the WHERE clause.
   </para>
    
   <para>
    HAVING specifies a grouped table derived by the elimination
    of group rows that do not satisfy the
-    <replaceable class="PARAMETER">cond_expr</replaceable>.
+    <replaceable class="PARAMETER">boolean_expr</replaceable>.
    HAVING is different from WHERE:
    WHERE filters individual rows before application of GROUP BY,
    while HAVING filters group rows created by GROUP BY.
@@ -393,7 +573,7 @@ HAVING <replaceable class="PARAMETER">cond_expr</replaceable>

   <para>
    Each column referenced in 
-    <replaceable class="PARAMETER">cond_expr</replaceable> shall unambiguously
+    <replaceable class="PARAMETER">boolean_expr</replaceable> shall unambiguously
    reference a grouping column, unless the reference appears within an
    aggregate function.
   </para>
@@ -408,15 +588,17 @@ HAVING <replaceable class="PARAMETER">cond_expr</replaceable>
   </title>
   <para>
    <synopsis>
-ORDER BY <replaceable class="PARAMETER">column</replaceable> [ ASC | DESC ] [, ...]
+ORDER BY <replaceable class="PARAMETER">expression</replaceable> [ ASC | DESC | USING <replaceable class="PARAMETER">operator</replaceable> ] [, ...]
    </synopsis></para>
    
   <para>
-    <replaceable class="PARAMETER">column</replaceable> can be either a
-    result column name or an ordinal number.
+    An ORDER BY item can be the name or ordinal
+    number of an output column (SELECT expression), or it can be an arbitrary
+    expression formed from input-column values.  In case of ambiguity, an
+    ORDER BY name will be interpreted as an output-column name.
   </para>
   <para>
-    The ordinal numbers refers to the ordinal (left-to-right) position
+    The ordinal number refers to the ordinal (left-to-right) position
    of the result column. This feature makes it possible to define an ordering
    on the basis of a column that does not have a proper name.
    This is never absolutely necessary because it is always possible
@@ -447,13 +629,13 @@ SELECT name FROM distributors ORDER BY code;
    or ASC (ascending) after each column name in the ORDER BY clause.
    If not specified, ASC is assumed by default.  Alternatively, a
    specific ordering operator name may be specified.  ASC is equivalent
-    to USING '&lt;' and DESC is equivalent to USING '&gt;'.
+    to USING &lt; and DESC is equivalent to USING &gt;.
   </para>
  </refsect2>
  
  <refsect2 id="SQL-UNION">
   <refsect2info>
-    <date>1998-09-24</date>
+    <date>2000-12-11</date>
   </refsect2info>
   <title id="sql-union-title">
    UNION Clause
@@ -461,16 +643,21 @@ SELECT name FROM distributors ORDER BY code;
   <para>
    <synopsis>
 <replaceable class="PARAMETER">table_query</replaceable> UNION [ ALL ] <replaceable class="PARAMETER">table_query</replaceable>
-    [ ORDER BY <replaceable class="PARAMETER">column</replaceable> [ ASC | DESC ] [, ...] ]
+    [ ORDER BY <replaceable class="PARAMETER">expression</replaceable> [ ASC | DESC | USING <replaceable class="PARAMETER">operator</replaceable> ] [, ...] ]
+    [ LIMIT { <replaceable class="PARAMETER">count</replaceable> | ALL } [ { OFFSET | , } <replaceable class="PARAMETER">start</replaceable> ]]
    </synopsis>

    where
    <replaceable class="PARAMETER">table_query</replaceable>
-    specifies any select expression without an ORDER BY or LIMIT clause.
+    specifies any select expression without an ORDER BY, FOR UPDATE,
+    or LIMIT clause.  (ORDER BY and LIMIT can be attached to a sub-expression
+    if it is enclosed in parentheses.  Without parentheses, these clauses
+    will be taken to apply to the result of the UNION, not to its right-hand
+    input expression.)
   </para>
    
   <para>
-    The UNION operator allows the result to be the collection of rows
+    The UNION operator computes the collection (set union) of the rows
    returned by the queries involved.
    The two SELECTs that represent the direct operands of the UNION must
    produce the same number of columns, and corresponding columns must be
@@ -478,79 +665,98 @@ SELECT name FROM distributors ORDER BY code;
   </para>
    
   <para>
-    By default, the result of UNION does not contain any duplicate rows
-    unless the ALL clause is specified.
+    The result of UNION does not contain any duplicate rows
+    unless the ALL option is specified.  ALL prevents elimination of
+    duplicates.
   </para>
    
   <para>
    Multiple UNION operators in the same SELECT statement are
-    evaluated left to right.
-    Note that the ALL keyword is not global in nature, being 
-    applied only for the current pair of table results.
+    evaluated left to right, unless otherwise indicated by parentheses.
+   </para>
+    
+   <para>
+    Currently, FOR UPDATE may not be specified either for a UNION result
+    or for the inputs of a UNION.
   </para>

  </refsect2>

  <refsect2 id="SQL-INTERSECT">
   <refsect2info>
-    <date>1998-09-24</date>
+    <date>2000-12-11</date>
   </refsect2info>
   <title id="sql-intersect-title">
    INTERSECT Clause
   </title>
   <para>
    <synopsis>
-<replaceable class="PARAMETER">table_query</replaceable> INTERSECT <replaceable class="PARAMETER">table_query</replaceable>
-    [ ORDER BY <replaceable class="PARAMETER">column</replaceable> [ ASC | DESC ] [, ...] ]
+<replaceable class="PARAMETER">table_query</replaceable> INTERSECT [ ALL ] <replaceable class="PARAMETER">table_query</replaceable>
+    [ ORDER BY <replaceable class="PARAMETER">expression</replaceable> [ ASC | DESC | USING <replaceable class="PARAMETER">operator</replaceable> ] [, ...] ]
+    [ LIMIT { <replaceable class="PARAMETER">count</replaceable> | ALL } [ { OFFSET | , } <replaceable class="PARAMETER">start</replaceable> ]]
    </synopsis>
    
    where
    <replaceable class="PARAMETER">table_query</replaceable>
-    specifies any select expression without an ORDER BY or LIMIT clause.
+    specifies any select expression without an ORDER BY, FOR UPDATE,
+    or LIMIT clause.
   </para>

   <para>
-    The INTERSECT operator gives you the rows that are common to both queries.
-    The two SELECTs that represent the direct operands of the INTERSECT must
-    produce the same number of columns, and corresponding columns must be
-    of compatible data types.
+    INTERSECT is similar to UNION, except that it produces only rows that
+    appear in both query outputs, rather than rows that appear in either.
+   </para>
+    
+   <para>
+    The result of INTERSECT does not contain any duplicate rows
+    unless the ALL option is specified.  With ALL, a row that has
+    m duplicates in L and n duplicates in R will appear min(m,n) times.
   </para>
    
   <para>
    Multiple INTERSECT operators in the same SELECT statement are
    evaluated left to right, unless parentheses dictate otherwise.
+    INTERSECT binds more tightly than UNION --- that is,
+    A UNION B INTERSECT C will be read as 
+    A UNION (B INTERSECT C) unless otherwise specified by parentheses.
   </para>
  </refsect2>

  <refsect2 id="SQL-EXCEPT">
   <refsect2info>
-    <date>1998-09-24</date>
+    <date>2000-12-11</date>
   </refsect2info>
   <title id="sql-except-title">
    EXCEPT Clause
   </title>
   <para>
    <synopsis>
-<replaceable class="PARAMETER">table_query</replaceable> EXCEPT <replaceable class="PARAMETER">table_query</replaceable>
-     [ ORDER BY <replaceable class="PARAMETER">column</replaceable> [ ASC | DESC ] [, ...] ]
+<replaceable class="PARAMETER">table_query</replaceable> EXCEPT [ ALL ] <replaceable class="PARAMETER">table_query</replaceable>
+    [ ORDER BY <replaceable class="PARAMETER">expression</replaceable> [ ASC | DESC | USING <replaceable class="PARAMETER">operator</replaceable> ] [, ...] ]
+    [ LIMIT { <replaceable class="PARAMETER">count</replaceable> | ALL } [ { OFFSET | , } <replaceable class="PARAMETER">start</replaceable> ]]
    </synopsis>
    
    where
    <replaceable class="PARAMETER">table_query</replaceable>
-    specifies any select expression without an ORDER BY or LIMIT clause.
+    specifies any select expression without an ORDER BY, FOR UPDATE,
+    or LIMIT clause.
   </para>

   <para>
-    The EXCEPT operator gives you the rows returned by the first query but
-    not the second query.
-    The two SELECTs that represent the direct operands of the EXCEPT must
-    produce the same number of columns, and corresponding columns must be
-    of compatible data types.
+    EXCEPT is similar to UNION, except that it produces only rows that
+    appear in the left query's output but not in the right query's output.
   </para>
-
+    
+   <para>
+    The result of EXCEPT does not contain any duplicate rows
+    unless the ALL option is specified.  With ALL, a row that has
+    m duplicates in L and n duplicates in R will appear max(m-n,0) times.
+   </para>
+    
   <para>
    Multiple EXCEPT operators in the same SELECT statement are
    evaluated left to right, unless parentheses dictate otherwise.
+    EXCEPT binds at the same level as UNION.
   </para>
  </refsect2>

@@ -758,7 +964,22 @@ SELECT actors.name
   <para>
 <productname>Postgres</productname> allows one to omit 
 the <command>FROM</command> clause from a query. This feature
-was retained from the original PostQuel query language:
+was retained from the original PostQuel query language.  It has
+a straightforward use to compute the results of simple constant
+expressions:
+
+  <programlisting>
+SELECT 2+2;
+
+ ?column?
+----------
+        4
+  </programlisting>
+
+Some other DBMSes cannot do this except by introducing a dummy one-row
+table to do the select from.  A less obvious use is to abbreviate a
+normal select from one or more tables:
+
  <programlisting>
 SELECT distributors.* WHERE name = 'Westward';

@@ -766,6 +987,26 @@ SELECT distributors.* WHERE name = 'Westward';
 -----+----------
 108 | Westward
  </programlisting>
+
+This works because an implicit FROM item is added for each table that is
+referenced in the query but not mentioned in FROM.  While this is a convenient
+shorthand, it's easy to misuse.  For example, the query
+  <programlisting>
+SELECT distributors.* FROM distributors d;
+  </programlisting>
+is probably a mistake; most likely the user meant
+  <programlisting>
+SELECT d.* FROM distributors d;
+  </programlisting>
+rather than the unconstrained join
+  <programlisting>
+SELECT distributors.* FROM distributors d, distributors distributors;
+  </programlisting>
+that he will actually get.  To help detect this sort of mistake,
+<Productname>Postgres</Productname> 7.1
+and later will warn if the implicit-FROM feature is used in a query that also
+contains an explicit FROM clause.
+
   </para>
  </refsect2>

@@ -791,9 +1032,9 @@ SELECT distributors.* WHERE name = 'Westward';
     is just noise and can be 
     omitted without affecting the meaning.
     The <productname>Postgres</productname> parser requires this keyword when
-     renaming columns because the type extensibility features lead to
+     renaming output columns because the type extensibility features lead to
     parsing ambiguities
-     in this context.</para>
+     in this context.  "AS" is optional in FROM items, however.</para>
     
    <para>
     The DISTINCT ON phrase is not part of <acronym>SQL92</acronym>.
@@ -818,11 +1059,11 @@ SELECT distributors.* WHERE name = 'Westward';
     <date>1998-09-24</date>
    </refsect3info>
    <title>
-     UNION Clause
+     UNION/INTERSECT/EXCEPT Clause
    </title>
    <para>
-     The <acronym>SQL92</acronym> syntax for UNION allows an
-     additional CORRESPONDING BY clause:
+     The <acronym>SQL92</acronym> syntax for UNION/INTERSECT/EXCEPT allows an
+     additional CORRESPONDING BY option:
     <synopsis> 
 <replaceable class="PARAMETER">table_query</replaceable> UNION [ALL]
    [CORRESPONDING [BY (<replaceable class="PARAMETER">column</replaceable> [,...])]]

--- a/doc/src/sgml/ref/select_into.sgml
+++ b/doc/src/sgml/ref/select_into.sgml
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/ref/select_into.sgml,v 1.6 2000/07/21 18:51:24 momjian Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/ref/select_into.sgml,v 1.7 2000/12/12 05:07:59 tgl Exp $
 Postgres documentation
 -->

@@ -19,20 +19,31 @@ Postgres documentation
  </refpurpose></refnamediv>
 <refsynopsisdiv>
  <refsynopsisdivinfo>
-   <date>1999-07-20</date>
+   <date>2000-12-11</date>
  </refsynopsisdivinfo>
  <synopsis>
 SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replaceable> [, ...] ) ] ]
-    <replaceable class="PARAMETER">expression</replaceable> [ AS <replaceable class="PARAMETER">name</replaceable> ] [, ...]
-    [ INTO [ TEMPORARY | TEMP ] [ TABLE ] <replaceable class="PARAMETER">new_table</replaceable> ]
-    [ FROM <replaceable class="PARAMETER">table</replaceable> [ <replaceable class="PARAMETER">alias</replaceable> ] [, ...] ]
+    * | <replaceable class="PARAMETER">expression</replaceable> [ AS <replaceable class="PARAMETER">output_name</replaceable> ] [, ...]
+    INTO [ TEMPORARY | TEMP ] [ TABLE ] <replaceable class="PARAMETER">new_table</replaceable>
+    [ FROM <replaceable class="PARAMETER">from_item</replaceable> [, ...] ]
    [ WHERE <replaceable class="PARAMETER">condition</replaceable> ]
-    [ GROUP BY <replaceable class="PARAMETER">column</replaceable> [, ...] ]
+    [ GROUP BY <replaceable class="PARAMETER">expression</replaceable> [, ...] ]
    [ HAVING <replaceable class="PARAMETER">condition</replaceable> [, ...] ]
-    [ { UNION [ ALL ] | INTERSECT | EXCEPT } <replaceable class="PARAMETER">select</replaceable> ]
-    [ ORDER BY <replaceable class="PARAMETER">column</replaceable> [ ASC | DESC | USING <replaceable class="PARAMETER">operator</replaceable> ] [, ...] ]
+    [ { UNION | INTERSECT | EXCEPT [ ALL ] } <replaceable class="PARAMETER">select</replaceable> ]
+    [ ORDER BY <replaceable class="PARAMETER">expression</replaceable> [ ASC | DESC | USING <replaceable class="PARAMETER">operator</replaceable> ] [, ...] ]
    [ FOR UPDATE [ OF <replaceable class="PARAMETER">class_name</replaceable> [, ...] ] ]
-    [ LIMIT { <replaceable class="PARAMETER">count</replaceable> | ALL } [ { OFFSET | , } <replaceable class="PARAMETER">start</replaceable> ] ]
+    [ LIMIT { <replaceable class="PARAMETER">count</replaceable> | ALL } [ { OFFSET | , } <replaceable class="PARAMETER">start</replaceable> ]]
+
+where <replaceable class="PARAMETER">from_item</replaceable> can be:
+
+[ ONLY ] <replaceable class="PARAMETER">table_name</replaceable> [ * ]
+    [ [ AS ] <replaceable class="PARAMETER">alias</replaceable> [ ( <replaceable class="PARAMETER">column_alias_list</replaceable> ) ] ]
+|
+( <replaceable class="PARAMETER">select</replaceable> )
+    [ AS ] <replaceable class="PARAMETER">alias</replaceable> [ ( <replaceable class="PARAMETER">column_alias_list</replaceable> ) ]
+|
+<replaceable class="PARAMETER">from_item</replaceable> [ NATURAL ] <replaceable class="PARAMETER">join_type</replaceable> <replaceable class="PARAMETER">from_item</replaceable>
+    [ ON <replaceable class="PARAMETER">join_condition</replaceable> | USING ( <replaceable class="PARAMETER">join_column_list</replaceable> ) ]
  </synopsis>
  
  <refsect2 id="R2-SQL-SELECTINTO-1">

--- a/doc/src/sgml/rules.sgml
+++ b/doc/src/sgml/rules.sgml
 <Chapter Id="rules">
 <Title>The <ProductName>Postgres</ProductName> Rule System</Title>

+   <note>
+    <title>Author</title>
+    <para>
+     Written by Jan Wieck.  Updates for 7.1 by Tom Lane.
+    </para>
+   </note>
+
 <Para>
     Production rule systems are conceptually simple, but
     there are many subtle points involved in actually using
@@ -21,7 +28,7 @@
     is totally different from stored procedures and triggers.
     It modifies  queries  to
     take rules into consideration, and then passes the modified 
-     query to the query optimizer for  execution.   It
+     query to the query planner for planning and execution.   It
     is  very powerful, and can be used for many things such
     as query language procedures, views, and versions.  The
     power  of  this  rule system is discussed in 
@@ -34,15 +41,15 @@

 <Para>
    To understand how the rule system works it is necessary to know
-    when it is invoked and what it's input and results are.
+    when it is invoked and what its input and results are.
 </Para>

 <Para>
-    The rule system is located between the query parser and the optimizer.
+    The rule system is located between the query parser and the planner.
    It takes the output of the parser, one querytree, and the rewrite
    rules from the <FileName>pg_rewrite</FileName> catalog, which are
    querytrees too with some extra information, and creates zero or many
-    querytrees as result. So it's input and output are always things
+    querytrees as result. So its input and output are always things
    the parser itself could have produced and thus, anything it sees
    is basically representable as an <Acronym>SQL</Acronym> statement.
 </Para>
@@ -101,8 +108,8 @@
    </Term>
    <ListItem>
    <Para>
-        The rangtable is a list of relations that are used in the query.
-	In a SELECT statement that are the relations given after
+        The rangetable is a list of relations that are used in the query.
+	In a SELECT statement these are the relations given after
 	the FROM keyword.
    </Para>

@@ -153,13 +160,15 @@
        The targetlist is a list of expressions that define the result
 	of the query. In the case of a SELECT, the expressions are what
 	builds the final output of the query. They are the expressions
-	between the SELECT and the FROM keywords (* is just an
-	abbreviation for all the attribute names of a relation). 
+	between the SELECT and the FROM keywords.  (* is just an
+	abbreviation for all the attribute names of a relation.  It is
+	expanded by the parser into the individual attributes, so the
+	rule system never sees it.)
    </Para>

    <Para>
        DELETE queries don't need a targetlist because they don't
-	produce any result. In fact the optimizer will add a special
+	produce any result. In fact the planner will add a special CTID
 	entry to the empty targetlist. But this is after the rule
 	system and will be discussed later. For the rule system the
 	targetlist is empty.
@@ -167,25 +176,25 @@

    <Para>
        In INSERT queries the targetlist describes the new rows that
-	should go into the resultrelation. Missing columns of the 
-	resultrelation will be added by the optimizer with a constant
-	NULL expression. It is the expressions in the VALUES clause
-	or the ones from the SELECT clause on INSERT ... SELECT.
+	should go into the resultrelation. It is the expressions in the VALUES
+	clause or the ones from the SELECT clause in INSERT ... SELECT.
+	Missing columns of the resultrelation will be filled in by the
+	planner with a constant NULL expression.
    </Para>

    <Para>
-        On UPDATE queries, it describes the new rows that should
-	replace the old ones. Here now the optimizer will add missing
-	columns by inserting expressions that put the values from the
-	old rows into the new one. And it will add the special entry
-	like for DELETE too. It is the expressions from the
-	SET attribute = expression part of the query.
+        In UPDATE queries, the targetlist describes the new rows that should
+	replace the old ones. In the rule system, it contains just the
+	expressions from the SET attribute = expression part of the query.
+	The planner will add missing columns by inserting expressions that
+	copy the values from the old row into the new one. And it will add
+	the special CTID entry just as for DELETE too.
    </Para>

    <Para>
        Every entry in the targetlist contains an expression that can
 	be a constant value, a variable pointing to an attribute of one
-	of the relations in the rangetable, a parameter or an expression
+	of the relations in the rangetable, a parameter, or an expression
 	tree made of function calls, constants, variables, operators etc.
    </Para>
    </ListItem>
@@ -197,7 +206,7 @@
    </Term>
    <ListItem>
    <Para>
-        The queries qualification is an expression much like one of those
+        The query's qualification is an expression much like one of those
 	contained in the targetlist entries. The result value of this
 	expression is a boolean that tells if the operation
 	(INSERT, UPDATE, DELETE or SELECT) for the final result row should be
@@ -207,6 +216,28 @@
    </ListItem>
    </VarListEntry>

+    <VarListEntry>
+    <Term>
+        the join tree
+    </Term>
+    <ListItem>
+    <Para>
+        The query's join tree shows the structure of the FROM clause.
+	For a simple query like SELECT FROM a, b, c the join tree is just
+	a list of the FROM items, because we are allowed to join them in
+	any order.  But when JOIN expressions --- particularly outer joins
+	--- are used, we have to join in the order shown by the JOINs.
+	The join tree shows the structure of the JOIN expressions.  The
+	restrictions associated with particular JOIN clauses (from ON or
+	USING expressions) are stored as qualification expressions attached
+	to those join tree nodes.  It turns out to be convenient to store
+	the top-level WHERE expression as a qualification attached to the
+	top-level join tree item, too.  So really the join tree represents
+	both the FROM and WHERE clauses of a SELECT.
+    </Para>
+    </ListItem>
+    </VarListEntry>
+
    <VarListEntry>
    <Term>
        the others
@@ -214,11 +245,10 @@
    <ListItem>
    <Para>
        The other parts of the querytree like the ORDER BY 
-	clause arent of interest here. The rule system
+	clause aren't of interest here. The rule system
 	substitutes entries there while applying rules, but that
 	doesn't have much to do with the fundamentals of the rule
-	system. GROUP BY is a special thing when it appears in
-        a view definition and still needs to be documented.
+	system.
    </Para>
    </ListItem>
    </VarListEntry>
@@ -255,7 +285,7 @@
    This has some side effects. One of them is that
    the information about a view in the <ProductName>Postgres</ProductName>
    system catalogs is exactly the same as it is for a table. So for the
-    query parsers, there is absolutely no difference between
+    query parser, there is absolutely no difference between
    a table and a view. They are the same thing - relations. That is the
    important one for now.
 </Para>
@@ -274,14 +304,14 @@
 </Para>

 <Para>
-    Currently, there could be only one action and it must be a
-    SELECT action that is INSTEAD. This restriction was required
-    to make rules safe enough to open them for ordinary users and
+    Currently, there can be only one action in an ON SELECT rule, and it must
+    be an unconditional SELECT action that is INSTEAD. This restriction was
+    required to make rules safe enough to open them for ordinary users and
    it restricts rules ON SELECT to real view rules.
 </Para>

 <Para>
-    The example for this document are two join views that do some calculations
+    The examples for this document are two join views that do some calculations
    and some more views using them in turn.
    One of the two first views is customized later by adding rules for
    INSERT, UPDATE and DELETE operations so that the final result will
@@ -293,7 +323,7 @@
 </Para>

 <Para>
-    The database needed to play on the examples is named al_bundy.
+    The database needed to play with the examples is named al_bundy.
    You'll see soon why this is the database name. And it needs the
    procedural language PL/pgSQL installed, because
    we need a little min() function returning the lower of 2
@@ -312,7 +342,7 @@
 </Para>

 <Para>
-    The real tables we need in the first two rule system descripitons
+    The real tables we need in the first two rule system descriptions
    are these:

 <ProgramListing>
@@ -388,15 +418,15 @@
    will create a relation shoelace and an entry 
    in <FileName>pg_rewrite</FileName>
    that tells that there is a rewrite rule that must be applied
-    whenever the relation shoelace is referenced in a queries rangetable.
-    The rule has no rule qualification (discussed in the
-    non SELECT rules since SELECT rules currently cannot have them) and
+    whenever the relation shoelace is referenced in a query's rangetable.
+    The rule has no rule qualification (discussed later, with the
+    non SELECT rules, since SELECT rules currently cannot have them) and
    it is INSTEAD. Note that rule qualifications are not the same as
-    query qualifications! The rules action has a qualification.
+    query qualifications! The rule's action has a query qualification.
 </Para>

 <Para>
-    The rules action is one querytree that is an exact copy of the
+    The rule's action is one querytree that is a copy of the
    SELECT statement in the view creation command.
    
    <Note>
@@ -489,73 +519,48 @@
    Note that the parser changed the calculation and qualification into
    calls to the appropriate functions. But
    in fact this changes nothing.
-    The first step in rewriting is merging the two rangetables. The resulting
-    parsetree then reads
-
-<ProgramListing>
-    SELECT shoelace.sl_name, shoelace.sl_avail,
-           shoelace.sl_color, shoelace.sl_len,
-           shoelace.sl_unit, shoelace.sl_len_cm
-      FROM shoelace shoelace, <FirstTerm>shoelace *OLD*</FirstTerm>,
-           <FirstTerm>shoelace *NEW*</FirstTerm>, <FirstTerm>shoelace_data s</FirstTerm>,
-           <FirstTerm>unit u</FirstTerm>;
-</ProgramListing>
+</Para>

-    In step 2 it adds the qualification from the rule action to the
-    parsetree resulting in
+<Para>
+    To expand the view, the rewriter simply creates a subselect rangetable
+    entry containing the rule's action parsetree, and substitutes this
+    rangetable entry for the original one that referenced the view.  The 
+    resulting rewritten parsetree is almost the same as if Al had typed

 <ProgramListing>
    SELECT shoelace.sl_name, shoelace.sl_avail,
           shoelace.sl_color, shoelace.sl_len,
           shoelace.sl_unit, shoelace.sl_len_cm
-      FROM shoelace shoelace, shoelace *OLD*,
-           shoelace *NEW*, shoelace_data s,
-           unit u
-     <FirstTerm>WHERE bpchareq(s.sl_unit, u.un_name)</FirstTerm>;
-</ProgramListing>
-
-    And in step 3 it replaces all the variables in the parsetree, that
-    reference the rangetable entry (the one for
-    <Filename>shoelace</Filename> that is currently processed)
-    by the corresponding targetlist expressions
-    from the rule action. This results in the final query
-
-<ProgramListing>
-    SELECT <FirstTerm>s.sl_name</FirstTerm>, <FirstTerm>s.sl_avail</FirstTerm>, 
-           <FirstTerm>s.sl_color</FirstTerm>, <FirstTerm>s.sl_len</FirstTerm>, 
-           <FirstTerm>s.sl_unit</FirstTerm>, <FirstTerm>float8mul(s.sl_len, u.un_fact)</FirstTerm> AS sl_len_cm
-      FROM shoelace shoelace, shoelace *OLD*,
-           shoelace *NEW*, shoelace_data s,
-           unit u
-     WHERE bpchareq(s.sl_unit, u.un_name);
-</ProgramListing>
-
-    Turning this back into a real <Acronym>SQL</Acronym> statement a human
-    user would type reads
-
-<ProgramListing>
-    SELECT s.sl_name, s.sl_avail,
-           s.sl_color, s.sl_len,
-           s.sl_unit, s.sl_len * u.un_fact AS sl_len_cm
-      FROM shoelace_data s, unit u
-     WHERE s.sl_unit = u.un_name;
-</ProgramListing>
-
-    That was the first rule applied. While this was done, the rangetable has
-    grown. So the rule system continues checking the range table entries.
-    The next one is number 2 (shoelace *OLD*).
-    Relation <Filename>shoelace</Filename>
-    has a rule, but this rangetable entry isn't referenced
-    in any of the variables of the parsetree, so it is ignored. Since all the
-    remaining rangetable entries either have no rules in
-    <Filename>pg_rewrite</Filename> or aren't referenced,
-    it reaches the end of the rangetable.
-    Rewriting is complete and the above is the final result given into
-    the optimizer.
-    The optimizer ignores the extra rangetable entries that aren't
-    referenced by variables in the parsetree and the plan produced
-    by the planner/optimizer would be exactly the same as if Al had typed
-    the above SELECT query instead of the view selection.
+      FROM (SELECT s.sl_name,
+                   s.sl_avail,
+                   s.sl_color,
+                   s.sl_len,
+                   s.sl_unit,
+                   s.sl_len * u.un_fact AS sl_len_cm
+              FROM shoelace_data s, unit u
+             WHERE s.sl_unit = u.un_name) shoelace;
+</ProgramListing>
+
+     There is one difference however: the sub-query's rangetable has two
+     extra entries shoelace *OLD*, shoelace *NEW*.  These entries don't
+     participate directly in the query, since they aren't referenced by
+     the sub-query's join tree or targetlist.  The rewriter uses them
+     to store the access permission check info that was originally present
+     in the rangetable entry that referenced the view.  In this way, the
+     executor will still check that the user has proper permissions to access
+     the view, even though there's no direct use of the view in the rewritten
+     query.
+</Para>
+
+<Para>
+    That was the first rule applied.  The rule system will continue checking
+    the remaining rangetable entries in the top query (in this example there
+    are no more), and it will recursively check the rangetable entries in
+    the added sub-query to see if any of them reference views.  (But it
+    won't expand *OLD* or *NEW* --- otherwise we'd have infinite recursion!)
+    In this example, there are no rewrite rules for shoelace_data or unit,
+    so rewriting is complete and the above is the final result given to
+    the planner.
 </Para>

 <Para>
@@ -570,7 +575,7 @@
    Al needs to know for which shoes currently in the store
    he has the matching shoelaces (color and size) and where the
    total number of exactly matching pairs is greater or equal to two.
-    We theach him how to do and he asks his database:
+    We teach him what to do and he asks his database:

 <ProgramListing>
    al_bundy=> SELECT * FROM shoe_ready WHERE total_avail >= 2;
@@ -597,106 +602,77 @@
     WHERE int4ge(shoe_ready.total_avail, 2);
 </ProgramListing>

-    The first rule applied will be that one for the 
-    <Filename>shoe_ready</Filename> relation and it results in the
+    The first rule applied will be the one for the 
+    <Filename>shoe_ready</Filename> view and it results in the
    parsetree

 <ProgramListing>
-    SELECT <FirstTerm>rsh.shoename</FirstTerm>, <FirstTerm>rsh.sh_avail</FirstTerm>,
-           <FirstTerm>rsl.sl_name</FirstTerm>, <FirstTerm>rsl.sl_avail</FirstTerm>,
-           <FirstTerm>min(rsh.sh_avail, rsl.sl_avail) AS total_avail</FirstTerm>
-      FROM shoe_ready shoe_ready, <FirstTerm>shoe_ready *OLD*</FirstTerm>,
-           <FirstTerm>shoe_ready *NEW*</FirstTerm>, <FirstTerm>shoe rsh</FirstTerm>,
-           <FirstTerm>shoelace rsl</FirstTerm>
-     WHERE int4ge(<FirstTerm>min(rsh.sh_avail, rsl.sl_avail)</FirstTerm>, 2)
-       <FirstTerm>AND (bpchareq(rsl.sl_color, rsh.slcolor)
-            AND float8ge(rsl.sl_len_cm, rsh.slminlen_cm)
-            AND float8le(rsl.sl_len_cm, rsh.slmaxlen_cm)
-           )</FirstTerm>;
-</ProgramListing>
-
-    In reality the AND clauses in the qualification will be
-    operator nodes of type AND with a left and right expression. But
-    that makes it lesser readable as it already is, and there are more
-    rules to apply. So I only put them into some parantheses to group
-    them into logical units in the order they where added and we continue
-    with the rule for relation
-    <Filename>shoe</Filename> as it is the next rangetable entry
-    that is referenced and has a rule. The result of applying it is
-
-<ProgramListing>
-    SELECT <FirstTerm>sh.shoename</FirstTerm>, <FirstTerm>sh.sh_avail</FirstTerm>,
-           rsl.sl_name, rsl.sl_avail,
-           min(<FirstTerm>sh.sh_avail</FirstTerm>, rsl.sl_avail) AS total_avail,
-      FROM shoe_ready shoe_ready, shoe_ready *OLD*,
-           shoe_ready *NEW*, shoe rsh,
-           shoelace rsl, <FirstTerm>shoe *OLD*</FirstTerm>,
-           <FirstTerm>shoe *NEW*</FirstTerm>, <FirstTerm>shoe_data sh</FirstTerm>,
-           <FirstTerm>unit un</FirstTerm>
-     WHERE (int4ge(min(<FirstTerm>sh.sh_avail</FirstTerm>, rsl.sl_avail), 2)
-            AND (bpchareq(rsl.sl_color, <FirstTerm>sh.slcolor</FirstTerm>)
-                 AND float8ge(rsl.sl_len_cm, 
-                              <FirstTerm>float8mul(sh.slminlen, un.un_fact)</FirstTerm>)
-                 AND float8le(rsl.sl_len_cm, 
-                              <FirstTerm>float8mul(sh.slmaxlen, un.un_fact)</FirstTerm>)
-                )
-           )
-       <FirstTerm>AND bpchareq(sh.slunit, un.un_name)</FirstTerm>;
-</ProgramListing>
-
-    And finally we apply the already well known rule for
-    <Filename>shoelace</Filename> (this time on a parsetree that is
-    a little more complex) and get
-
-<ProgramListing>
-    SELECT sh.shoename, sh.sh_avail,
-           <FirstTerm>s.sl_name</FirstTerm>, <FirstTerm>s.sl_avail</FirstTerm>,
-           min(sh.sh_avail, <FirstTerm>s.sl_avail</FirstTerm>) AS total_avail
-      FROM shoe_ready shoe_ready, shoe_ready *OLD*,
-           shoe_ready *NEW*, shoe rsh,
-           shoelace rsl, shoe *OLD*,
-           shoe *NEW*, shoe_data sh,
-           unit un, <FirstTerm>shoelace *OLD*</FirstTerm>,
-           <FirstTerm>shoelace *NEW*</FirstTerm>, <FirstTerm>shoelace_data s</FirstTerm>,
-           <FirstTerm>unit u</FirstTerm>
-     WHERE (    (int4ge(min(sh.sh_avail, <FirstTerm>s.sl_avail</FirstTerm>), 2)
-                 AND (bpchareq(<FirstTerm>s.sl_color</FirstTerm>, sh.slcolor)
-                      AND float8ge(<FirstTerm>float8mul(s.sl_len, u.un_fact)</FirstTerm>, 
-                                   float8mul(sh.slminlen, un.un_fact))
-                      AND float8le(<FirstTerm>float8mul(s.sl_len, u.un_fact)</FirstTerm>, 
-                                   float8mul(sh.slmaxlen, un.un_fact))
-                     )
-                )
-            AND bpchareq(sh.slunit, un.un_name)
-           )
-       <FirstTerm>AND bpchareq(s.sl_unit, u.un_name)</FirstTerm>;
-</ProgramListing>
-
-    Again we reduce it to a real <Acronym>SQL</Acronym> statement
-    that is equivalent to the final output of the rule system:
-
-<ProgramListing>
-    SELECT sh.shoename, sh.sh_avail,
-           s.sl_name, s.sl_avail,
-           min(sh.sh_avail, s.sl_avail) AS total_avail
-      FROM shoe_data sh, shoelace_data s, unit u, unit un
-     WHERE min(sh.sh_avail, s.sl_avail) >= 2
-       AND s.sl_color = sh.slcolor
-       AND s.sl_len * u.un_fact >= sh.slminlen * un.un_fact
-       AND s.sl_len * u.un_fact <= sh.slmaxlen * un.un_fact
-       AND sh.sl_unit = un.un_name
-       AND s.sl_unit = u.un_name;
-</ProgramListing>
-
-    Recursive processing of rules rewrote one SELECT from a view
-    into a parsetree, that is equivalent to exactly that what Al
-    had to type if there would be no views at all.
+    SELECT shoe_ready.shoename, shoe_ready.sh_avail,
+           shoe_ready.sl_name, shoe_ready.sl_avail,
+           shoe_ready.total_avail
+      FROM (SELECT rsh.shoename,
+                   rsh.sh_avail,
+                   rsl.sl_name,
+                   rsl.sl_avail,
+                   min(rsh.sh_avail, rsl.sl_avail) AS total_avail
+              FROM shoe rsh, shoelace rsl
+             WHERE rsl.sl_color = rsh.slcolor
+               AND rsl.sl_len_cm >= rsh.slminlen_cm
+               AND rsl.sl_len_cm <= rsh.slmaxlen_cm) shoe_ready
+     WHERE int4ge(shoe_ready.total_avail, 2);
+</ProgramListing>
+
+    Similarly, the rules for <Filename>shoe</Filename> and
+    <Filename>shoelace</Filename> are substituted into the rangetable of
+    the sub-query, leading to a three-level final querytree:
+
+<ProgramListing>
+    SELECT shoe_ready.shoename, shoe_ready.sh_avail,
+           shoe_ready.sl_name, shoe_ready.sl_avail,
+           shoe_ready.total_avail
+      FROM (SELECT rsh.shoename,
+                   rsh.sh_avail,
+                   rsl.sl_name,
+                   rsl.sl_avail,
+                   min(rsh.sh_avail, rsl.sl_avail) AS total_avail
+              FROM (SELECT sh.shoename,
+                           sh.sh_avail,
+                           sh.slcolor,
+                           sh.slminlen,
+                           sh.slminlen * un.un_fact AS slminlen_cm,
+                           sh.slmaxlen,
+                           sh.slmaxlen * un.un_fact AS slmaxlen_cm,
+                           sh.slunit
+                      FROM shoe_data sh, unit un
+                     WHERE sh.slunit = un.un_name) rsh,
+                   (SELECT s.sl_name,
+                           s.sl_avail,
+                           s.sl_color,
+                           s.sl_len,
+                           s.sl_unit,
+                           s.sl_len * u.un_fact AS sl_len_cm
+                      FROM shoelace_data s, unit u
+                     WHERE s.sl_unit = u.un_name) rsl
+             WHERE rsl.sl_color = rsh.slcolor
+               AND rsl.sl_len_cm >= rsh.slminlen_cm
+               AND rsl.sl_len_cm <= rsh.slmaxlen_cm) shoe_ready
+     WHERE int4ge(shoe_ready.total_avail, 2);
+</ProgramListing>
+
+    It turns out that the planner will collapse this tree into a two-level
+    querytree: the bottommost selects will be "pulled up" into the middle
+    select since there's no need to process them separately.  But the
+    middle select will remain separate from the top, because it contains
+    aggregate functions.  If we pulled those up it would change the behavior
+    of the topmost select, which we don't want.  However, collapsing the
+    query tree is an optimization that the rewrite system doesn't
+    have to concern itself with.

    <Note>
    <Title>Note</Title>
    <Para>
    There is currently no recursion stopping mechanism for view
-    rules in the rule system (only for the other rules).
+    rules in the rule system (only for the other kinds of rules).
    This doesn't hurt much, because the only way to push this
    into an endless loop (blowing up the
    backend until it reaches the memory limit)
@@ -704,7 +680,7 @@
    view rules by hand with CREATE RULE in such a way, that
    one selects from the other that selects from the one.
    This could never happen if CREATE VIEW is used because
-    on the first CREATE VIEW, the second relation does not exist
+    for the first CREATE VIEW, the second relation does not exist
    and thus the first view cannot select from the second.
    </Para>
    </Note>
@@ -718,14 +694,14 @@
 <Para>
    Two details of the parsetree aren't touched in the description of
    view rules above. These are the commandtype and the resultrelation.
-    In fact, view rules don't need these informations.
+    In fact, view rules don't need this information.
 </Para>

 <Para>
    There are only a few differences between a parsetree for a SELECT
    and one for any other command. Obviously they have another commandtype
    and this time the resultrelation points to the rangetable entry where
-    the result should go. Anything else is absolutely the same. 
+    the result should go. Everything else is absolutely the same. 
    So having two tables t1 and t2 with attributes
    a and b, the parsetrees for the two statements

@@ -757,11 +733,17 @@
 	    ranges for equality.
 	</Para>
        </ListItem>
+
+        <ListItem>
+	<Para>
+	    The jointrees show a simple join between t1 and t2.
+	</Para>
+        </ListItem>
    </ItemizedList>

    The consequence is, that both parsetrees result in similar execution
    plans. They are both joins over the two tables. For the UPDATE
-    the missing columns from t1 are added to the targetlist by the optimizer 
+    the missing columns from t1 are added to the targetlist by the planner 
    and the final parsetree will read as

 <ProgramListing>
@@ -781,21 +763,17 @@
    is a SELECT command and the other is an UPDATE is handled in the
    caller of the executor. The caller still knows (looking at the
    parsetree) that this is an UPDATE, and he knows that this result
-    should go into table t1. But which of the 666 rows that are there
-    has to be replaced by the new row? The plan executed is a join
-    with a qualification that potentially could produce any number of
-    rows between 0 and 666 in unknown order.
+    should go into table t1. But which of the rows that are there
+    has to be replaced by the new row?
 </Para>

 <Para>
    To resolve this problem, another entry is added to the targetlist
-    in UPDATE and DELETE statements. The current tuple ID (ctid). This
-    is a system attribute with a special feature. It contains the
-    block and position in the block for the row. Knowing the table,
-    the ctid can be used to find one specific row in a 1.5GB sized table
-    containing millions of rows by fetching one single data block.
-    After adding the ctid to the targetlist, the final result set
-    could be defined as
+    in UPDATE (and also in DELETE) statements: the current tuple ID (ctid).
+    This is a system attribute containing the file
+    block number and position in the block for the row. Knowing the table,
+    the ctid can be used to retrieve the original t1 row to be updated.
+    After adding the ctid to the targetlist, the query actually looks like

 <ProgramListing>
    SELECT t1.a, t2.b, t1.ctid FROM t1, t2 WHERE t1.a = t2.a;
@@ -812,7 +790,7 @@
 </Para>

 <Para>
-    Knowing that all, we can simply apply view rules in absolutely
+    Knowing all that, we can simply apply view rules in absolutely
    the same way to any command. There is no difference.
 </Para>
 </Sect2>
@@ -832,137 +810,32 @@

 <Para>
    The benefit of implementing views with the rule system is,
-    that the optimizer has all
+    that the planner has all
    the information about which tables have to be scanned plus the
    relationships between these tables plus the restrictive
    qualifications from the views plus the qualifications from
    the original query
    in one single parsetree. And this is still the situation
    when the original query is already a join over views.
-    Now the optimizer has to decide which is
+    Now the planner has to decide which is
    the best path to execute the query. The more information
-    the optimizer has, the better this decision can be. And
+    the planner has, the better this decision can be. And
    the rule system as implemented in <ProductName>Postgres</ProductName>
    ensures, that this is all information available about the query
    up to now.
 </Para>
 </Sect3>
-
-<Sect3>
-<Title>Concerns</Title>
-
-<Para>
-    There was a long time where the <ProductName>Postgres</ProductName> 
-    rule system was considered broken. The use of rules was not
-    recommended and the only part working was view rules. And also
-    these view rules gave problems because the rule system wasn't able
-    to apply them properly on statements other than a SELECT (for
-    example an UPDATE
-    that used data from a view didn't work).
-</Para>
-
-<Para>
-    During that time, development moved on and many features were
-    added to the parser and optimizer. The rule system got more and more
-    out of sync with their capabilities and it became harder and harder
-    to start fixing it. Thus, no one did.
-</Para>
-
-<Para>
-    For 6.4, someone locked the door, took a deep breath and shuffled
-    that damned thing up. What came out was a rule system with the
-    capabilities described in this document. But there are still some
-    constructs not handled and some where it fails due to 
-    things that are currently not
-    supported by the <ProductName>Postgres</ProductName> query
-    optimizer.
-
-    <ItemizedList>
-        <ListItem>
-	<Para>
-	    Views with aggregate columns have bad problems. Aggregate
-	    expressions in qualifications must be used in subselects.
-	    Currently it is not possible to do a join of two views,
-	    each having an aggregate column, and compare the two aggregate values
-	    in the qualification. In the meantime it is possible to
-	    put these aggregate expressions into functions with
-	    the appropriate arguments and use
-	    them in the view definition.
-	</Para>
-        </ListItem>
-
-        <ListItem>
-	<Para>
-	    Views of unions are currently not supported. Well it's easy
-	    to rewrite a simple SELECT into a union. But it is a little
-	    difficult if the view is part of a join doing an update.
-	</Para>
-        </ListItem>
-
-        <ListItem>
-	<Para>
-	    ORDER BY clauses in view definitions aren't supported.
-	</Para>
-        </ListItem>
-
-        <ListItem>
-	<Para>
-	    DISTINCT isn't supported in view definitions.
-	</Para>
-        </ListItem>
-    </ItemizedList>
-
-    There is no good reason why the optimizer should not
-    handle parsetree constructs that the parser could never produce
-    due to limitations in the <Acronym>SQL</Acronym> syntax.
-    The author hopes that these items disappear in the future.
-</Para>
-</Sect3>
 </Sect2>

 <Sect2>
-<Title>Implementation Side Effects</Title>
+<Title>What about updating a view?</Title>

 <Para>
-    Using the described rule system to implement views has a funny
-    side effect. The following does not seem to work:
-
-<ProgramListing>
-    al_bundy=> INSERT INTO shoe (shoename, sh_avail, slcolor)
-    al_bundy->     VALUES ('sh5', 0, 'black');
-    INSERT 20128 1
-    al_bundy=> SELECT shoename, sh_avail, slcolor FROM shoe_data;
-    shoename  |sh_avail|slcolor   
-    ----------+--------+----------
-    sh1       |       2|black     
-    sh3       |       4|brown     
-    sh2       |       0|black     
-    sh4       |       3|brown     
-    (4 rows)
-</ProgramListing>
-
-    The interesting thing is that the return code for INSERT gave
-    us an object ID and told that 1 row has been inserted.
-    But it doesn't appear in <Filename>shoe_data</Filename>.
-    Looking into the database
-    directory we can see, that the database file for the
-    view relation <Filename>shoe</Filename> seems now to have
-    a data block. And that is definitely the case.
-</Para>
-
-<Para>
-    We can also issue a DELETE and if it does not have
-    a qualification, it tells us that rows have been deleted
-    and the next vacuum run will reset the file to zero size.
-</Para>
-
-<Para>
-    The reason for that behaviour is, that the parsetree for the
-    INSERT does not reference the <Filename>shoe</Filename> relation
-    in any variable. The targetlist contains only constant values.
-    So there is no rule to apply and it goes
-    down unchanged into execution and the row is inserted. And
-    so for the DELETE.
+    What happens if a view is named as the target relation for an INSERT,
+    UPDATE, or DELETE?  After doing the substitutions described above,
+    we will have a querytree in which the resultrelation points at a
+    subquery rangetable entry.  This will not work, so the rewriter throws
+    an error if it sees it has produced such a thing.
 </Para>

 <Para>
@@ -977,7 +850,7 @@
 <Title>Rules on INSERT, UPDATE and DELETE</Title>

 <Sect2>
-<Title>Differences to View Rules</Title>
+<Title>Differences from View Rules</Title>

 <Para>
    Rules that are defined ON INSERT, UPDATE and DELETE are
@@ -1071,7 +944,7 @@
 	    <ListItem>
 	    <Para>
 	        The parsetree from the rule action where the
-		original parsetrees qualification has been added.
+		original parsetree's qualification has been added.
 	    </Para>
 	    </ListItem>
 	</ItemizedList>
@@ -1085,7 +958,7 @@
 	    <ListItem>
 	    <Para>
 	        The parsetree from the rule action where the
-		original parsetrees qualification has been added.
+		original parsetree's qualification has been added.
 	    </Para>
 	    </ListItem>
 	</ItemizedList>
@@ -1099,7 +972,7 @@
 	    <ListItem>
 	    <Para>
 	        The parsetree from the rule action where the rule
-		qualification and the original parsetrees 
+		qualification and the original parsetree's 
 		qualification have been added.
 	    </Para>
 	    </ListItem>
@@ -1114,7 +987,7 @@
 	    <ListItem>
 	    <Para>
 	        The parsetree from the rule action where the rule
-		qualification and the original parsetrees 
+		qualification and the original parsetree's 
 		qualification have been added.
 	    </Para>
 	    </ListItem>
@@ -1133,7 +1006,7 @@
 <Para>
    Finally, if the rule is not INSTEAD, the unchanged original parsetree is
    added to the list. Since only qualified INSTEAD rules already add the
-    original parsetree, we end up with a total maximum of two parsetrees
+    original parsetree, we end up with either one or two output parsetrees
    for a rule with one action.
 </Para>

@@ -1155,19 +1028,26 @@
    rangetable entries for NEW and OLD, some substitutions have to be made
    before they can be used. For any reference to NEW, the targetlist of
    the original query is searched for a corresponding entry. If found,
-    that entries expression is placed into the reference. Otherwise
-    NEW means the same as OLD. Any reference to OLD is replaced by a
+    that entry's expression replaces the reference. Otherwise
+    NEW means the same as OLD (for an UPDATE) or is replaced by NULL
+    (for an INSERT). Any reference to OLD is replaced by a
    reference to the rangetable entry which is the resultrelation.
 </Para>

+<Para>
+    After we are done applying update rules, we apply view rules to the
+    produced parsetree(s).  Views cannot insert new update actions so
+    there is no need to apply update rules to the output of view rewriting.
+</Para>
+
 <Sect3>
 <Title>A First Rule Step by Step</Title>

 <Para>
    We want to trace changes to the sl_avail column in the
    <Filename>shoelace_data</Filename> relation. So we setup a
-    log table and a rule that writes us entries every time
-    and UPDATE is performed on <Filename>shoelace_data</Filename>.
+    log table and a rule that conditionally writes a log entry when
+    an UPDATE is performed on <Filename>shoelace_data</Filename>.

 <ProgramListing>
    CREATE TABLE shoelace_log (
@@ -1193,9 +1073,10 @@
    is a datetime and tries to make a constant from it - with success.
    So a constant datetime value would be stored in the rule action
    and all log entries would have the time of the CREATE RULE statement.
-    Not exactly what we want. The casting causes that the parser
-    constructs a datetime('now'::text) from it and this will be 
-    evaluated when the rule is executed.
+    Not exactly what we want. The casting causes the parser to
+    construct a datetime('now'::text) expression and this will be 
+    evaluated when the rule is executed.  (Another way to do this is to
+    use the function now() instead of a literal constant.)
 </Para>

 <Para>
@@ -1237,64 +1118,60 @@
    and one action

 <ProgramListing>
-    INSERT INTO shoelace_log SELECT 
+    INSERT INTO shoelace_log VALUES(
           *NEW*.sl_name, *NEW*.sl_avail,
-           getpgusername(), datetime('now'::text)
-      FROM shoelace_data *NEW*, shoelace_data *OLD*,
-           shoelace_log shoelace_log;
-</ProgramListing>
-
-    Don't trust the output of the pg_rules system view. It specially
-    handles the situation that there are only references to NEW
-    and OLD in the INSERT and outputs the VALUES format of INSERT.
-    In fact there is no difference between an INSERT ... VALUES
-    and an INSERT ... SELECT on parsetree level. They both have
-    rangetables, targetlists and maybe qualifications etc. The
-    optimizer later decides, if to create an execution plan of
-    type result, seqscan, indexscan, join or whatever for that
-    parsetree. If there are no references to
-    rangetable entries leftin the parsetree , it becomes
-    a result execution plan
-    (the INSERT ... VALUES version). The rule action above can
-    truely result in both variants.
+           getpgusername(), datetime('now'::text))
+      FROM shoelace_data *NEW*, shoelace_data *OLD*;
+</ProgramListing>
+
+    This is a little strange-looking since you can't normally write
+    INSERT ... VALUES ... FROM.  The FROM clause here is just to indicate
+    that there are rangetable entries in the parsetree for *NEW* and *OLD*.
+    These are needed so that they can be referenced by variables in the
+    INSERT command's querytree.
 </Para>

 <Para>
    The rule is a qualified non-INSTEAD rule, so the rule system
-    has to return two parsetrees. The modified rule action and the original
+    has to return two parsetrees: the modified rule action and the original
    parsetree. In the first step the rangetable of the original query is
-    incorporated into the rules action parsetree. This results in
+    incorporated into the rule's action parsetree. This results in

 <ProgramListing>
-    INSERT INTO shoelace_log SELECT 
-           *NEW*.sl_name, *NEW*.sl_avai,
-           getpgusername(), datetime('now'::text)
-      FROM <FirstTerm>shoelace_data shoelace_data</FirstTerm>, shoelace_data *NEW*,
-           shoelace_data *OLD*, shoelace_log shoelace_log;
+    INSERT INTO shoelace_log VALUES(
+           *NEW*.sl_name, *NEW*.sl_avail,
+           getpgusername(), datetime('now'::text))
+      FROM shoelace_data *NEW*, shoelace_data *OLD*,
+           <FirstTerm>shoelace_data shoelace_data</FirstTerm>;
 </ProgramListing>

    In step 2 the rule qualification is added to it, so the result set
    is restricted to rows where sl_avail changes.

 <ProgramListing>
-    INSERT INTO shoelace_log SELECT 
-           *NEW*.sl_name, *NEW*.sl_avai,
-           getpgusername(), datetime('now'::text)
-      FROM shoelace_data shoelace_data, shoelace_data *NEW*,
-           shoelace_data *OLD*, shoelace_log shoelace_log
+    INSERT INTO shoelace_log VALUES(
+           *NEW*.sl_name, *NEW*.sl_avail,
+           getpgusername(), datetime('now'::text))
+      FROM shoelace_data *NEW*, shoelace_data *OLD*,
+           shoelace_data shoelace_data
     <FirstTerm>WHERE int4ne(*NEW*.sl_avail, *OLD*.sl_avail)</FirstTerm>;
 </ProgramListing>

-    In step 3 the original parsetrees qualification is added,
+    This is even stranger-looking, since INSERT ... VALUES doesn't have
+    a WHERE clause either, but the planner and executor will have no
+    difficulty with it.  They need to support this same functionality
+    anyway for INSERT ... SELECT.
+
+    In step 3 the original parsetree's qualification is added,
    restricting the resultset further to only the rows touched
    by the original parsetree.

 <ProgramListing>
-    INSERT INTO shoelace_log SELECT 
-           *NEW*.sl_name, *NEW*.sl_avai,
-           getpgusername(), datetime('now'::text)
-      FROM shoelace_data shoelace_data, shoelace_data *NEW*,
-           shoelace_data *OLD*, shoelace_log shoelace_log
+    INSERT INTO shoelace_log VALUES(
+           *NEW*.sl_name, *NEW*.sl_avail,
+           getpgusername(), datetime('now'::text))
+      FROM shoelace_data *NEW*, shoelace_data *OLD*,
+           shoelace_data shoelace_data
     WHERE int4ne(*NEW*.sl_avail, *OLD*.sl_avail)
       <FirstTerm>AND bpchareq(shoelace_data.sl_name, 'sl7')</FirstTerm>;
 </ProgramListing>
@@ -1304,34 +1181,35 @@
    from the result relation.

 <ProgramListing>
-    INSERT INTO shoelace_log SELECT 
+    INSERT INTO shoelace_log VALUES(
           <FirstTerm>shoelace_data.sl_name</FirstTerm>, <FirstTerm>6</FirstTerm>,
-           getpgusername(), datetime('now'::text)
-      FROM shoelace_data shoelace_data, shoelace_data *NEW*,
-           shoelace_data *OLD*, shoelace_log shoelace_log
+           getpgusername(), datetime('now'::text))
+      FROM shoelace_data *NEW*, shoelace_data *OLD*,
+           shoelace_data shoelace_data
     WHERE int4ne(<FirstTerm>6</FirstTerm>, *OLD*.sl_avail)
       AND bpchareq(shoelace_data.sl_name, 'sl7');
 </ProgramListing>

-    Step 5 replaces OLD references into resultrelation references.
+    Step 5 changes OLD references into resultrelation references.

 <ProgramListing>
-    INSERT INTO shoelace_log SELECT 
+    INSERT INTO shoelace_log VALUES(
           shoelace_data.sl_name, 6,
-           getpgusername(), datetime('now'::text)
-      FROM shoelace_data shoelace_data, shoelace_data *NEW*,
-           shoelace_data *OLD*, shoelace_log shoelace_log
+           getpgusername(), datetime('now'::text))
+      FROM shoelace_data *NEW*, shoelace_data *OLD*,
+           shoelace_data shoelace_data
     WHERE int4ne(6, <FirstTerm>shoelace_data.sl_avail</FirstTerm>)
       AND bpchareq(shoelace_data.sl_name, 'sl7');
 </ProgramListing>

-    That's it. So reduced to the max the return from the rule system
+    That's it.  Since the rule is not INSTEAD, we also output the
+    original parsetree.  In short, the output from the rule system
    is a list of two parsetrees that are the same as the statements:

 <ProgramListing>
-    INSERT INTO shoelace_log SELECT
+    INSERT INTO shoelace_log VALUES(
           shoelace_data.sl_name, 6,
-           getpgusername(), 'now'
+           getpgusername(), datetime('now'::text))
      FROM shoelace_data
     WHERE 6 != shoelace_data.sl_avail
       AND shoelace_data.sl_name = 'sl7';
@@ -1341,31 +1219,29 @@
 </ProgramListing>

    These are executed in this order and that is exactly what
-    the rule defines. The subtitutions and the qualifications
-    added ensure, that if the original query would be an
+    the rule defines. The substitutions and the qualifications
+    added ensure that if the original query would be, say,

 <ProgramListing>
    UPDATE shoelace_data SET sl_color = 'green'
     WHERE sl_name = 'sl7';
 </ProgramListing>

-    No log entry would get written because due to the fact that this
+    no log entry would get written.  This
    time the original parsetree does not contain a targetlist
-    entry for sl_avail, NEW.sl_avail will get replaced by
+    entry for sl_avail, so NEW.sl_avail will get replaced by
    shoelace_data.sl_avail resulting in the extra query

 <ProgramListing>
-    INSERT INTO shoelace_log SELECT
+    INSERT INTO shoelace_log VALUES(
           shoelace_data.sl_name, <FirstTerm>shoelace_data.sl_avail</FirstTerm>,
-           getpgusername(), 'now'
+           getpgusername(), 'now')
      FROM shoelace_data
     WHERE <FirstTerm>shoelace_data.sl_avail</FirstTerm> != shoelace_data.sl_avail
       AND shoelace_data.sl_name = 'sl7';
 </ProgramListing>

-    and that qualification will never be true. Since the is no
-    difference on parsetree level between an INSERT ... SELECT,
-    and an INSERT ... VALUES, it will also
+    and that qualification will never be true. It will also
    work if the original query modifies multiple rows. So if Al
    would issue the command

@@ -1410,8 +1286,8 @@

 <Para>
    A simple way to protect view relations from the mentioned
-    possibility that someone can INSERT, UPDATE and DELETE
-    invisible data on them is to let those parsetrees get
+    possibility that someone can try to INSERT, UPDATE and DELETE
+    on them is to let those parsetrees get
    thrown away. We create the rules

 <ProgramListing>
@@ -1434,10 +1310,10 @@
    <Note>
    <Title>Note</Title>
    <Para>
-    This fact might irritate frontend applications because
+    This way might irritate frontend applications because
    absolutely nothing happened on the database and thus, the
    backend will not return anything for the query. Not
-    even a PGRES_EMPTY_QUERY or so will be available in libpq.
+    even a PGRES_EMPTY_QUERY will be available in libpq.
    In psql, nothing happens. This might change in the future.
    </Para>
    </Note>
@@ -1482,7 +1358,7 @@
    we don't want him to manually update the shoelace view.
    Instead we setup two little tables, one where he can
    insert the items from the partlist and one with a special
-    trick. The create commands for anything are:
+    trick. The create commands for these are:

 <ProgramListing>
    CREATE TABLE shoelace_arrive (
@@ -1514,7 +1390,7 @@
    (3 rows)
 </ProgramListing>

-    is exactly that what's on the part list. We take a quick look
+    is exactly what's on the part list. We take a quick look
    at the current data,
    
 <ProgramListing>
@@ -1565,7 +1441,7 @@
 </ProgramListing>

    It's a long way from the one INSERT ... SELECT to these
-    results. And it's description will be the last in this
+    results. And its description will be the last in this
    document (but not the last example :-). First there was the parsers output

 <ProgramListing>
@@ -1606,7 +1482,7 @@
 </ProgramListing>

    Again it's an INSTEAD rule and the previous parsetree is trashed.
-    Note that this query sill uses the view <Filename>shoelace</Filename>
+    Note that this query still uses the view <Filename>shoelace</Filename>.
    But the rule system isn't finished with this loop so it continues
    and applies the rule '_RETshoelace' on it and we get

@@ -1688,7 +1564,7 @@
    There is a little detail that's a bit ugly. Looking at
    the two queries turns out, that the <Filename>shoelace_data</Filename>
    relation appears twice in the rangetable where it could definitely
-    be reduced to one. The optimizer does not handle it and so the
+    be reduced to one. The planner does not handle it and so the
    execution plan for the rule systems output of the INSERT will be

 <ProgramListing>
@@ -1725,7 +1601,7 @@ Merge Join

 <Para>
    A final demonstration of the <ProductName>Postgres</ProductName>
-    rule system and it's power. There is a cute blonde that
+    rule system and its power. There is a cute blonde that
    sells shoelaces. And what Al could never realize, she's not
    only cute, she's smart too - a little too smart. Thus, it
    happens from time to time that Al orders shoelaces that
@@ -1752,7 +1628,7 @@ Merge Join
            (SELECT shoename FROM shoe WHERE slcolor = sl_color);
 </ProgramListing>

-    It's output is
+    Its output is

 <ProgramListing>
    al_bundy=> SELECT * FROM shoelace_obsolete;
@@ -1840,8 +1716,8 @@ Merge Join
    rewrite rules that are defined for it.
    The <ProductName>Postgres</ProductName> rule system changes the
    behaviour of the default access control system. Relations that
-    are used due to rules get checked during the rewrite against the
-    permissions of the relation owner, the rule is defined on.
+    are used due to rules get checked against the
+    permissions of the rule owner, not the user invoking the rule.
    This means, that a user does only need the required permissions
    for the tables/views he names in his queries. 
 </Para>
@@ -1865,8 +1741,9 @@ Merge Join
    that only entries where private is false are wanted. Since the
    user is the owner of phone_number, the read access to phone_data
    is now checked against his permissions and the query is considered
-    granted. The check for accessing phone_number is still performed,
-    so nobody than the secretary can use it.
+    granted. The check for accessing phone_number is also performed,
+    but this is done against the invoking user, so nobody but the user and the
+    secretary can use it.
 </Para>

 <Para>
@@ -1897,7 +1774,7 @@ Merge Join
    of the previous section, the owner of the tables in Al's database
    could GRANT SELECT, INSERT, UPDATE and DELETE on the shoelace view to al.
    But only SELECT on shoelace_log. The rule action to write log entries
-    will still be executed successfull. And Al could see the log entries.
+    will still be executed successfully. And Al could see the log entries.
    But he cannot create fake entries, nor could he manipulate or remove
    existing ones.

@@ -1999,7 +1876,7 @@ Merge Join
                           AND software.hostname = computer.hostname;
 </ProgramListing>

-    Since there are appropriate indices setup, the optimizer
+    Since there are appropriate indices setup, the planner
    will create a plan of

 <ProgramListing>
@@ -2041,7 +1918,7 @@ Merge Join
      ->  Index Scan using soft_hostidx on software
 </ProgramListing>

-    This shows, that the optimizer does not realize that the
+    This shows, that the planner does not realize that the
    qualification for the hostname on computer could also be
    used for an index scan on software when there are
    multiple qualification expressions combined with AND, what
@@ -2092,7 +1969,7 @@ Merge Join
    change of an attribute if an action should be performed or
    not. In <ProductName>Postgres</ProductName> version 6.4, the
    attribute specification for rule events is disabled (it will have
-    it's comeback latest in 6.5, maybe earlier 
+    its comeback latest in 6.5, maybe earlier 
    - stay tuned). So for now the only way to
    create a rule as in the shoelace_log example is to do it with
    a rule qualification. That results in an extra query that is
@@ -2101,10 +1978,10 @@ Merge Join
    of the initial query. When this is enabled again, it will be
    one more advantage of rules over triggers. Optimization of
    a trigger must fail by definition in this case, because the
-    fact that it's actions will only be done when a specific attribute
-    is updated is hidden in it's functionality. The definition of
+    fact that its actions will only be done when a specific attribute
+    is updated is hidden in its functionality. The definition of
    a trigger only allows to specify it on row level, so whenever a
-    row is touched, the trigger must be called to make it's
+    row is touched, the trigger must be called to make its
    decision. The rule system will know it by looking up the
    targetlist and will suppress the additional query completely
    if the attribute isn't touched. So the rule, qualified or not,
@@ -2112,9 +1989,9 @@ Merge Join
 </Para>

 <Para>
-    Rules will only be significant slower than triggers if
+    Rules will only be significantly slower than triggers if
    their actions result in large and bad qualified joins, a situation
-    where the optimizer fails. They are a big hammer.
+    where the planner fails. They are a big hammer.
    Using a big hammer without caution can cause big damage. But
    used with the right touch, they can hit any nail on the head.
 </Para>

--- a/doc/src/sgml/sql.sgml
+++ b/doc/src/sgml/sql.sgml
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/sql.sgml,v 1.14 2000/12/09 22:59:25 momjian Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/sql.sgml,v 1.15 2000/12/12 05:07:58 tgl Exp $
 -->

 <chapter id="sql">
@@ -854,17 +854,17 @@ A &lt; B + 3.
     used to retrieve data. The syntax is:

     <synopsis>
-SELECT [ALL|DISTINCT]
-    { * | <replaceable class="parameter">expr_1</replaceable> [AS <replaceable class="parameter">c_alias_1</replaceable>] [, ... 
-     [, <replaceable class="parameter">expr_k</replaceable> [AS <replaceable class="parameter">c_alias_k</replaceable>]]]}
-    FROM <replaceable class="parameter">table_name_1</replaceable> [<replaceable class="parameter">t_alias_1</replaceable>] 
-     [, ... [, <replaceable class="parameter">table_name_n</replaceable> [<replaceable class="parameter">t_alias_n</replaceable>]]]
-    [WHERE <replaceable class="parameter">condition</replaceable>]
-    [GROUP BY <replaceable class="parameter">name_of_attr_i</replaceable> 
-     [,... [, <replaceable class="parameter">name_of_attr_j</replaceable>]] [HAVING <replaceable class="parameter">condition</replaceable>]]
-    [{UNION [ALL] | INTERSECT | EXCEPT} SELECT ...]
-    [ORDER BY <replaceable class="parameter">name_of_attr_i</replaceable> [ASC|DESC] 
-     [, ... [, <replaceable class="parameter">name_of_attr_j</replaceable> [ASC|DESC]]]];
+SELECT [ ALL | DISTINCT [ ON ( <replaceable class="PARAMETER">expression</replaceable> [, ...] ) ] ]
+    * | <replaceable class="PARAMETER">expression</replaceable> [ AS <replaceable class="PARAMETER">output_name</replaceable> ] [, ...]
+    [ INTO [ TEMPORARY | TEMP ] [ TABLE ] <replaceable class="PARAMETER">new_table</replaceable> ]
+    [ FROM <replaceable class="PARAMETER">from_item</replaceable> [, ...] ]
+    [ WHERE <replaceable class="PARAMETER">condition</replaceable> ]
+    [ GROUP BY <replaceable class="PARAMETER">expression</replaceable> [, ...] ]
+    [ HAVING <replaceable class="PARAMETER">condition</replaceable> [, ...] ]
+    [ { UNION | INTERSECT | EXCEPT [ ALL ] } <replaceable class="PARAMETER">select</replaceable> ]
+    [ ORDER BY <replaceable class="PARAMETER">expression</replaceable> [ ASC | DESC | USING <replaceable class="PARAMETER">operator</replaceable> ] [, ...] ]
+    [ FOR UPDATE [ OF <replaceable class="PARAMETER">class_name</replaceable> [, ...] ] ]
+    [ LIMIT { <replaceable class="PARAMETER">count</replaceable> | ALL } [ { OFFSET | , } <replaceable class="PARAMETER">start</replaceable> ]]
     </synopsis>
    </para>

@@ -1037,11 +1037,13 @@ SELECT S.SNAME, P.PNAME
     <para>
      <acronym>SQL</acronym> provides aggregate operators
      (e.g. AVG, COUNT, SUM, MIN, MAX) that
-      take the name of an attribute as an argument. The value of the
-      aggregate operator is calculated over all values of the specified
-      attribute (column) of the whole table. If groups are specified in the
-      query the calculation is done only over the values of a group (see next
-      section).
+      take an expression as argument.  The expression is evaluated at
+      each row that satisfies the WHERE clause, and the aggregate operator
+      is calculated over this set of input values.  Normally, an aggregate
+      delivers a single result for a whole SELECT statement.  But if
+      grouping is specified in the query, then a separate calculation is done
+      over the rows of each group, and an aggregate result is delivered per
+      group (see next section).

      <example>
       <title id="aggregates-example">Aggregates</title>
@@ -1094,11 +1096,11 @@ SELECT COUNT(PNO)
     <para>
      <acronym>SQL</acronym> allows one to partition the tuples of a table
      into groups. Then the
-      aggregate operators described above can be applied to the groups
-      (i.e. the value of the aggregate operator is no longer calculated over
+      aggregate operators described above can be applied to the groups ---
+      i.e. the value of the aggregate operator is no longer calculated over
      all the values of the specified column but over all values of a
      group. Thus the aggregate operator is evaluated individually for every
-      group.) 
+      group.
     </para>

     <para>
@@ -1179,19 +1181,26 @@ SELECT S.SNO, S.SNAME, COUNT(SE.PNO)

       <para>
 	In our example we got four groups and now we can apply the aggregate
-	operator COUNT to every group leading to the total result of the query
+	operator COUNT to every group leading to the final result of the query
 	given above.
       </para>
      </example>
     </para>

     <para>
-      Note that for the result of a query using GROUP BY and aggregate
-      operators to make sense the attributes grouped by must also appear in
-      the target list. All further attributes not appearing in the GROUP
-      BY clause can only be selected by using an aggregate function. On
-      the other hand you can not use aggregate functions on attributes
-      appearing in the GROUP BY clause.
+      Note that for a query using GROUP BY and aggregate
+      operators to make sense the target list can only refer directly to
+      the attributes being grouped by.  Other attributes may only be used
+      inside the argument of an aggregate function.  Otherwise there would
+      not be a unique value to associate with the other attributes.
+     </para>
+
+     <para>
+      Also observe that it makes no sense to ask for an aggregate of an
+      aggregate, eg, AVG(MAX(sno)), because a SELECT only does one pass
+      of grouping and aggregation.  You can get a result of this kind by
+      using a temporary table or a sub-SELECT in the FROM clause to
+      do the first level of aggregation.
     </para>
    </sect3>

@@ -1201,11 +1210,14 @@ SELECT S.SNO, S.SNAME, COUNT(SE.PNO)
     <para>
      The HAVING clause works much like the WHERE clause and is used to
      consider only those groups satisfying the qualification given in the
-      HAVING clause. The expressions allowed in the HAVING clause must
-      involve aggregate functions. Every expression using only  plain
-      attributes belongs to the WHERE clause. On the other hand every
-      expression involving an aggregate function must be put to the HAVING
-      clause. 
+      HAVING clause.  Essentially, WHERE filters out unwanted input rows
+      before grouping and aggregation are done, whereas HAVING filters out
+      unwanted group rows post-GROUP.  Therefore, WHERE cannot refer to the
+      results of aggregate functions.  On the other hand, there's no point
+      in writing a HAVING condition that doesn't involve an aggregate
+      function!  If your condition doesn't involve aggregates, you might
+      as well write it in WHERE, and thereby avoid the computation of
+      aggregates for groups that you're just going to throw away anyway.

      <example>
       <title id="having-example">Having</title>
@@ -1280,7 +1292,9 @@ SELECT *
 	SELECT. For every tuple of the outer SELECT the inner SELECT has to be
 	evaluated. After every evaluation we know the price of the tuple named
 	'Screw' and we can check if the price of the actual tuple is
-	greater. 
+	greater.  (Actually, in this example the inner query need only be
+	evaluated once, since it does not depend on the state of the outer
+	query.)
       </para>

       <para>
@@ -1299,9 +1313,43 @@ SELECT *
       <para>
 	In our example the result will be empty because every supplier sells
 	at least one part. Note that we use S.SNO from the outer SELECT within
-	the WHERE clause of the inner SELECT. As described above the subquery
-	is evaluated for every tuple from the outer query i.e. the value for
-	S.SNO is always taken from the actual tuple of the outer SELECT. 
+	the WHERE clause of the inner SELECT. Here the subquery must be
+	evaluated for every tuple from the outer query, i.e. the value for
+	S.SNO is always taken from the current tuple of the outer SELECT. 
+       </para>
+      </example>
+     </para>
+    </sect3>
+
+    <sect3>
+     <title>Subqueries in FROM</title>
+
+     <para>
+      A somewhat different way of using subqueries is to put them in the
+      FROM clause.  This is a useful feature because a subquery of this
+      kind can output multiple columns and rows, whereas a subquery used
+      in an expression must deliver just a single result.  It also lets
+      us get more than one round of grouping/aggregation without resorting
+      to a temporary table.
+
+      <example>
+       <title id="subselect-in-from-example">Subselect in FROM</title>
+
+       <para>
+        If we want to know the highest average part price among all our
+	suppliers, we can't write MAX(AVG(PRICE)), but we can write:
+
+	<programlisting>
+SELECT MAX(subtable.avgprice)
+    FROM (SELECT AVG(P.PRICE) AS avgprice
+          FROM SUPPLIER S, PART P, SELLS SE
+          WHERE S.SNO = SE.SNO AND
+                P.PNO = SE.PNO
+          GROUP BY S.SNO) subtable;
+	</programlisting>
+
+	The subquery returns one row per supplier (because of its GROUP BY)
+	and then we aggregate over those rows in the outer query.
       </para>
      </example>
     </para>
@@ -1311,7 +1359,7 @@ SELECT *
     <title>Union, Intersect, Except</title>

     <para>
-      These operations calculate the union, intersect and set theoretic
+      These operations calculate the union, intersection and set theoretic
      difference of the tuples derived by two subqueries.

      <example>
@@ -1341,7 +1389,7 @@ gives the result:
       </para>

       <para>
-	Here an example for INTERSECT:
+	Here is an example for INTERSECT:

 	<programlisting>
 SELECT S.SNO, S.SNAME, S.CITY
@@ -1361,7 +1409,7 @@ SELECT S.SNO, S.SNAME, S.CITY
  2  | Jones | Paris
 	</programlisting>

-	The only tuple returned by both parts of the query is the one having $SNO=2$.
+	The only tuple returned by both parts of the query is the one having SNO=2.
       </para>

       <para>
@@ -1469,14 +1517,11 @@ CREATE TABLE SELLS
       <listitem>
 	<para>
 	 DECIMAL (<replaceable class="parameter">p</replaceable>[,<replaceable class="parameter">q</replaceable>]):
-	 signed packed decimal number of
+	 signed packed decimal number of up to
 	 <replaceable class="parameter">p</replaceable>
-	 digits precision with assumed
+	 digits, with
 	 <replaceable class="parameter">q</replaceable>
-	 of them right to the decimal point.
-
-(15 &ge; <replaceable class="parameter">p</replaceable> &ge; <replaceable class="parameter">q</replaceable> &ge; 0).
-
+	 digits to the right of the decimal point.
 	 If <replaceable class="parameter">q</replaceable>
 	 is omitted it is assumed to be 0.
 	</para>
@@ -1547,8 +1592,8 @@ CREATE INDEX I ON SUPPLIER (SNAME);
       <para>
 	The created index is maintained automatically, i.e. whenever a new tuple
 	is inserted into the relation SUPPLIER the index I is adapted. Note
-	that the only changes a user can percept when an index is present
-	are an increased speed.
+	that the only changes a user can perceive when an index is present
+	are increased speed for SELECT and decreases in speed of updates.
       </para>
      </example>
     </para>