Complete TODO item:

o -Allow dump/load of CSV format This adds new keywords to COPY and \copy: CSV - enable CSV mode (comma separated variable) QUOTE - specify quote character ESCAPE - specify escape character FORCE - force quoting of specified column LITERAL - suppress null comparison for columns Doc changes included. Regression updates coming from Andrew.

Complete TODO item:
o -Allow dump/load of CSV format This adds new keywords to COPY and \copy: CSV - enable CSV mode (comma separated variable) QUOTE - specify quote character ESCAPE - specify escape character FORCE - force quoting of specified column LITERAL - suppress null comparison for columns Doc changes included. Regression updates coming from Andrew.
862b20b3 · Bruce Momjian · 83ab1c04 · 862b20b3 · 862b20b3 · 862b20b3
Commit 862b20b3 authored Apr 19, 2004 by Bruce Momjian
7 changed files
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.55 2003/12/13 23:59:07 neilc Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/copy.sgml,v 1.56 2004/04/19 17:22:30 momjian Exp $
 PostgreSQL documentation
 -->
@@ -26,7 +26,10 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
          [ BINARY ] 
          [ OIDS ]
          [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
-          [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ] ]
+          [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
+          [ CSV [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ] 
+                [ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ]
+                [ LITERAL <replaceable class="parameter">column</replaceable> [, ...] ]
 COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable class="parameter">column</replaceable> [, ...] ) ]
    TO { '<replaceable class="parameter">filename</replaceable>' | STDOUT }
@@ -34,7 +37,10 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
          [ BINARY ]
          [ OIDS ]
          [ DELIMITER [ AS ] '<replaceable class="parameter">delimiter</replaceable>' ]
-          [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ] ]
+          [ NULL [ AS ] '<replaceable class="parameter">null string</replaceable>' ]
+          [ CSV [ QUOTE [ AS ] '<replaceable class="parameter">quote</replaceable>' ] 
+                [ ESCAPE [ AS ] '<replaceable class="parameter">escape</replaceable>' ]
+                [ FORCE <replaceable class="parameter">column</replaceable> [, ...] ]
 </synopsis>
 </refsynopsisdiv>
@@ -146,7 +152,8 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
    <listitem>
     <para>
      The single character that separates columns within each row
-      (line) of the file.  The default is a tab character.
+      (line) of the file.  The default is a tab character in text mode,
+      a comma in <literal>CSV</> mode.
     </para>
    </listitem>
   </varlistentry>
@@ -156,20 +163,86 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
    <listitem>
     <para>
      The string that represents a null value. The default is
-      <literal>\N</literal> (backslash-N). You might prefer an empty
+      <literal>\N</literal> (backslash-N) in text mode, and a empty
-      string, for example.
+      value with no quotes in <literal>CSV</> mode. You might prefer an
+      empty string even in text mode for cases where you don't want to
+      distinguish nulls from empty strings.
     </para>
     <note>
      <para>
-       On a <command>COPY FROM</command>, any data item that matches
+       When using <command>COPY FROM</command>, any data item that matches
       this string will be stored as a null value, so you should make
       sure that you use the same string as you used with
       <command>COPY TO</command>.
      </para>
     </note>
+    </listitem>
+   </varlistentry>
+   <varlistentry>
+    <term><literal>CSV</literal></term>
+    <listitem>
+     <para>
+      Enables Comma Separated Variable (<literal>CSV</>) mode.  (Also called
+      Comma Separated Value).  It sets the default <literal>DELIMITER</> to 
+      comma, and <literal>QUOTE</> and <literal>ESCAPE</> values to 
+      double-quote.
+     </para>
+    </listitem>
+   </varlistentry>
+   <varlistentry>
+    <term><replaceable class="parameter">quote</replaceable></term>
+    <listitem>
+     <para>
+      Specifies the quotation character in <literal>CSV</> mode.
+      The default is double-quote.
+     </para>
    </listitem>
   </varlistentry>
+   <varlistentry>
+    <term><replaceable class="parameter">escape</replaceable></term>
+    <listitem>
+     <para>
+      Specifies the character that should appear before a <literal>QUOTE</>
+      data character value in <literal>CSV</> mode.  The default is the
+      <literal>QUOTE</> value (usually double-quote).
+     </para>
+    </listitem>
+   </varlistentry>
+   <varlistentry>
+    <term><literal>FORCE</></term>
+    <listitem>
+     <para>
+      In <literal>CSV</> <command>COPY TO</> mode, forces quoting
+      to be used for all non-<literal>NULL</> values in each specified 
+      column.  <literal>NULL</> output is never quoted.
+     </para>
+    </listitem>
+   </varlistentry>
+   <varlistentry>
+    <term><literal>LITERAL</></term>
+    <listitem>
+     <para>
+      In <literal>CSV</> <command>COPY FROM</> mode, for each column specified,
+      do not do a <literal>null string</> comparison;  instead load the value 
+      literally.  <literal>QUOTE</> and <literal>ESCAPE</> processing are still
+      performed.
+     </para>
+     <para>
+      If the <literal>null string</> is <literal>''</> (the default 
+      in <literal>CSV</> mode), a missing input value (<literal>delimiter, 
+      delimiter</>), will load as a zero-length string.  <literal>Delimiter, quote, 
+      quote, delimiter</> is always treated as a zero-length string on input.
+     </para>
+    </listitem>
+   </varlistentry>
  </variablelist>
 </refsect1>
@@ -233,6 +306,17 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
    constraints on the destination table. However, it will not invoke rules.
   </para>
+   <para>
+    <command>COPY</command> input and output is affected by
+    <varname>DateStyle </varname>. For portability with other
+    <productname>PostgreSQL</productname> installations which might use
+    non-default <varname>DateStyle</varname> settings,
+    <varname>DateStyle</varname> should be set to <literal>ISO</> before
+    using <command>COPY</>. In <literal>CSV</> mode, use <literal>ISO</>
+    or a <varname>DateStyle</varname> setting appropriate for the
+    external application.
+   </para>
   <para>
    <command>COPY</command> stops operation at the first error. This
    should not lead to problems in the event of a <command>COPY
@@ -253,7 +337,8 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
   <para>
    When <command>COPY</command> is used without the <literal>BINARY</literal> option,
-    the data read or written is a text file with one line per table row.
+    the data read or written is a text file with one line per table row,
+    unless <literal>CSV</> mode is used.
    Columns in a row are separated by the delimiter character.
    The column values themselves are strings generated by the
    output function, or acceptable to the input function, of each
@@ -379,6 +464,63 @@ COPY <replaceable class="parameter">tablename</replaceable> [ ( <replaceable cla
   </para>
  </refsect2>
+  <refsect2>
+   <title>CSV Format</title>
+   <para>
+    This format is used for importing and exporting the Comma
+    Separated Variable (<literal>CSV</>) file format used by many other
+    programs, such as spreadsheets. Instead of the escaping used by
+    <productname>PostgreSQL</productname>'s standard text mode, it
+    produces and recognises the common CSV escaping mechanism.
+   </para>
+   <para>
+    The values in each record are separated by the <literal>DELIMITER</>
+    character. If the value contains the delimiter character, the
+    <literal>QUOTE</> character, the <literal>NULL</> string, a carriage
+    return, or line feed character, then the whole value is prefixed and
+    suffixed by the <literal>QUOTE</> character, and any occurrence
+    within the value of a <literal>QUOTE</> character or the
+    <literal>ESCAPE</> character is preceded by the escape character.
+    You can also use <literal>FORCE</> to force quotes when outputting
+    non-<literal>NULL</> values in specific columns.
+   </para>
+   <para> 
+    In general, the <literal>CSV</> format has no way to distinguish a
+    <literal>NULL</> from an empty string.
+    <productname>PostgreSQL</productname>'s COPY handles this by
+    quoting. A <literal>NULL</> is output as the <literal>NULL</> string 
+    and is not quoted, while a data value matching the <literal>NULL</> string 
+    is quoted. Therefore, using the default settings, a <literal>NULL</> is
+    written as an unquoted empty string, while an empty string is
+    written with double quotes (<literal>""</>). Reading values follows 
+    similar rules. You can use <literal>LITERAL</> to prevent <literal>NULL</>
+    input comparisons for specific columns.
+   </para>
+   <note>
+    <para>
+     CSV mode will both recognize and produce CSV files with quoted
+     values containing embedded carriage returns and line feeds. Thus
+     the files are not strictly one line per table row like text-mode
+     files.
+    </para>
+   </note>
+   <note>
+    <para>
+     Many programs produce strange and occasionally perverse CSV files,
+     so the file format is more a convention than a standard. Thus you
+     might encounter some files that cannot be imported using this
+     mechanism, and <command>COPY</> might produce files that other
+     programs can not process.
+    </para>
+   </note>
+  </refsect2>
  <refsect2>
   <title>Binary Format</title>

--- a/doc/src/sgml/ref/psql-ref.sgml
+++ b/doc/src/sgml/ref/psql-ref.sgml
 <!--
-$PostgreSQL: pgsql/doc/src/sgml/ref/psql-ref.sgml,v 1.110 2004/04/12 15:58:52 momjian Exp $
+$PostgreSQL: pgsql/doc/src/sgml/ref/psql-ref.sgml,v 1.111 2004/04/19 17:22:30 momjian Exp $
 PostgreSQL documentation
 -->
@@ -711,6 +711,10 @@ testdb=>
            [ <literal>oids</literal> ] 
            [ <literal>delimiter [as] </literal> '<replaceable class="parameter">character</replaceable>' ]
            [ <literal>null [as] </literal> '<replaceable class="parameter">string</replaceable>' ]</literal>
+            [ <literal>csv [ quote [as] </literal> '<replaceable class="parameter">character</replaceable>' ]
+                           [ <literal>escape [as] </literal> '<replaceable class="parameter">character</replaceable>' ]
+                           [ <literal>force</> <replaceable class="parameter">column_list</replaceable> ]
+                           [ <literal>literal</> <replaceable class="parameter">column_list</replaceable> ] ]
        </term>
        <listitem>

--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.220 2004/04/15 22:36:03 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.221 2004/04/19 17:22:30 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -70,7 +70,8 @@ typedef enum CopyDest
 typedef enum CopyReadResult
 {
 	NORMAL_ATTR,
-	END_OF_LINE
+	END_OF_LINE,
+	UNTERMINATED_FIELD
 } CopyReadResult;
 /*
@@ -130,15 +131,22 @@ static bool line_buf_converted;
 /* non-export function prototypes */
 static void CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
-	   char *delim, char *null_print);
+	   char *delim, char *null_print, bool csv_mode, char *quote, char *escape,
+	   List *force_atts);
 static void CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
-		 char *delim, char *null_print);
+		 char *delim, char *null_print, bool csv_mode, char *quote, char *escape,
+		 List *literal_atts);
 static bool CopyReadLine(void);
 static char *CopyReadAttribute(const char *delim, const char *null_print,
 							   CopyReadResult *result, bool *isnull);
+static char *CopyReadAttributeCSV(const char *delim, const char *null_print,
+							   char *quote, char *escape,
+							   CopyReadResult *result, bool *isnull);
 static Datum CopyReadBinaryAttribute(int column_no, FmgrInfo *flinfo,
 						Oid typelem, bool *isnull);
 static void CopyAttributeOut(char *string, char *delim);
+static void CopyAttributeOutCSV(char *string, char *delim, char *quote,
+								char *escape, bool force_quote);
 static List *CopyGetAttnums(Relation rel, List *attnamelist);
 static void limit_printout_length(StringInfo buf);
@@ -682,8 +690,15 @@ DoCopy(const CopyStmt *stmt)
 	List	   *attnumlist;
 	bool		binary = false;
 	bool		oids = false;
+	bool        csv_mode = false;
 	char	   *delim = NULL;
+	char	   *quote = NULL;
+	char	   *escape = NULL;
 	char	   *null_print = NULL;
+	List	   *force = NIL;
+	List	   *literal = NIL;
+	List	   *force_atts = NIL;
+	List	   *literal_atts = NIL;
 	Relation	rel;
 	AclMode		required_access = (is_from ? ACL_INSERT : ACL_SELECT);
 	AclResult	aclresult;
@@ -725,6 +740,46 @@ DoCopy(const CopyStmt *stmt)
 						 errmsg("conflicting or redundant options")));
 			null_print = strVal(defel->arg);
 		}
+		else if (strcmp(defel->defname, "csv") == 0)
+		{
+			if (csv_mode)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("conflicting or redundant options")));
+			csv_mode = intVal(defel->arg);
+		}
+		else if (strcmp(defel->defname, "quote") == 0)
+		{
+			if (quote)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("conflicting or redundant options")));
+			quote = strVal(defel->arg);
+		}
+		else if (strcmp(defel->defname, "escape") == 0)
+		{
+			if (escape)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("conflicting or redundant options")));
+			escape = strVal(defel->arg);
+		}
+		else if (strcmp(defel->defname, "force") == 0)
+		{
+			if (force)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("conflicting or redundant options")));
+			force = (List *)defel->arg;
+		}
+		else if (strcmp(defel->defname, "literal") == 0)
+		{
+			if (literal)
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("conflicting or redundant options")));
+			literal = (List *)defel->arg;
+		}
 		else
 			elog(ERROR, "option \"%s\" not recognized",
 				 defel->defname);
@@ -735,6 +790,11 @@ DoCopy(const CopyStmt *stmt)
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("cannot specify DELIMITER in BINARY mode")));
+	if (binary && csv_mode)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("cannot specify CSV in BINARY mode")));
 	if (binary && null_print)
 		ereport(ERROR,
 				(errcode(ERRCODE_SYNTAX_ERROR),
@@ -742,10 +802,92 @@ DoCopy(const CopyStmt *stmt)
 	/* Set defaults */
 	if (!delim)
-		delim = "\t";
+		delim = csv_mode ? "," : "\t";
 	if (!null_print)
-		null_print = "\\N";
+		null_print = csv_mode ? "" : "\\N";
+	if (csv_mode)
+	{
+		if (!quote)
+			quote = "\"";
+		if (!escape)
+			escape = quote;
+	}
+	/*
+	 * Only single-character delimiter strings are supported.
+	 */
+	if (strlen(delim) != 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY delimiter must be a single character")));
+	/*
+	 * Check quote
+	 */
+	if (!csv_mode && quote != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY quote available only in CSV mode")));
+	if (csv_mode && strlen(quote) != 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY quote must be a single character")));
+	/*
+	 * Check escape
+	 */
+	if (!csv_mode && escape != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY escape available only in CSV mode")));
+	if (csv_mode && strlen(escape) != 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY escape must be a single character")));
+	/*
+	 * Check force
+	 */
+	if (!csv_mode && force != NIL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY force available only in CSV mode")));
+	if (force != NIL && is_from)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY force only available using COPY TO")));
+	/*
+	 * Check literal
+	 */
+	if (!csv_mode && literal != NIL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY literal available only in CSV mode")));
+	if (literal != NIL && !is_from)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY literal only available using COPY FROM")));
+	/*
+	 * Don't allow the delimiter to appear in the null string.
+	 */
+	if (strchr(null_print, delim[0]) != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("COPY delimiter must not appear in the NULL specification")));
+	/*
+	 * Don't allow the csv quote char to appear in the null string.
+	 */
+	if (csv_mode && strchr(null_print, quote[0]) != NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("CSV quote character must not appear in the NULL specification")));
 	/*
 	 * Open and lock the relation, using the appropriate lock type.
@@ -771,22 +913,6 @@ DoCopy(const CopyStmt *stmt)
 				 errhint("Anyone can COPY to stdout or from stdin. "
 					   "psql's \\copy command also works for anyone.")));
-	/*
-	 * Presently, only single-character delimiter strings are supported.
-	 */
-	if (strlen(delim) != 1)
-		ereport(ERROR,
-				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("COPY delimiter must be a single character")));
-	/*
-	 * Don't allow the delimiter to appear in the null string.
-	 */
-	if (strchr(null_print, delim[0]) != NULL)
-		ereport(ERROR,
-				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("COPY delimiter must not appear in the NULL specification")));
 	/*
 	 * Don't allow COPY w/ OIDs to or from a table without them
 	 */
@@ -801,6 +927,52 @@ DoCopy(const CopyStmt *stmt)
 	 */
 	attnumlist = CopyGetAttnums(rel, attnamelist);
+	/*
+	 * Check that FORCE references valid COPY columns
+	 */
+	if (force)
+	{
+		TupleDesc	tupDesc = RelationGetDescr(rel);
+		Form_pg_attribute *attr = tupDesc->attrs;
+		List	   *cur;
+		force_atts = CopyGetAttnums(rel, force);
+		foreach(cur, force_atts)
+		{
+			int			attnum = lfirsti(cur);
+			if (!intMember(attnum, attnumlist))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+						 errmsg("FORCE column \"%s\" not referenced by COPY",
+								NameStr(attr[attnum - 1]->attname))));
+		}
+	}
+	/*
+	 * Check that LITERAL references valid COPY columns
+	 */
+	if (literal)
+	{
+		List	   *cur;
+		TupleDesc	tupDesc = RelationGetDescr(rel);
+		Form_pg_attribute *attr = tupDesc->attrs;
+		literal_atts = CopyGetAttnums(rel, literal);
+		foreach(cur, literal_atts)
+		{
+			int			attnum = lfirsti(cur);
+			if (!intMember(attnum, attnumlist))
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
+						 errmsg("LITERAL column \"%s\" not referenced by COPY",
+								NameStr(attr[attnum - 1]->attname))));
+		}
+	}
 	/*
 	 * Set up variables to avoid per-attribute overhead.
 	 */
@@ -864,7 +1036,8 @@ DoCopy(const CopyStmt *stmt)
 						 errmsg("\"%s\" is a directory", filename)));
 			}
 		}
-		CopyFrom(rel, attnumlist, binary, oids, delim, null_print);
+		CopyFrom(rel, attnumlist, binary, oids, delim, null_print, csv_mode,
+				 quote, escape, literal_atts);
 	}
 	else
 	{							/* copy from database to file */
@@ -926,7 +1099,8 @@ DoCopy(const CopyStmt *stmt)
 						 errmsg("\"%s\" is a directory", filename)));
 			}
 		}
-		CopyTo(rel, attnumlist, binary, oids, delim, null_print);
+		CopyTo(rel, attnumlist, binary, oids, delim, null_print, csv_mode,
+				quote, escape, force_atts);
 	}
 	if (!pipe)
@@ -958,7 +1132,8 @@ DoCopy(const CopyStmt *stmt)
 */
 static void
 CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
-	   char *delim, char *null_print)
+	   char *delim, char *null_print, bool csv_mode, char *quote,
+	   char *escape, List *force_atts)
 {
 	HeapTuple	tuple;
 	TupleDesc	tupDesc;
@@ -967,6 +1142,7 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
 	int			attr_count;
 	Form_pg_attribute *attr;
 	FmgrInfo   *out_functions;
+	bool	   *force_quote;
 	Oid		   *elements;
 	bool	   *isvarlena;
 	char	   *string;
@@ -988,11 +1164,12 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
 	out_functions = (FmgrInfo *) palloc((num_phys_attrs + 1) * sizeof(FmgrInfo));
 	elements = (Oid *) palloc((num_phys_attrs + 1) * sizeof(Oid));
 	isvarlena = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool));
+	force_quote = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool));
 	foreach(cur, attnumlist)
 	{
 		int			attnum = lfirsti(cur);
 		Oid			out_func_oid;
 		if (binary)
 			getTypeBinaryOutputInfo(attr[attnum - 1]->atttypid,
 									&out_func_oid, &elements[attnum - 1],
@@ -1002,6 +1179,11 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
 							  &out_func_oid, &elements[attnum - 1],
 							  &isvarlena[attnum - 1]);
 		fmgr_info(out_func_oid, &out_functions[attnum - 1]);
+		if (intMember(attnum, force_atts))
+			force_quote[attnum - 1] = true;
+		else
+			force_quote[attnum - 1] = false;
 	}
 	/*
@@ -1051,7 +1233,6 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
 	while ((tuple = heap_getnext(scandesc, ForwardScanDirection)) != NULL)
 	{
 		bool		need_delim = false;
 		CHECK_FOR_INTERRUPTS();
 		MemoryContextReset(mycontext);
@@ -1113,7 +1294,15 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
 														   value,
 								  ObjectIdGetDatum(elements[attnum - 1]),
 							Int32GetDatum(attr[attnum - 1]->atttypmod)));
-					CopyAttributeOut(string, delim);
+					if (csv_mode)
+					{
+						CopyAttributeOutCSV(string, delim, quote, escape,
+											(strcmp(string, null_print) == 0 ||
+											force_quote[attnum - 1]));
+					}
+					else
+						CopyAttributeOut(string, delim);
 				}
 				else
 				{
@@ -1148,6 +1337,7 @@ CopyTo(Relation rel, List *attnumlist, bool binary, bool oids,
 	pfree(out_functions);
 	pfree(elements);
 	pfree(isvarlena);
+	pfree(force_quote);
 }
@@ -1243,7 +1433,8 @@ limit_printout_length(StringInfo buf)
 */
 static void
 CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
-		 char *delim, char *null_print)
+		 char *delim, char *null_print, bool csv_mode, char *quote,
+		 char *escape, List *literal_atts)
 {
 	HeapTuple	tuple;
 	TupleDesc	tupDesc;
@@ -1256,9 +1447,10 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
 	Oid		   *elements;
 	Oid			oid_in_element;
 	ExprState **constraintexprs;
+	bool	   *literal_nullstr;
 	bool		hasConstraints = false;
-	int			i;
 	int			attnum;
+	int			i;
 	List	   *cur;
 	Oid			in_func_oid;
 	Datum	   *values;
@@ -1317,6 +1509,7 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
 	defmap = (int *) palloc((num_phys_attrs + 1) * sizeof(int));
 	defexprs = (ExprState **) palloc((num_phys_attrs + 1) * sizeof(ExprState *));
 	constraintexprs = (ExprState **) palloc0((num_phys_attrs + 1) * sizeof(ExprState *));
+	literal_nullstr = (bool *) palloc((num_phys_attrs + 1) * sizeof(bool));
 	for (attnum = 1; attnum <= num_phys_attrs; attnum++)
 	{
@@ -1333,6 +1526,11 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
 							 &in_func_oid, &elements[attnum - 1]);
 		fmgr_info(in_func_oid, &in_functions[attnum - 1]);
+		if (intMember(attnum, literal_atts))
+			literal_nullstr[attnum - 1] = true;
+		else
+			literal_nullstr[attnum - 1] = false;
 		/* Get default info if needed */
 		if (!intMember(attnum, attnumlist))
 		{
@@ -1389,9 +1587,7 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
 	ExecBSInsertTriggers(estate, resultRelInfo);
 	if (!binary)
-	{
 		file_has_oids = oids;	/* must rely on user to tell us this... */
-	}
 	else
 	{
 		/* Read and verify binary header */
@@ -1500,6 +1696,7 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
 			if (file_has_oids)
 			{
+				/* can't be in CSV mode here */
 				string = CopyReadAttribute(delim, null_print,
 										   &result, &isnull);
@@ -1538,14 +1735,27 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
 							 errmsg("missing data for column \"%s\"",
 									NameStr(attr[m]->attname))));
-				string = CopyReadAttribute(delim, null_print,
+				if (csv_mode)
-										   &result, &isnull);
-				if (isnull)
 				{
-					/* we read an SQL NULL, no need to do anything */
+					string = CopyReadAttributeCSV(delim, null_print, quote,
+												  escape, &result, &isnull);
+					if (result == UNTERMINATED_FIELD)
+						ereport(ERROR,
+								(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+								 errmsg("unterminated CSV quoted field")));
 				}
 				else
+					string = CopyReadAttribute(delim, null_print, 
+											   &result, &isnull);
+				if (csv_mode && isnull && literal_nullstr[m])
+				{
+					string = null_print;	/* set to NULL string */
+					isnull = false;
+				}
+ 				/* we read an SQL NULL, no need to do anything */
+				if (!isnull)
 				{
 					copy_attname = NameStr(attr[m]->attname);
 					values[m] = FunctionCall3(&in_functions[m],
@@ -1732,11 +1942,12 @@ CopyFrom(Relation rel, List *attnumlist, bool binary, bool oids,
 	pfree(values);
 	pfree(nulls);
-	if (!binary)
+	pfree(in_functions);
-	{
+	pfree(elements);
-		pfree(in_functions);
+	pfree(defmap);
-		pfree(elements);
+	pfree(defexprs);
-	}
+	pfree(constraintexprs);
+	pfree(literal_nullstr);
 	ExecDropTupleTable(tupleTable, true);
@@ -2070,6 +2281,152 @@ CopyReadAttribute(const char *delim, const char *null_print,
 	return attribute_buf.data;
 }
+/*
+ * Read the value of a single attribute in CSV mode, 
+ * performing de-escaping as needed. Escaping does not follow the normal
+ * PostgreSQL text mode, but instead "standard" (i.e. common) CSV usage.
+ *
+ * Quoted fields can span lines, in which case the line end is embedded
+ * in the returned string.
+ *
+ * null_print is the null marker string.  Note that this is compared to
+ * the pre-de-escaped input string (thus if it is quoted it is not a NULL).
+ *
+ * *result is set to indicate what terminated the read:
+ *		NORMAL_ATTR:	column delimiter
+ *		END_OF_LINE:	end of line
+ *      UNTERMINATED_FIELD no quote detected at end of a quoted field
+ *
+ * In any case, the string read up to the terminator (or end of file)
+ * is returned.
+ *
+ * *isnull is set true or false depending on whether the input matched
+ * the null marker.  Note that the caller cannot check this since the
+ * returned string will be the post-de-escaping equivalent, which may
+ * look the same as some valid data string.
+ *----------
+ */
+static char *
+CopyReadAttributeCSV(const char *delim, const char *null_print, char *quote,
+					 char *escape, CopyReadResult *result, bool *isnull)
+{
+	char        delimc = delim[0];
+	char        quotec = quote[0];
+	char        escapec = escape[0];
+	char		c;
+	int			start_cursor = line_buf.cursor;
+	int			end_cursor = start_cursor;
+	int			input_len;
+	bool        in_quote = false;
+	bool        saw_quote = false;
+	/* reset attribute_buf to empty */
+	attribute_buf.len = 0;
+	attribute_buf.data[0] = '\0';
+	/* set default status */
+	*result = END_OF_LINE;
+	for (;;)
+	{
+		/* handle multiline quoted fields */
+		if (in_quote && line_buf.cursor >= line_buf.len)
+		{
+			bool done;
+			switch(eol_type)
+			{
+				case EOL_NL:
+					appendStringInfoString(&attribute_buf,"\n");
+					break;
+				case EOL_CR:
+					appendStringInfoString(&attribute_buf,"\r");
+					break;
+				case EOL_CRNL:
+					appendStringInfoString(&attribute_buf,"\r\n");
+					break;
+				case EOL_UNKNOWN:
+					/* shouldn't happen - just keep going */
+					break;
+			}
+			copy_lineno++;
+			done = CopyReadLine();
+			if (done && line_buf.len == 0)
+				break;
+			start_cursor = line_buf.cursor;
+		}
+		end_cursor = line_buf.cursor;
+		if (line_buf.cursor >= line_buf.len)
+			break;
+		c = line_buf.data[line_buf.cursor++];
+		/* 
+		 * unquoted field delimiter 
+		 */
+		if (!in_quote && c == delimc)
+		{
+			*result = NORMAL_ATTR;
+			break;
+		}
+		/* 
+		 * start of quoted field (or part of field) 
+		 */
+		if (!in_quote && c == quotec)
+		{
+			saw_quote = true;
+			in_quote = true;
+			continue;
+		}
+		/* 
+		 * escape within a quoted field
+		 */
+		if (in_quote && c == escapec)
+		{
+			/* 
+			 * peek at the next char if available, and escape it if it
+			 * is an escape char or a quote char
+			 */
+			if (line_buf.cursor <= line_buf.len)
+			{
+				char nextc = line_buf.data[line_buf.cursor];
+				if (nextc == escapec || nextc == quotec)
+				{
+					appendStringInfoCharMacro(&attribute_buf, nextc);
+					line_buf.cursor++;
+					continue;
+				}
+			}
+		}
+		/*
+		 * end of quoted field. 
+		 * Must do this test after testing for escape in case quote char
+		 * and escape char are the same (which is the common case).
+		 */
+		if (in_quote && c == quotec)
+		{
+			in_quote = false;
+			continue;
+		}
+		appendStringInfoCharMacro(&attribute_buf, c);
+	}
+	if (in_quote)
+		*result = UNTERMINATED_FIELD;
+	/* check whether raw input matched null marker */
+	input_len = end_cursor - start_cursor;
+	if (!saw_quote && input_len == strlen(null_print) &&
+		strncmp(&line_buf.data[start_cursor], null_print, input_len) == 0)
+		*isnull = true;
+	else
+		*isnull = false;
+	return attribute_buf.data;
+}
 /*
 * Read a binary attribute
 */
@@ -2195,6 +2552,73 @@ CopyAttributeOut(char *server_string, char *delim)
 	}
 }
+/*
+ * Send CSV representation of one attribute, with conversion and 
+ * CSV type escaping
+ */
+static void
+CopyAttributeOutCSV(char *server_string, char *delim, char *quote,
+					char *escape, bool force_quote)
+{
+	char	   *string;
+	char		c;
+	char		delimc = delim[0];
+	char        quotec = quote[0];
+ 	char        escapec = escape[0];
+	bool        need_quote = force_quote;
+	char        *test_string;
+	bool		same_encoding;
+	int			mblen;
+	int			i;
+	same_encoding = (server_encoding == client_encoding);
+	if (!same_encoding)
+		string = (char *) pg_server_to_client((unsigned char *) server_string,
+											  strlen(server_string));
+	else
+		string = server_string;
+	/* have to run through the string twice,
+	 * first time to see if it needs quoting, second to actually send it
+	 */
+	for(test_string = string; 
+		!need_quote && (c = *test_string) != '\0'; 
+		test_string += mblen)
+	{
+		if (c == delimc || c == quotec || c == '\n' || c == '\r')
+			need_quote = true;
+		if (!same_encoding)
+			mblen = pg_encoding_mblen(client_encoding, test_string);
+		else
+			mblen = 1;
+	}
+	if (need_quote)
+		CopySendChar(quotec);
+	for (; (c = *string) != '\0'; string += mblen)
+	{
+		if (c == quotec || c == escapec)
+			CopySendChar(escapec);
+		CopySendChar(c);
+		if (!same_encoding)
+		{
+			/* send additional bytes of the char, if any */
+			mblen = pg_encoding_mblen(client_encoding, string);
+			for (i = 1; i < mblen; i++)
+				CopySendChar(string[i]);
+		}
+		else
+			mblen = 1;
+	}
+	if (need_quote)
+		CopySendChar(quotec);
+}
 /*
 * CopyGetAttnums - build an integer list of attnums to be copied
 *

--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -11,7 +11,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.450 2004/04/05 03:07:26 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.451 2004/04/19 17:22:30 momjian Exp $
 *
 * HISTORY
 *	  AUTHOR			DATE			MAJOR EVENT
@@ -343,7 +343,7 @@ static void doNegateFloat(Value *v);
 	CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
 	CLUSTER COALESCE COLLATE COLUMN COMMENT COMMIT
 	COMMITTED CONSTRAINT CONSTRAINTS CONVERSION_P CONVERT COPY CREATE CREATEDB
-	CREATEUSER CROSS CURRENT_DATE CURRENT_TIME
+	CREATEUSER CROSS CSV CURRENT_DATE CURRENT_TIME
 	CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE
 	DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS
@@ -370,7 +370,7 @@ static void doNegateFloat(Value *v);
 	KEY
 	LANCOMPILER LANGUAGE LARGE_P LAST_P LEADING LEFT LEVEL LIKE LIMIT
-	LISTEN LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION
+	LISTEN LITERAL LOAD LOCAL LOCALTIME LOCALTIMESTAMP LOCATION
 	LOCK_P
 	MATCH MAXVALUE MINUTE_P MINVALUE MODE MONTH_P MOVE
@@ -386,6 +386,8 @@ static void doNegateFloat(Value *v);
 	PRECISION PRESERVE PREPARE PRIMARY 
 	PRIOR PRIVILEGES PROCEDURAL PROCEDURE
+	QUOTE
 	READ REAL RECHECK REFERENCES REINDEX RELATIVE_P RENAME REPEATABLE REPLACE
 	RESET RESTART RESTRICT RETURNS REVOKE RIGHT ROLLBACK ROW ROWS
 	RULE
@@ -1360,6 +1362,26 @@ copy_opt_item:
 				{
 					$$ = makeDefElem("null", (Node *)makeString($3));
 				}
+			| CSV
+				{
+					$$ = makeDefElem("csv", (Node *)makeInteger(TRUE));
+				}
+			| QUOTE opt_as Sconst
+				{
+					$$ = makeDefElem("quote", (Node *)makeString($3));
+				}
+			| ESCAPE opt_as Sconst
+				{
+					$$ = makeDefElem("escape", (Node *)makeString($3));
+				}
+			| FORCE columnList
+				{
+					$$ = makeDefElem("force", (Node *)$2);
+				}
+			| LITERAL columnList
+				{
+					$$ = makeDefElem("literal", (Node *)$2);
+				}
 		;
 /* The following exist for backward compatibility */
@@ -7420,6 +7442,7 @@ unreserved_keyword:
 			| COPY
 			| CREATEDB
 			| CREATEUSER
+			| CSV
 			| CURSOR
 			| CYCLE
 			| DATABASE
@@ -7473,6 +7496,7 @@ unreserved_keyword:
 			| LAST_P
 			| LEVEL
 			| LISTEN
+			| LITERAL
 			| LOAD
 			| LOCAL
 			| LOCATION
@@ -7507,6 +7531,7 @@ unreserved_keyword:
 			| PRIVILEGES
 			| PROCEDURAL
 			| PROCEDURE
+			| QUOTE
 			| READ
 			| RECHECK
 			| REINDEX

--- a/src/backend/parser/keywords.c
+++ b/src/backend/parser/keywords.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.147 2004/03/11 01:47:40 ishii Exp $
+ *	  $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.148 2004/04/19 17:22:31 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -90,6 +90,7 @@ static const ScanKeyword ScanKeywords[] = {
 	{"createdb", CREATEDB},
 	{"createuser", CREATEUSER},
 	{"cross", CROSS},
+	{"csv", CSV},
 	{"current_date", CURRENT_DATE},
 	{"current_time", CURRENT_TIME},
 	{"current_timestamp", CURRENT_TIMESTAMP},
@@ -186,6 +187,7 @@ static const ScanKeyword ScanKeywords[] = {
 	{"like", LIKE},
 	{"limit", LIMIT},
 	{"listen", LISTEN},
+	{"literal", LITERAL},
 	{"load", LOAD},
 	{"local", LOCAL},
 	{"localtime", LOCALTIME},
@@ -248,6 +250,7 @@ static const ScanKeyword ScanKeywords[] = {
 	{"privileges", PRIVILEGES},
 	{"procedural", PROCEDURAL},
 	{"procedure", PROCEDURE},
+	{"quote", QUOTE},
 	{"read", READ},
 	{"real", REAL},
 	{"recheck", RECHECK},

--- a/src/backend/tcop/fastpath.c
+++ b/src/backend/tcop/fastpath.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/tcop/fastpath.c,v 1.71 2004/01/07 18:56:27 neilc Exp $
+ *	  $PostgreSQL: pgsql/src/backend/tcop/fastpath.c,v 1.72 2004/04/19 17:22:31 momjian Exp $
 *
 * NOTES
 *	  This cruft is the server side of PQfn.
@@ -154,8 +154,7 @@ SendFunctionResult(Datum retval, bool isnull, Oid rettype, int16 format)
 			bool		typisvarlena;
 			char	   *outputstr;
-			getTypeOutputInfo(rettype,
+			getTypeOutputInfo(rettype, &typoutput, &typelem, &typisvarlena);
-							  &typoutput, &typelem, &typisvarlena);
 			outputstr = DatumGetCString(OidFunctionCall3(typoutput,
 														 retval,
 											   ObjectIdGetDatum(typelem),

--- a/src/bin/psql/copy.c
+++ b/src/bin/psql/copy.c
@@ -3,7 +3,7 @@
 *
 * Copyright (c) 2000-2003, PostgreSQL Global Development Group
 *
- * $PostgreSQL: pgsql/src/bin/psql/copy.c,v 1.43 2004/04/12 15:58:52 momjian Exp $
+ * $PostgreSQL: pgsql/src/bin/psql/copy.c,v 1.44 2004/04/19 17:22:31 momjian Exp $
 */
 #include "postgres_fe.h"
 #include "copy.h"
@@ -66,8 +66,13 @@ struct copy_options
 	bool		from;
 	bool		binary;
 	bool		oids;
+	bool		csv_mode;
 	char	   *delim;
 	char	   *null;
+	char	   *quote;
+	char	   *escape;
+	char 	   *force_list;
+	char 	   *literal_list;
 };
@@ -81,6 +86,10 @@ free_copy_options(struct copy_options * ptr)
 	free(ptr->file);
 	free(ptr->delim);
 	free(ptr->null);
+	free(ptr->quote);
+	free(ptr->escape);
+	free(ptr->force_list);
+	free(ptr->literal_list);
 	free(ptr);
 }
@@ -272,11 +281,19 @@ parse_slash_copy(const char *args)
 		while (token)
 		{
+			bool fetch_next;
+			fetch_next = true;
 			/* someday allow BINARY here */
 			if (strcasecmp(token, "oids") == 0)
 			{
 				result->oids = true;
 			}
+			else if (strcasecmp(token, "csv") == 0)
+			{
+				result->csv_mode = true;
+			}
 			else if (strcasecmp(token, "delimiter") == 0)
 			{
 				token = strtokx(NULL, whitespace, NULL, "'",
@@ -301,11 +318,78 @@ parse_slash_copy(const char *args)
 				else
 					goto error;
 			}
+			else if (strcasecmp(token, "quote") == 0)
+			{
+				token = strtokx(NULL, whitespace, NULL, "'",
+								'\\', false, pset.encoding);
+				if (token && strcasecmp(token, "as") == 0)
+					token = strtokx(NULL, whitespace, NULL, "'",
+									'\\', false, pset.encoding);
+				if (token)
+					result->quote = pg_strdup(token);
+				else
+					goto error;
+			}
+			else if (strcasecmp(token, "escape") == 0)
+			{
+				token = strtokx(NULL, whitespace, NULL, "'",
+								'\\', false, pset.encoding);
+				if (token && strcasecmp(token, "as") == 0)
+					token = strtokx(NULL, whitespace, NULL, "'",
+									'\\', false, pset.encoding);
+				if (token)
+					result->escape = pg_strdup(token);
+				else
+					goto error;
+			}
+			else if (strcasecmp(token, "force") == 0)
+			{
+				/* handle column list */
+				fetch_next = false;
+				for (;;)
+				{
+					token = strtokx(NULL, whitespace, ",", "\"",
+									0, false, pset.encoding);
+					if (!token || strchr(",", token[0]))
+						goto error;
+					if (!result->force_list)
+						result->force_list = pg_strdup(token);
+					else
+						xstrcat(&result->force_list, token);
+					token = strtokx(NULL, whitespace, ",", "\"",
+									0, false, pset.encoding);
+					if (!token || token[0] != ',')
+						break;
+					xstrcat(&result->force_list, token);
+				}
+			}
+			else if (strcasecmp(token, "literal") == 0)
+			{
+				/* handle column list */
+				fetch_next = false;
+				for (;;)
+				{
+					token = strtokx(NULL, whitespace, ",", "\"",
+									0, false, pset.encoding);
+					if (!token || strchr(",", token[0]))
+						goto error;
+					if (!result->literal_list)
+						result->literal_list = pg_strdup(token);
+					else
+						xstrcat(&result->literal_list, token);
+					token = strtokx(NULL, whitespace, ",", "\"",
+									0, false, pset.encoding);
+					if (!token || token[0] != ',')
+						break;
+					xstrcat(&result->literal_list, token);
+				}
+			}
 			else
 				goto error;
-			token = strtokx(NULL, whitespace, NULL, NULL,
+			if (fetch_next)
-							0, false, pset.encoding);
+				token = strtokx(NULL, whitespace, NULL, NULL,
+								0, false, pset.encoding);
 		}
 	}
@@ -340,7 +424,7 @@ do_copy(const char *args)
 	PGresult   *result;
 	bool		success;
 	struct stat st;
 	/* parse options */
 	options = parse_slash_copy(args);
@@ -379,6 +463,7 @@ do_copy(const char *args)
 							  options->delim);
 	}
+	/* There is no backward-compatible CSV syntax */
 	if (options->null)
 	{
 		if (options->null[0] == '\'')
@@ -387,6 +472,37 @@ do_copy(const char *args)
 			appendPQExpBuffer(&query, " WITH NULL AS '%s'", options->null);
 	}
+	if (options->csv_mode)
+	{
+		appendPQExpBuffer(&query, " CSV");
+	}
+	if (options->quote)
+	{
+		if (options->quote[0] == '\'')
+			appendPQExpBuffer(&query, " QUOTE AS %s", options->quote);
+		else
+			appendPQExpBuffer(&query, " QUOTE AS '%s'", options->quote);
+	}
+	if (options->escape)
+	{
+		if (options->escape[0] == '\'')
+			appendPQExpBuffer(&query, " ESCAPE AS %s", options->escape);
+		else
+			appendPQExpBuffer(&query, " ESCAPE AS '%s'", options->escape);
+	}
+	if (options->force_list)
+	{
+		appendPQExpBuffer(&query, " FORCE %s", options->force_list);
+	}
+	if (options->literal_list)
+	{
+		appendPQExpBuffer(&query, " LITERAL %s", options->literal_list);
+	}
 	if (options->from)
 	{
 		if (options->file)