Commit af1a614e authored by Andrew Dunstan's avatar Andrew Dunstan

Allow the low level COPY routines to read arbitrary numbers of fields.

This doesn't involve any user-visible change in behavior, but will be
useful when the COPY routines are exposed to allow their use by Foreign
Data Wrapper routines, which will be able to use these routines to read
irregular CSV files, for example.
parent 95e42a2c
...@@ -141,6 +141,11 @@ typedef struct CopyStateData ...@@ -141,6 +141,11 @@ typedef struct CopyStateData
*/ */
StringInfoData attribute_buf; StringInfoData attribute_buf;
/* field raw data pointers found by COPY FROM */
int max_fields;
char ** raw_fields;
/* /*
* Similarly, line_buf holds the whole input line being processed. The * Similarly, line_buf holds the whole input line being processed. The
* input cycle is first to read the whole line into line_buf, convert it * input cycle is first to read the whole line into line_buf, convert it
...@@ -250,10 +255,8 @@ static void CopyOneRowTo(CopyState cstate, Oid tupleOid, ...@@ -250,10 +255,8 @@ static void CopyOneRowTo(CopyState cstate, Oid tupleOid,
static void CopyFrom(CopyState cstate); static void CopyFrom(CopyState cstate);
static bool CopyReadLine(CopyState cstate); static bool CopyReadLine(CopyState cstate);
static bool CopyReadLineText(CopyState cstate); static bool CopyReadLineText(CopyState cstate);
static int CopyReadAttributesText(CopyState cstate, int maxfields, static int CopyReadAttributesText(CopyState cstate);
char **fieldvals); static int CopyReadAttributesCSV(CopyState cstate);
static int CopyReadAttributesCSV(CopyState cstate, int maxfields,
char **fieldvals);
static Datum CopyReadBinaryAttribute(CopyState cstate, static Datum CopyReadBinaryAttribute(CopyState cstate,
int column_no, FmgrInfo *flinfo, int column_no, FmgrInfo *flinfo,
Oid typioparam, int32 typmod, Oid typioparam, int32 typmod,
...@@ -1921,7 +1924,11 @@ CopyFrom(CopyState cstate) ...@@ -1921,7 +1924,11 @@ CopyFrom(CopyState cstate)
/* create workspace for CopyReadAttributes results */ /* create workspace for CopyReadAttributes results */
nfields = file_has_oids ? (attr_count + 1) : attr_count; nfields = file_has_oids ? (attr_count + 1) : attr_count;
field_strings = (char **) palloc(nfields * sizeof(char *)); if (! cstate->binary)
{
cstate->max_fields = nfields;
cstate->raw_fields = (char **) palloc(nfields * sizeof(char *));
}
/* Initialize state variables */ /* Initialize state variables */
cstate->fe_eof = false; cstate->fe_eof = false;
...@@ -1985,10 +1992,18 @@ CopyFrom(CopyState cstate) ...@@ -1985,10 +1992,18 @@ CopyFrom(CopyState cstate)
/* Parse the line into de-escaped field values */ /* Parse the line into de-escaped field values */
if (cstate->csv_mode) if (cstate->csv_mode)
fldct = CopyReadAttributesCSV(cstate, nfields, field_strings); fldct = CopyReadAttributesCSV(cstate);
else else
fldct = CopyReadAttributesText(cstate, nfields, field_strings); fldct = CopyReadAttributesText(cstate);
/* check for overflowing fields */
if (nfields > 0 && fldct > nfields)
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("extra data after last expected column")));
fieldno = 0; fieldno = 0;
field_strings = cstate->raw_fields;
/* Read the OID field if present */ /* Read the OID field if present */
if (file_has_oids) if (file_has_oids)
...@@ -2218,7 +2233,8 @@ CopyFrom(CopyState cstate) ...@@ -2218,7 +2233,8 @@ CopyFrom(CopyState cstate)
pfree(values); pfree(values);
pfree(nulls); pfree(nulls);
pfree(field_strings); if (! cstate->binary)
pfree(cstate->raw_fields);
pfree(in_functions); pfree(in_functions);
pfree(typioparams); pfree(typioparams);
...@@ -2717,21 +2733,22 @@ GetDecimalFromHex(char hex) ...@@ -2717,21 +2733,22 @@ GetDecimalFromHex(char hex)
* performing de-escaping as needed. * performing de-escaping as needed.
* *
* The input is in line_buf. We use attribute_buf to hold the result * The input is in line_buf. We use attribute_buf to hold the result
* strings. fieldvals[k] is set to point to the k'th attribute string, * strings. cstate->raw_fields[k] is set to point to the k'th attribute
* or NULL when the input matches the null marker string. (Note that the * string, or NULL when the input matches the null marker string.
* caller cannot check for nulls since the returned string would be the * This array is expanded as necessary.
* post-de-escaping equivalent, which may look the same as some valid data *
* string.) * (Note that the caller cannot check for nulls since the returned
* string would be the post-de-escaping equivalent, which may look
* the same as some valid data string.)
* *
* delim is the column delimiter string (must be just one byte for now). * delim is the column delimiter string (must be just one byte for now).
* null_print is the null marker string. Note that this is compared to * null_print is the null marker string. Note that this is compared to
* the pre-de-escaped input string. * the pre-de-escaped input string.
* *
* The return value is the number of fields actually read. (We error out * The return value is the number of fields actually read.
* if this would exceed maxfields, which is the length of fieldvals[].)
*/ */
static int static int
CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals) CopyReadAttributesText(CopyState cstate)
{ {
char delimc = cstate->delim[0]; char delimc = cstate->delim[0];
int fieldno; int fieldno;
...@@ -2743,7 +2760,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals) ...@@ -2743,7 +2760,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
* We need a special case for zero-column tables: check that the input * We need a special case for zero-column tables: check that the input
* line is empty, and return. * line is empty, and return.
*/ */
if (maxfields <= 0) if (cstate->max_fields <= 0)
{ {
if (cstate->line_buf.len != 0) if (cstate->line_buf.len != 0)
ereport(ERROR, ereport(ERROR,
...@@ -2759,7 +2776,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals) ...@@ -2759,7 +2776,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
* data line, so we can just force attribute_buf to be large enough and * data line, so we can just force attribute_buf to be large enough and
* then transfer data without any checks for enough space. We need to do * then transfer data without any checks for enough space. We need to do
* it this way because enlarging attribute_buf mid-stream would invalidate * it this way because enlarging attribute_buf mid-stream would invalidate
* pointers already stored into fieldvals[]. * pointers already stored into cstate->raw_fields[].
*/ */
if (cstate->attribute_buf.maxlen <= cstate->line_buf.len) if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len); enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
...@@ -2779,15 +2796,17 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals) ...@@ -2779,15 +2796,17 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
int input_len; int input_len;
bool saw_non_ascii = false; bool saw_non_ascii = false;
/* Make sure space remains in fieldvals[] */ /* Make sure there is enough space for the next value */
if (fieldno >= maxfields) if (fieldno >= cstate->max_fields)
ereport(ERROR, {
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT), cstate->max_fields *= 2;
errmsg("extra data after last expected column"))); cstate->raw_fields =
repalloc(cstate->raw_fields, cstate->max_fields*sizeof(char *));
}
/* Remember start of field on both input and output sides */ /* Remember start of field on both input and output sides */
start_ptr = cur_ptr; start_ptr = cur_ptr;
fieldvals[fieldno] = output_ptr; cstate->raw_fields[fieldno] = output_ptr;
/* Scan data for field */ /* Scan data for field */
for (;;) for (;;)
...@@ -2912,7 +2931,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals) ...@@ -2912,7 +2931,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
*/ */
if (saw_non_ascii) if (saw_non_ascii)
{ {
char *fld = fieldvals[fieldno]; char *fld = cstate->raw_fields[fieldno];
pg_verifymbstr(fld, output_ptr - (fld + 1), false); pg_verifymbstr(fld, output_ptr - (fld + 1), false);
} }
...@@ -2921,7 +2940,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals) ...@@ -2921,7 +2940,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
input_len = end_ptr - start_ptr; input_len = end_ptr - start_ptr;
if (input_len == cstate->null_print_len && if (input_len == cstate->null_print_len &&
strncmp(start_ptr, cstate->null_print, input_len) == 0) strncmp(start_ptr, cstate->null_print, input_len) == 0)
fieldvals[fieldno] = NULL; cstate->raw_fields[fieldno] = NULL;
fieldno++; fieldno++;
/* Done if we hit EOL instead of a delim */ /* Done if we hit EOL instead of a delim */
...@@ -2944,7 +2963,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals) ...@@ -2944,7 +2963,7 @@ CopyReadAttributesText(CopyState cstate, int maxfields, char **fieldvals)
* "standard" (i.e. common) CSV usage. * "standard" (i.e. common) CSV usage.
*/ */
static int static int
CopyReadAttributesCSV(CopyState cstate, int maxfields, char **fieldvals) CopyReadAttributesCSV(CopyState cstate)
{ {
char delimc = cstate->delim[0]; char delimc = cstate->delim[0];
char quotec = cstate->quote[0]; char quotec = cstate->quote[0];
...@@ -2958,7 +2977,7 @@ CopyReadAttributesCSV(CopyState cstate, int maxfields, char **fieldvals) ...@@ -2958,7 +2977,7 @@ CopyReadAttributesCSV(CopyState cstate, int maxfields, char **fieldvals)
* We need a special case for zero-column tables: check that the input * We need a special case for zero-column tables: check that the input
* line is empty, and return. * line is empty, and return.
*/ */
if (maxfields <= 0) if (cstate->max_fields <= 0)
{ {
if (cstate->line_buf.len != 0) if (cstate->line_buf.len != 0)
ereport(ERROR, ereport(ERROR,
...@@ -2974,7 +2993,7 @@ CopyReadAttributesCSV(CopyState cstate, int maxfields, char **fieldvals) ...@@ -2974,7 +2993,7 @@ CopyReadAttributesCSV(CopyState cstate, int maxfields, char **fieldvals)
* data line, so we can just force attribute_buf to be large enough and * data line, so we can just force attribute_buf to be large enough and
* then transfer data without any checks for enough space. We need to do * then transfer data without any checks for enough space. We need to do
* it this way because enlarging attribute_buf mid-stream would invalidate * it this way because enlarging attribute_buf mid-stream would invalidate
* pointers already stored into fieldvals[]. * pointers already stored into cstate->raw_fields[].
*/ */
if (cstate->attribute_buf.maxlen <= cstate->line_buf.len) if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len); enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
...@@ -2994,15 +3013,17 @@ CopyReadAttributesCSV(CopyState cstate, int maxfields, char **fieldvals) ...@@ -2994,15 +3013,17 @@ CopyReadAttributesCSV(CopyState cstate, int maxfields, char **fieldvals)
char *end_ptr; char *end_ptr;
int input_len; int input_len;
/* Make sure space remains in fieldvals[] */ /* Make sure there is enough space for the next value */
if (fieldno >= maxfields) if (fieldno >= cstate->max_fields)
ereport(ERROR, {
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT), cstate->max_fields *= 2;
errmsg("extra data after last expected column"))); cstate->raw_fields =
repalloc(cstate->raw_fields, cstate->max_fields*sizeof(char *));
}
/* Remember start of field on both input and output sides */ /* Remember start of field on both input and output sides */
start_ptr = cur_ptr; start_ptr = cur_ptr;
fieldvals[fieldno] = output_ptr; cstate->raw_fields[fieldno] = output_ptr;
/* /*
* Scan data for field, * Scan data for field,
...@@ -3090,7 +3111,7 @@ endfield: ...@@ -3090,7 +3111,7 @@ endfield:
input_len = end_ptr - start_ptr; input_len = end_ptr - start_ptr;
if (!saw_quote && input_len == cstate->null_print_len && if (!saw_quote && input_len == cstate->null_print_len &&
strncmp(start_ptr, cstate->null_print, input_len) == 0) strncmp(start_ptr, cstate->null_print, input_len) == 0)
fieldvals[fieldno] = NULL; cstate->raw_fields[fieldno] = NULL;
fieldno++; fieldno++;
/* Done if we hit EOL instead of a delim */ /* Done if we hit EOL instead of a delim */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment