Commit c532d15d authored by Heikki Linnakangas's avatar Heikki Linnakangas

Split copy.c into four files.

Copy.c has grown really large. Split it into more manageable parts:

- copy.c now contains only a few functions that are common to COPY FROM
  and COPY TO.

- copyto.c contains code for COPY TO.

- copyfrom.c contains code for initializing COPY FROM, and inserting the
  tuples to the correct table.

- copyfromparse.c contains code for reading from the client/file/program,
  and parsing the input text/CSV/binary format into tuples.

All of these parts are fairly complicated, and fairly independent of each
other. There is a patch being discussed to implement parallel COPY FROM,
which will add a lot of new code to the COPY FROM path, and another patch
which would allow INSERTs to use the same multi-insert machinery as COPY
FROM, both of which will require refactoring that code. With those two
patches, there's going to be a lot of code churn in copy.c anyway, so now
seems like a good time to do this refactoring.

The CopyStateData struct is also split. All the formatting options, like
FORMAT, QUOTE, ESCAPE, are put in a new CopyFormatOption struct, which
is used by both COPY FROM and TO. Other state data are kept in separate
CopyFromStateData and CopyToStateData structs.

Reviewed-by: Soumyadeep Chakraborty, Erik Rijkers, Vignesh C, Andres Freund
Discussion: https://www.postgresql.org/message-id/8e15b560-f387-7acc-ac90-763986617bfb%40iki.fi
parent 17958972
......@@ -105,7 +105,7 @@ typedef struct FileFdwExecutionState
bool is_program; /* true if filename represents an OS command */
List *options; /* merged COPY options, excluding filename and
* is_program */
CopyState cstate; /* COPY execution state */
CopyFromState cstate; /* COPY execution state */
} FileFdwExecutionState;
/*
......@@ -655,7 +655,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
char *filename;
bool is_program;
List *options;
CopyState cstate;
CopyFromState cstate;
FileFdwExecutionState *festate;
/*
......@@ -677,6 +677,7 @@ fileBeginForeignScan(ForeignScanState *node, int eflags)
*/
cstate = BeginCopyFrom(NULL,
node->ss.ss_currentRelation,
NULL,
filename,
is_program,
NULL,
......@@ -752,6 +753,7 @@ fileReScanForeignScan(ForeignScanState *node)
festate->cstate = BeginCopyFrom(NULL,
node->ss.ss_currentRelation,
NULL,
festate->filename,
festate->is_program,
NULL,
......@@ -1107,7 +1109,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
char *filename;
bool is_program;
List *options;
CopyState cstate;
CopyFromState cstate;
ErrorContextCallback errcallback;
MemoryContext oldcontext = CurrentMemoryContext;
MemoryContext tupcontext;
......@@ -1125,7 +1127,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
/*
* Create CopyState from FDW options.
*/
cstate = BeginCopyFrom(NULL, onerel, filename, is_program, NULL, NIL,
cstate = BeginCopyFrom(NULL, onerel, NULL, filename, is_program, NULL, NIL,
options);
/*
......
......@@ -24,6 +24,9 @@ OBJS = \
constraint.o \
conversioncmds.o \
copy.o \
copyfrom.o \
copyfromparse.o \
copyto.o \
createas.o \
dbcommands.o \
define.o \
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -749,7 +749,7 @@ copy_table(Relation rel)
LogicalRepRelation lrel;
WalRcvExecResult *res;
StringInfoData cmd;
CopyState cstate;
CopyFromState cstate;
List *attnamelist;
ParseState *pstate;
......@@ -800,7 +800,7 @@ copy_table(Relation rel)
NULL, false, false);
attnamelist = make_copy_attnamelist(relmapentry);
cstate = BeginCopyFrom(pstate, rel, NULL, false, copy_read_data, attnamelist, NIL);
cstate = BeginCopyFrom(pstate, rel, NULL, NULL, false, copy_read_data, attnamelist, NIL);
/* Do the copy */
(void) CopyFrom(cstate);
......
......@@ -19,26 +19,71 @@
#include "parser/parse_node.h"
#include "tcop/dest.h"
/* CopyStateData is private in commands/copy.c */
typedef struct CopyStateData *CopyState;
/*
* A struct to hold COPY options, in a parsed form. All of these are related
* to formatting, except for 'freeze', which doesn't really belong here, but
* it's expedient to parse it along with all the other options.
*/
typedef struct CopyFormatOptions
{
/* parameters from the COPY command */
int file_encoding; /* file or remote side's character encoding,
* -1 if not specified */
bool binary; /* binary format? */
bool freeze; /* freeze rows on loading? */
bool csv_mode; /* Comma Separated Value format? */
bool header_line; /* CSV header line? */
char *null_print; /* NULL marker string (server encoding!) */
int null_print_len; /* length of same */
char *null_print_client; /* same converted to file encoding */
char *delim; /* column delimiter (must be 1 byte) */
char *quote; /* CSV quote char (must be 1 byte) */
char *escape; /* CSV escape char (must be 1 byte) */
List *force_quote; /* list of column names */
bool force_quote_all; /* FORCE_QUOTE *? */
bool *force_quote_flags; /* per-column CSV FQ flags */
List *force_notnull; /* list of column names */
bool *force_notnull_flags; /* per-column CSV FNN flags */
List *force_null; /* list of column names */
bool *force_null_flags; /* per-column CSV FN flags */
bool convert_selectively; /* do selective binary conversion? */
List *convert_select; /* list of column names (can be NIL) */
} CopyFormatOptions;
/* These are private in commands/copy[from|to].c */
typedef struct CopyFromStateData *CopyFromState;
typedef struct CopyToStateData *CopyToState;
typedef int (*copy_data_source_cb) (void *outbuf, int minread, int maxread);
extern void DoCopy(ParseState *state, const CopyStmt *stmt,
int stmt_location, int stmt_len,
uint64 *processed);
extern void ProcessCopyOptions(ParseState *pstate, CopyState cstate, bool is_from, List *options);
extern CopyState BeginCopyFrom(ParseState *pstate, Relation rel, const char *filename,
extern void ProcessCopyOptions(ParseState *pstate, CopyFormatOptions *ops_out, bool is_from, List *options);
extern CopyFromState BeginCopyFrom(ParseState *pstate, Relation rel, Node *whereClause,
const char *filename,
bool is_program, copy_data_source_cb data_source_cb, List *attnamelist, List *options);
extern void EndCopyFrom(CopyState cstate);
extern bool NextCopyFrom(CopyState cstate, ExprContext *econtext,
extern void EndCopyFrom(CopyFromState cstate);
extern bool NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
Datum *values, bool *nulls);
extern bool NextCopyFromRawFields(CopyState cstate,
extern bool NextCopyFromRawFields(CopyFromState cstate,
char ***fields, int *nfields);
extern void CopyFromErrorCallback(void *arg);
extern uint64 CopyFrom(CopyState cstate);
extern uint64 CopyFrom(CopyFromState cstate);
extern DestReceiver *CreateCopyDestReceiver(void);
/*
* internal prototypes
*/
extern CopyToState BeginCopyTo(ParseState *pstate, Relation rel, RawStmt *query,
Oid queryRelId, const char *filename, bool is_program,
List *attnamelist, List *options);
extern void EndCopyTo(CopyToState cstate);
extern uint64 DoCopyTo(CopyToState cstate);
extern List *CopyGetAttnums(TupleDesc tupDesc, Relation rel,
List *attnamelist);
#endif /* COPY_H */
/*-------------------------------------------------------------------------
*
* copyfrom_internal.h
* Internal definitions for COPY FROM command.
*
*
* Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* src/include/commands/copyfrom_internal.h
*
*-------------------------------------------------------------------------
*/
#ifndef COPYFROM_INTERNAL_H
#define COPYFROM_INTERNAL_H
#include "commands/copy.h"
#include "commands/trigger.h"
/*
* Represents the different source cases we need to worry about at
* the bottom level
*/
typedef enum CopySource
{
COPY_FILE, /* from file (or a piped program) */
COPY_OLD_FE, /* from frontend (2.0 protocol) */
COPY_NEW_FE, /* from frontend (3.0 protocol) */
COPY_CALLBACK /* from callback function */
} CopySource;
/*
* Represents the end-of-line terminator type of the input
*/
typedef enum EolType
{
EOL_UNKNOWN,
EOL_NL,
EOL_CR,
EOL_CRNL
} EolType;
/*
* Represents the heap insert method to be used during COPY FROM.
*/
typedef enum CopyInsertMethod
{
CIM_SINGLE, /* use table_tuple_insert or fdw routine */
CIM_MULTI, /* always use table_multi_insert */
CIM_MULTI_CONDITIONAL /* use table_multi_insert only if valid */
} CopyInsertMethod;
/*
* This struct contains all the state variables used throughout a COPY FROM
* operation.
*
* Multi-byte encodings: all supported client-side encodings encode multi-byte
* characters by having the first byte's high bit set. Subsequent bytes of the
* character can have the high bit not set. When scanning data in such an
* encoding to look for a match to a single-byte (ie ASCII) character, we must
* use the full pg_encoding_mblen() machinery to skip over multibyte
* characters, else we might find a false match to a trailing byte. In
* supported server encodings, there is no possibility of a false match, and
* it's faster to make useless comparisons to trailing bytes than it is to
* invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true
* when we have to do it the hard way.
*/
typedef struct CopyFromStateData
{
/* low-level state data */
CopySource copy_src; /* type of copy source */
FILE *copy_file; /* used if copy_src == COPY_FILE */
StringInfo fe_msgbuf; /* used if copy_src == COPY_NEW_FE */
bool reached_eof; /* true if we read to end of copy data (not
* all copy_src types maintain this) */
EolType eol_type; /* EOL type of input */
int file_encoding; /* file or remote side's character encoding */
bool need_transcoding; /* file encoding diff from server? */
bool encoding_embeds_ascii; /* ASCII can be non-first byte? */
/* parameters from the COPY command */
Relation rel; /* relation to copy from */
List *attnumlist; /* integer list of attnums to copy */
char *filename; /* filename, or NULL for STDIN */
bool is_program; /* is 'filename' a program to popen? */
copy_data_source_cb data_source_cb; /* function for reading data */
CopyFormatOptions opts;
bool *convert_select_flags; /* per-column CSV/TEXT CS flags */
Node *whereClause; /* WHERE condition (or NULL) */
/* these are just for error messages, see CopyFromErrorCallback */
const char *cur_relname; /* table name for error messages */
uint64 cur_lineno; /* line number for error messages */
const char *cur_attname; /* current att for error messages */
const char *cur_attval; /* current att value for error messages */
/*
* Working state
*/
MemoryContext copycontext; /* per-copy execution context */
AttrNumber num_defaults;
FmgrInfo *in_functions; /* array of input functions for each attrs */
Oid *typioparams; /* array of element types for in_functions */
int *defmap; /* array of default att numbers */
ExprState **defexprs; /* array of default att expressions */
bool volatile_defexprs; /* is any of defexprs volatile? */
List *range_table;
ExprState *qualexpr;
TransitionCaptureState *transition_capture;
/*
* These variables are used to reduce overhead in COPY FROM.
*
* attribute_buf holds the separated, de-escaped text for each field of
* the current line. The CopyReadAttributes functions return arrays of
* pointers into this buffer. We avoid palloc/pfree overhead by re-using
* the buffer on each cycle.
*
* In binary COPY FROM, attribute_buf holds the binary data for the
* current field, but the usage is otherwise similar.
*/
StringInfoData attribute_buf;
/* field raw data pointers found by COPY FROM */
int max_fields;
char **raw_fields;
/*
* Similarly, line_buf holds the whole input line being processed. The
* input cycle is first to read the whole line into line_buf, convert it
* to server encoding there, and then extract the individual attribute
* fields into attribute_buf. line_buf is preserved unmodified so that we
* can display it in error messages if appropriate. (In binary mode,
* line_buf is not used.)
*/
StringInfoData line_buf;
bool line_buf_converted; /* converted to server encoding? */
bool line_buf_valid; /* contains the row being processed? */
/*
* Finally, raw_buf holds raw data read from the data source (file or
* client connection). In text mode, CopyReadLine parses this data
* sufficiently to locate line boundaries, then transfers the data to
* line_buf and converts it. In binary mode, CopyReadBinaryData fetches
* appropriate amounts of data from this buffer. In both modes, we
* guarantee that there is a \0 at raw_buf[raw_buf_len].
*/
#define RAW_BUF_SIZE 65536 /* we palloc RAW_BUF_SIZE+1 bytes */
char *raw_buf;
int raw_buf_index; /* next byte to process */
int raw_buf_len; /* total # of bytes stored */
/* Shorthand for number of unconsumed bytes available in raw_buf */
#define RAW_BUF_BYTES(cstate) ((cstate)->raw_buf_len - (cstate)->raw_buf_index)
} CopyFromStateData;
extern void ReceiveCopyBegin(CopyFromState cstate);
extern void ReceiveCopyBinaryHeader(CopyFromState cstate);
#endif /* COPYFROM_INTERNAL_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment