Commit 0a0727cc authored by Tom Lane's avatar Tom Lane

Improve performance of binary COPY FROM through better buffering.

At least on Linux and macOS, fread() turns out to have far higher
per-call overhead than one could wish.  Reading 64KB of data at a time
and then parceling it out with our own memcpy logic makes binary COPY
from a file significantly faster --- around 30% in simple testing for
cases with narrow text columns (on Linux ... even more on macOS).

In binary COPY from frontend, there's no per-call fread(), and this
patch introduces an extra layer of memcpy'ing, but it still manages
to eke out a small win.  Apparently, the control-logic overhead in
CopyGetData() is enough to be worth avoiding for small fetches.

Bharath Rupireddy and Amit Langote, reviewed by Vignesh C,
cosmetic tweaks by me

Discussion: https://postgr.es/m/CALj2ACU5Bz06HWLwqSzNMN=Gupoj6Rcn_QVC+k070V4em9wu=A@mail.gmail.com
parent 8a37951e
......@@ -187,15 +187,15 @@ typedef struct CopyStateData
TransitionCaptureState *transition_capture;
/*
* These variables are used to reduce overhead in textual COPY FROM.
* These variables are used to reduce overhead in COPY FROM.
*
* attribute_buf holds the separated, de-escaped text for each field of
* the current line. The CopyReadAttributes functions return arrays of
* pointers into this buffer. We avoid palloc/pfree overhead by re-using
* the buffer on each cycle.
*
* (In binary COPY FROM, attribute_buf holds the binary data for the
* current field, while the other variables are not used.)
* In binary COPY FROM, attribute_buf holds the binary data for the
* current field, but the usage is otherwise similar.
*/
StringInfoData attribute_buf;
......@@ -209,7 +209,8 @@ typedef struct CopyStateData
* input cycle is first to read the whole line into line_buf, convert it
* to server encoding there, and then extract the individual attribute
* fields into attribute_buf. line_buf is preserved unmodified so that we
* can display it in error messages if appropriate.
* can display it in error messages if appropriate. (In binary mode,
* line_buf is not used.)
*/
StringInfoData line_buf;
bool line_buf_converted; /* converted to server encoding? */
......@@ -217,15 +218,18 @@ typedef struct CopyStateData
/*
* Finally, raw_buf holds raw data read from the data source (file or
* client connection). CopyReadLine parses this data sufficiently to
* locate line boundaries, then transfers the data to line_buf and
* converts it. Note: we guarantee that there is a \0 at
* raw_buf[raw_buf_len].
* client connection). In text mode, CopyReadLine parses this data
* sufficiently to locate line boundaries, then transfers the data to
* line_buf and converts it. In binary mode, CopyReadBinaryData fetches
* appropriate amounts of data from this buffer. In both modes, we
* guarantee that there is a \0 at raw_buf[raw_buf_len].
*/
#define RAW_BUF_SIZE 65536 /* we palloc RAW_BUF_SIZE+1 bytes */
char *raw_buf;
int raw_buf_index; /* next byte to process */
int raw_buf_len; /* total # of bytes stored */
/* Shorthand for number of unconsumed bytes available in raw_buf */
#define RAW_BUF_BYTES(cstate) ((cstate)->raw_buf_len - (cstate)->raw_buf_index)
} CopyStateData;
/* DestReceiver for COPY (query) TO */
......@@ -394,6 +398,8 @@ static void CopySendInt32(CopyState cstate, int32 val);
static bool CopyGetInt32(CopyState cstate, int32 *val);
static void CopySendInt16(CopyState cstate, int16 val);
static bool CopyGetInt16(CopyState cstate, int16 *val);
static bool CopyLoadRawBuf(CopyState cstate);
static int CopyReadBinaryData(CopyState cstate, char *dest, int nbytes);
/*
......@@ -723,7 +729,7 @@ CopyGetData(CopyState cstate, void *databuf, int minread, int maxread)
/*
* CopySendInt32 sends an int32 in network byte order
*/
static void
static inline void
CopySendInt32(CopyState cstate, int32 val)
{
uint32 buf;
......@@ -737,12 +743,12 @@ CopySendInt32(CopyState cstate, int32 val)
*
* Returns true if OK, false if EOF
*/
static bool
static inline bool
CopyGetInt32(CopyState cstate, int32 *val)
{
uint32 buf;
if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
{
*val = 0; /* suppress compiler warning */
return false;
......@@ -754,7 +760,7 @@ CopyGetInt32(CopyState cstate, int32 *val)
/*
* CopySendInt16 sends an int16 in network byte order
*/
static void
static inline void
CopySendInt16(CopyState cstate, int16 val)
{
uint16 buf;
......@@ -766,12 +772,12 @@ CopySendInt16(CopyState cstate, int16 val)
/*
* CopyGetInt16 reads an int16 that appears in network byte order
*/
static bool
static inline bool
CopyGetInt16(CopyState cstate, int16 *val)
{
uint16 buf;
if (CopyGetData(cstate, &buf, sizeof(buf), sizeof(buf)) != sizeof(buf))
if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
{
*val = 0; /* suppress compiler warning */
return false;
......@@ -786,26 +792,20 @@ CopyGetInt16(CopyState cstate, int16 *val)
*
* Returns true if able to obtain at least one more byte, else false.
*
* If raw_buf_index < raw_buf_len, the unprocessed bytes are transferred
* down to the start of the buffer and then we load more data after that.
* This case is used only when a frontend multibyte character crosses a
* bufferload boundary.
* If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
* of the buffer and then we load more data after that. This case occurs only
* when a multibyte character crosses a bufferload boundary.
*/
static bool
CopyLoadRawBuf(CopyState cstate)
{
int nbytes;
int nbytes = RAW_BUF_BYTES(cstate);
int inbytes;
if (cstate->raw_buf_index < cstate->raw_buf_len)
{
/* Copy down the unprocessed data */
nbytes = cstate->raw_buf_len - cstate->raw_buf_index;
/* Copy down the unprocessed data if any. */
if (nbytes > 0)
memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
nbytes);
}
else
nbytes = 0; /* no data need be saved */
inbytes = CopyGetData(cstate, cstate->raw_buf + nbytes,
1, RAW_BUF_SIZE - nbytes);
......@@ -816,6 +816,54 @@ CopyLoadRawBuf(CopyState cstate)
return (inbytes > 0);
}
/*
* CopyReadBinaryData
*
* Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf
* and writes them to 'dest'. Returns the number of bytes read (which
* would be less than 'nbytes' only if we reach EOF).
*/
static int
CopyReadBinaryData(CopyState cstate, char *dest, int nbytes)
{
int copied_bytes = 0;
if (RAW_BUF_BYTES(cstate) >= nbytes)
{
/* Enough bytes are present in the buffer. */
memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
cstate->raw_buf_index += nbytes;
copied_bytes = nbytes;
}
else
{
/*
* Not enough bytes in the buffer, so must read from the file. Need
* to loop since 'nbytes' could be larger than the buffer size.
*/
do
{
int copy_bytes;
/* Load more data if buffer is empty. */
if (RAW_BUF_BYTES(cstate) == 0)
{
if (!CopyLoadRawBuf(cstate))
break; /* EOF */
}
/* Transfer some bytes. */
copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
cstate->raw_buf_index += copy_bytes;
dest += copy_bytes;
copied_bytes += copy_bytes;
} while (copied_bytes < nbytes);
}
return copied_bytes;
}
/*
* DoCopy executes the SQL COPY statement
......@@ -3366,17 +3414,17 @@ BeginCopyFrom(ParseState *pstate,
cstate->cur_attval = NULL;
/*
* Set up variables to avoid per-attribute overhead. attribute_buf is
* used in both text and binary modes, but we use line_buf and raw_buf
* Set up variables to avoid per-attribute overhead. attribute_buf and
* raw_buf are used in both text and binary modes, but we use line_buf
* only in text mode.
*/
initStringInfo(&cstate->attribute_buf);
cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
cstate->raw_buf_index = cstate->raw_buf_len = 0;
if (!cstate->binary)
{
initStringInfo(&cstate->line_buf);
cstate->line_buf_converted = false;
cstate->raw_buf = (char *) palloc(RAW_BUF_SIZE + 1);
cstate->raw_buf_index = cstate->raw_buf_len = 0;
}
/* Assign range table, we'll need it in CopyFrom. */
......@@ -3527,7 +3575,7 @@ BeginCopyFrom(ParseState *pstate,
int32 tmp;
/* Signature */
if (CopyGetData(cstate, readSig, 11, 11) != 11 ||
if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
memcmp(readSig, BinarySignature, 11) != 0)
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
......@@ -3555,7 +3603,7 @@ BeginCopyFrom(ParseState *pstate,
/* Skip extension header, if present */
while (tmp-- > 0)
{
if (CopyGetData(cstate, readSig, 1, 1) != 1)
if (CopyReadBinaryData(cstate, readSig, 1) != 1)
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("invalid COPY file header (wrong length)")));
......@@ -3771,7 +3819,7 @@ NextCopyFrom(CopyState cstate, ExprContext *econtext,
char dummy;
if (cstate->copy_dest != COPY_OLD_FE &&
CopyGetData(cstate, &dummy, 1, 1) > 0)
CopyReadBinaryData(cstate, &dummy, 1) > 0)
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("received copy data after EOF marker")));
......@@ -4744,8 +4792,8 @@ CopyReadBinaryAttribute(CopyState cstate, FmgrInfo *flinfo,
resetStringInfo(&cstate->attribute_buf);
enlargeStringInfo(&cstate->attribute_buf, fld_size);
if (CopyGetData(cstate, cstate->attribute_buf.data,
fld_size, fld_size) != fld_size)
if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,
fld_size) != fld_size)
ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("unexpected EOF in COPY data")));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment