Commit c50b3158 authored by Tomas Vondra's avatar Tomas Vondra

Reduce overhead of pg_mcv_list (de)serialization

Commit ea4e1c0e resolved issues with memory alignment in serialized
pg_mcv_list values, but it required copying data to/from the varlena
buffer during serialization and deserialization.  As the MCV lits may
be fairly large, the overhead (memory consumption, CPU usage) can get
rather significant too.

This change tweaks the serialization format so that the alignment is
correct with respect to the varlena value, and so the parts may be
accessed directly without copying the data.

Catversion bump, as it affects existing pg_statistic_ext data.
parent b0b39f72
...@@ -61,6 +61,16 @@ ...@@ -61,6 +61,16 @@
#define ITEM_FREQUENCY(item,ndims) ((double *) (ITEM_NULLS(item, ndims) + (ndims))) #define ITEM_FREQUENCY(item,ndims) ((double *) (ITEM_NULLS(item, ndims) + (ndims)))
#define ITEM_BASE_FREQUENCY(item,ndims) ((double *) (ITEM_FREQUENCY(item, ndims) + 1)) #define ITEM_BASE_FREQUENCY(item,ndims) ((double *) (ITEM_FREQUENCY(item, ndims) + 1))
/*
* Used to compute size of serialized MCV list representation.
*/
#define MinSizeOfMCVList \
(VARHDRSZ + sizeof(uint32) * 3 + sizeof(AttrNumber))
#define SizeOfMCVList(ndims,nitems) \
(MAXALIGN(MinSizeOfMCVList + sizeof(Oid) * (ndims)) + \
MAXALIGN((ndims) * sizeof(DimensionInfo)) + \
MAXALIGN((nitems) * ITEM_SIZE(ndims)))
static MultiSortSupport build_mss(VacAttrStats **stats, int numattrs); static MultiSortSupport build_mss(VacAttrStats **stats, int numattrs);
...@@ -491,7 +501,6 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) ...@@ -491,7 +501,6 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
char *item = palloc0(itemsize); char *item = palloc0(itemsize);
/* serialized items (indexes into arrays, etc.) */ /* serialized items (indexes into arrays, etc.) */
bytea *output;
char *raw; char *raw;
char *ptr; char *ptr;
...@@ -625,15 +634,25 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) ...@@ -625,15 +634,25 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
* Now we can finally compute how much space we'll actually need for the * Now we can finally compute how much space we'll actually need for the
* whole serialized MCV list (varlena header, MCV header, dimension info * whole serialized MCV list (varlena header, MCV header, dimension info
* for each attribute, deduplicated values and items). * for each attribute, deduplicated values and items).
*
* The header fields are copied one by one, so that we don't need any
* explicit alignment (we copy them while deserializing). All fields
* after this need to be properly aligned, for direct access.
*/ */
total_length = offsetof(MCVList, items) total_length = MAXALIGN(VARHDRSZ + (3 * sizeof(uint32))
+ MAXALIGN(ndims * sizeof(DimensionInfo)); + sizeof(AttrNumber) + (ndims * sizeof(Oid)));
/* dimension info */
total_length += MAXALIGN(ndims * sizeof(DimensionInfo));
/* add space for the arrays of deduplicated values */ /* add space for the arrays of deduplicated values */
for (i = 0; i < ndims; i++) for (i = 0; i < ndims; i++)
total_length += MAXALIGN(info[i].nbytes); total_length += MAXALIGN(info[i].nbytes);
/* and finally the items (no additional alignment needed) */ /*
* And finally the items (no additional alignment needed, we start
* at proper alignment and the itemsize formula uses MAXALIGN)
*/
total_length += mcvlist->nitems * itemsize; total_length += mcvlist->nitems * itemsize;
/* /*
...@@ -641,11 +660,27 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) ...@@ -641,11 +660,27 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
* so we set them to zero to make the result more compressible). * so we set them to zero to make the result more compressible).
*/ */
raw = palloc0(total_length); raw = palloc0(total_length);
ptr = raw; SET_VARSIZE(raw, total_length);
ptr = VARDATA(raw);
/* copy the MCV list header fields, one by one */
memcpy(ptr, &mcvlist->magic, sizeof(uint32));
ptr += sizeof(uint32);
memcpy(ptr, &mcvlist->type, sizeof(uint32));
ptr += sizeof(uint32);
memcpy(ptr, &mcvlist->nitems, sizeof(uint32));
ptr += sizeof(uint32);
memcpy(ptr, &mcvlist->ndimensions, sizeof(AttrNumber));
ptr += sizeof(AttrNumber);
/* copy the MCV list header */ memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
memcpy(ptr, mcvlist, offsetof(MCVList, items)); ptr += (sizeof(Oid) * ndims);
ptr += offsetof(MCVList, items);
/* the header may not be exactly aligned, so make sure it is */
ptr = raw + MAXALIGN(ptr - raw);
/* store information about the attributes */ /* store information about the attributes */
memcpy(ptr, info, sizeof(DimensionInfo) * ndims); memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
...@@ -761,14 +796,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) ...@@ -761,14 +796,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
pfree(values); pfree(values);
pfree(counts); pfree(counts);
output = (bytea *) palloc(VARHDRSZ + total_length); return (bytea *) raw;
SET_VARSIZE(output, VARHDRSZ + total_length);
memcpy(VARDATA_ANY(output), raw, total_length);
pfree(raw);
return output;
} }
/* /*
...@@ -789,8 +817,7 @@ statext_mcv_deserialize(bytea *data) ...@@ -789,8 +817,7 @@ statext_mcv_deserialize(bytea *data)
char *ptr; char *ptr;
int ndims, int ndims,
nitems, nitems;
itemsize;
DimensionInfo *info = NULL; DimensionInfo *info = NULL;
/* local allocation buffer (used only for deserialization) */ /* local allocation buffer (used only for deserialization) */
...@@ -810,24 +837,32 @@ statext_mcv_deserialize(bytea *data) ...@@ -810,24 +837,32 @@ statext_mcv_deserialize(bytea *data)
/* /*
* We can't possibly deserialize a MCV list if there's not even a complete * We can't possibly deserialize a MCV list if there's not even a complete
* header. * header. We need an explicit formula here, because we serialize the
* header fields one by one, so we need to ignore struct alignment.
*/ */
if (VARSIZE_ANY_EXHDR(data) < offsetof(MCVList, items)) if (VARSIZE_ANY(data) < MinSizeOfMCVList)
elog(ERROR, "invalid MCV size %zd (expected at least %zu)", elog(ERROR, "invalid MCV size %zd (expected at least %zu)",
VARSIZE_ANY_EXHDR(data), offsetof(MCVList, items)); VARSIZE_ANY(data), MinSizeOfMCVList);
/* read the MCV list header */ /* read the MCV list header */
mcvlist = (MCVList *) palloc0(offsetof(MCVList, items)); mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
/* initialize pointer to the data part (skip the varlena header) */ /* pointer to the data part (skip the varlena header) */
raw = palloc(VARSIZE_ANY_EXHDR(data)); ptr = VARDATA_ANY(data);
ptr = raw; raw = (char *) data;
memcpy(raw, VARDATA_ANY(data), VARSIZE_ANY_EXHDR(data));
/* get the header and perform further sanity checks */ /* get the header and perform further sanity checks */
memcpy(mcvlist, ptr, offsetof(MCVList, items)); memcpy(&mcvlist->magic, ptr, sizeof(uint32));
ptr += offsetof(MCVList, items); ptr += sizeof(uint32);
memcpy(&mcvlist->type, ptr, sizeof(uint32));
ptr += sizeof(uint32);
memcpy(&mcvlist->nitems, ptr, sizeof(uint32));
ptr += sizeof(uint32);
memcpy(&mcvlist->ndimensions, ptr, sizeof(AttrNumber));
ptr += sizeof(AttrNumber);
if (mcvlist->magic != STATS_MCV_MAGIC) if (mcvlist->magic != STATS_MCV_MAGIC)
elog(ERROR, "invalid MCV magic %u (expected %u)", elog(ERROR, "invalid MCV magic %u (expected %u)",
...@@ -852,25 +887,29 @@ statext_mcv_deserialize(bytea *data) ...@@ -852,25 +887,29 @@ statext_mcv_deserialize(bytea *data)
nitems = mcvlist->nitems; nitems = mcvlist->nitems;
ndims = mcvlist->ndimensions; ndims = mcvlist->ndimensions;
itemsize = ITEM_SIZE(ndims);
/* /*
* Check amount of data including DimensionInfo for all dimensions and * Check amount of data including DimensionInfo for all dimensions and
* also the serialized items (including uint16 indexes). Also, walk * also the serialized items (including uint16 indexes). Also, walk
* through the dimension information and add it to the sum. * through the dimension information and add it to the sum.
*/ */
expected_size = offsetof(MCVList, items) + expected_size = SizeOfMCVList(ndims, nitems);
ndims * sizeof(DimensionInfo) +
(nitems * itemsize);
/* /*
* Check that we have at least the dimension and info records, along with * Check that we have at least the dimension and info records, along with
* the items. We don't know the size of the serialized values yet. We need * the items. We don't know the size of the serialized values yet. We need
* to do this check first, before accessing the dimension info. * to do this check first, before accessing the dimension info.
*/ */
if (VARSIZE_ANY_EXHDR(data) < expected_size) if (VARSIZE_ANY(data) < expected_size)
elog(ERROR, "invalid MCV size %zd (expected %zu)", elog(ERROR, "invalid MCV size %zd (expected %zu)",
VARSIZE_ANY_EXHDR(data), expected_size); VARSIZE_ANY(data), expected_size);
/* Now copy the array of type Oids. */
memcpy(ptr, mcvlist->types, sizeof(Oid) * ndims);
ptr += (sizeof(Oid) * ndims);
/* ensure alignment of the pointer (after the header fields) */
ptr = raw + MAXALIGN(ptr - raw);
/* Now it's safe to access the dimension info. */ /* Now it's safe to access the dimension info. */
info = (DimensionInfo *) ptr; info = (DimensionInfo *) ptr;
...@@ -894,9 +933,9 @@ statext_mcv_deserialize(bytea *data) ...@@ -894,9 +933,9 @@ statext_mcv_deserialize(bytea *data)
* (header, dimension info. items and deduplicated data). So do the final * (header, dimension info. items and deduplicated data). So do the final
* check on size. * check on size.
*/ */
if (VARSIZE_ANY_EXHDR(data) != expected_size) if (VARSIZE_ANY(data) != expected_size)
elog(ERROR, "invalid MCV size %zd (expected %zu)", elog(ERROR, "invalid MCV size %zd (expected %zu)",
VARSIZE_ANY_EXHDR(data), expected_size); VARSIZE_ANY(data), expected_size);
/* /*
* We need an array of Datum values for each dimension, so that we can * We need an array of Datum values for each dimension, so that we can
...@@ -1063,18 +1102,17 @@ statext_mcv_deserialize(bytea *data) ...@@ -1063,18 +1102,17 @@ statext_mcv_deserialize(bytea *data)
ptr += ITEM_SIZE(ndims); ptr += ITEM_SIZE(ndims);
/* check we're not overflowing the input */ /* check we're not overflowing the input */
Assert(ptr <= (char *) raw + VARSIZE_ANY_EXHDR(data)); Assert(ptr <= (char *) raw + VARSIZE_ANY(data));
} }
/* check that we processed all the data */ /* check that we processed all the data */
Assert(ptr == raw + VARSIZE_ANY_EXHDR(data)); Assert(ptr == raw + VARSIZE_ANY(data));
/* release the buffers used for mapping */ /* release the buffers used for mapping */
for (dim = 0; dim < ndims; dim++) for (dim = 0; dim < ndims; dim++)
pfree(map[dim]); pfree(map[dim]);
pfree(map); pfree(map);
pfree(raw);
return mcvlist; return mcvlist;
} }
......
...@@ -53,6 +53,6 @@ ...@@ -53,6 +53,6 @@
*/ */
/* yyyymmddN */ /* yyyymmddN */
#define CATALOG_VERSION_NO 201904021 #define CATALOG_VERSION_NO 201904031
#endif #endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment