Commit d85e0f36 authored by Tomas Vondra's avatar Tomas Vondra

Fix memory alignment in pg_mcv_list serialization

Blind attempt at fixing ia64, hppa an sparc builds.

The serialized representation of MCV lists did not enforce proper memory
alignment for internal fields, resulting in deserialization issues on
platforms that are more sensitive to this (ia64, sparc and hppa).

This forces a catalog version bump, because the layout of serialized
pg_mcv_list changes.

Broken since 7300a699.
parent d3a5fc17
...@@ -451,9 +451,9 @@ statext_mcv_load(Oid mvoid) ...@@ -451,9 +451,9 @@ statext_mcv_load(Oid mvoid)
* *
* The overall structure of the serialized representation looks like this: * The overall structure of the serialized representation looks like this:
* *
* +--------+----------------+---------------------+-------+ * +---------------+----------------+---------------------+-------+
* | header | dimension info | deduplicated values | items | * | header fields | dimension info | deduplicated values | items |
* +--------+----------------+---------------------+-------+ * +---------------+----------------+---------------------+-------+
* *
* Where dimension info stores information about type of K-th attribute (e.g. * Where dimension info stores information about type of K-th attribute (e.g.
* typlen, typbyval and length of deduplicated values). Deduplicated values * typlen, typbyval and length of deduplicated values). Deduplicated values
...@@ -492,6 +492,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) ...@@ -492,6 +492,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
/* serialized items (indexes into arrays, etc.) */ /* serialized items (indexes into arrays, etc.) */
bytea *output; bytea *output;
char *raw;
char *ptr; char *ptr;
/* values per dimension (and number of non-NULL values) */ /* values per dimension (and number of non-NULL values) */
...@@ -593,8 +594,12 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) ...@@ -593,8 +594,12 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
info[dim].nbytes = 0; info[dim].nbytes = 0;
for (i = 0; i < info[dim].nvalues; i++) for (i = 0; i < info[dim].nvalues; i++)
{ {
Size len;
values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i])); values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
info[dim].nbytes += VARSIZE_ANY(values[dim][i]);
len = VARSIZE_ANY(values[dim][i]);
info[dim].nbytes += MAXALIGN(len);
} }
} }
else if (info[dim].typlen == -2) /* cstring */ else if (info[dim].typlen == -2) /* cstring */
...@@ -602,9 +607,13 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) ...@@ -602,9 +607,13 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
info[dim].nbytes = 0; info[dim].nbytes = 0;
for (i = 0; i < info[dim].nvalues; i++) for (i = 0; i < info[dim].nvalues; i++)
{ {
Size len;
/* c-strings include terminator, so +1 byte */ /* c-strings include terminator, so +1 byte */
values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i])); values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i]));
info[dim].nbytes += strlen(DatumGetCString(values[dim][i])) + 1;
len = strlen(DatumGetCString(values[dim][i])) + 1;
info[dim].nbytes += MAXALIGN(len);
} }
} }
...@@ -617,20 +626,22 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) ...@@ -617,20 +626,22 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
* whole serialized MCV list (varlena header, MCV header, dimension info * whole serialized MCV list (varlena header, MCV header, dimension info
* for each attribute, deduplicated values and items). * for each attribute, deduplicated values and items).
*/ */
total_length = VARHDRSZ + offsetof(MCVList, items) total_length = offsetof(MCVList, items)
+ (ndims * sizeof(DimensionInfo)) + MAXALIGN(ndims * sizeof(DimensionInfo));
+ (mcvlist->nitems * itemsize);
/* add space for the arrays of deduplicated values */ /* add space for the arrays of deduplicated values */
for (i = 0; i < ndims; i++) for (i = 0; i < ndims; i++)
total_length += info[i].nbytes; total_length += MAXALIGN(info[i].nbytes);
/* allocate space for the whole serialized MCV list */ /* and finally the items (no additional alignment needed) */
output = (bytea *) palloc(total_length); total_length += mcvlist->nitems * itemsize;
SET_VARSIZE(output, total_length);
/* 'ptr' points to the current position in the output buffer */ /*
ptr = VARDATA(output); * Allocate space for the whole serialized MCV list (we'll skip bytes,
* so we set them to zero to make the result more compressible).
*/
raw = palloc0(total_length);
ptr = raw;
/* copy the MCV list header */ /* copy the MCV list header */
memcpy(ptr, mcvlist, offsetof(MCVList, items)); memcpy(ptr, mcvlist, offsetof(MCVList, items));
...@@ -638,7 +649,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) ...@@ -638,7 +649,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
/* store information about the attributes */ /* store information about the attributes */
memcpy(ptr, info, sizeof(DimensionInfo) * ndims); memcpy(ptr, info, sizeof(DimensionInfo) * ndims);
ptr += sizeof(DimensionInfo) * ndims; ptr += MAXALIGN(sizeof(DimensionInfo) * ndims);
/* Copy the deduplicated values for all attributes to the output. */ /* Copy the deduplicated values for all attributes to the output. */
for (dim = 0; dim < ndims; dim++) for (dim = 0; dim < ndims; dim++)
...@@ -670,6 +681,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) ...@@ -670,6 +681,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
} }
else if (info[dim].typlen > 0) /* pased by reference */ else if (info[dim].typlen > 0) /* pased by reference */
{ {
/* no special alignment needed, treated as char array */
memcpy(ptr, DatumGetPointer(value), info[dim].typlen); memcpy(ptr, DatumGetPointer(value), info[dim].typlen);
ptr += info[dim].typlen; ptr += info[dim].typlen;
} }
...@@ -678,14 +690,14 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) ...@@ -678,14 +690,14 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
int len = VARSIZE_ANY(value); int len = VARSIZE_ANY(value);
memcpy(ptr, DatumGetPointer(value), len); memcpy(ptr, DatumGetPointer(value), len);
ptr += len; ptr += MAXALIGN(len);
} }
else if (info[dim].typlen == -2) /* cstring */ else if (info[dim].typlen == -2) /* cstring */
{ {
Size len = strlen(DatumGetCString(value)) + 1; /* terminator */ Size len = strlen(DatumGetCString(value)) + 1; /* terminator */
memcpy(ptr, DatumGetCString(value), len); memcpy(ptr, DatumGetCString(value), len);
ptr += len; ptr += MAXALIGN(len);
} }
/* no underflows or overflows */ /* no underflows or overflows */
...@@ -694,6 +706,9 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) ...@@ -694,6 +706,9 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
/* we should get exactly nbytes of data for this dimension */ /* we should get exactly nbytes of data for this dimension */
Assert((ptr - start) == info[dim].nbytes); Assert((ptr - start) == info[dim].nbytes);
/* make sure the pointer is aligned correctly after each dimension */
ptr = raw + MAXALIGN(ptr - raw);
} }
/* Serialize the items, with uint16 indexes instead of the values. */ /* Serialize the items, with uint16 indexes instead of the values. */
...@@ -702,7 +717,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) ...@@ -702,7 +717,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
MCVItem *mcvitem = &mcvlist->items[i]; MCVItem *mcvitem = &mcvlist->items[i];
/* don't write beyond the allocated space */ /* don't write beyond the allocated space */
Assert(ptr <= (char *) output + total_length - itemsize); Assert(ptr <= raw + total_length - itemsize);
/* reset the item (we only allocate it once and reuse it) */ /* reset the item (we only allocate it once and reuse it) */
memset(item, 0, itemsize); memset(item, 0, itemsize);
...@@ -741,12 +756,19 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) ...@@ -741,12 +756,19 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats)
} }
/* at this point we expect to match the total_length exactly */ /* at this point we expect to match the total_length exactly */
Assert((ptr - (char *) output) == total_length); Assert((ptr - raw) == total_length);
pfree(item); pfree(item);
pfree(values); pfree(values);
pfree(counts); pfree(counts);
output = (bytea *) palloc(VARHDRSZ + total_length);
SET_VARSIZE(output, VARHDRSZ + total_length);
memcpy(VARDATA_ANY(output), raw, total_length);
pfree(raw);
return output; return output;
} }
...@@ -764,6 +786,7 @@ statext_mcv_deserialize(bytea *data) ...@@ -764,6 +786,7 @@ statext_mcv_deserialize(bytea *data)
i; i;
Size expected_size; Size expected_size;
MCVList *mcvlist; MCVList *mcvlist;
char *raw;
char *ptr; char *ptr;
int ndims, int ndims,
...@@ -781,6 +804,7 @@ statext_mcv_deserialize(bytea *data) ...@@ -781,6 +804,7 @@ statext_mcv_deserialize(bytea *data)
Size datalen; Size datalen;
char *dataptr; char *dataptr;
char *valuesptr; char *valuesptr;
char *isnullptr;
if (data == NULL) if (data == NULL)
return NULL; return NULL;
...@@ -797,7 +821,10 @@ statext_mcv_deserialize(bytea *data) ...@@ -797,7 +821,10 @@ statext_mcv_deserialize(bytea *data)
mcvlist = (MCVList *) palloc0(offsetof(MCVList, items)); mcvlist = (MCVList *) palloc0(offsetof(MCVList, items));
/* initialize pointer to the data part (skip the varlena header) */ /* initialize pointer to the data part (skip the varlena header) */
ptr = VARDATA_ANY(data); raw = palloc(VARSIZE_ANY_EXHDR(data));
ptr = raw;
memcpy(raw, VARDATA_ANY(data), VARSIZE_ANY_EXHDR(data));
/* get the header and perform further sanity checks */ /* get the header and perform further sanity checks */
memcpy(mcvlist, ptr, offsetof(MCVList, items)); memcpy(mcvlist, ptr, offsetof(MCVList, items));
...@@ -848,7 +875,7 @@ statext_mcv_deserialize(bytea *data) ...@@ -848,7 +875,7 @@ statext_mcv_deserialize(bytea *data)
/* Now it's safe to access the dimension info. */ /* Now it's safe to access the dimension info. */
info = (DimensionInfo *) ptr; info = (DimensionInfo *) ptr;
ptr += ndims * sizeof(DimensionInfo); ptr += MAXALIGN(ndims * sizeof(DimensionInfo));
/* account for the value arrays */ /* account for the value arrays */
for (dim = 0; dim < ndims; dim++) for (dim = 0; dim < ndims; dim++)
...@@ -860,7 +887,7 @@ statext_mcv_deserialize(bytea *data) ...@@ -860,7 +887,7 @@ statext_mcv_deserialize(bytea *data)
Assert(info[dim].nvalues >= 0); Assert(info[dim].nvalues >= 0);
Assert(info[dim].nbytes >= 0); Assert(info[dim].nbytes >= 0);
expected_size += info[dim].nbytes; expected_size += MAXALIGN(info[dim].nbytes);
} }
/* /*
...@@ -890,7 +917,7 @@ statext_mcv_deserialize(bytea *data) ...@@ -890,7 +917,7 @@ statext_mcv_deserialize(bytea *data)
/* space needed for a copy of data for by-ref types */ /* space needed for a copy of data for by-ref types */
if (!info[dim].typbyval) if (!info[dim].typbyval)
datalen += info[dim].nbytes; datalen += MAXALIGN(info[dim].nbytes);
} }
/* /*
...@@ -899,19 +926,25 @@ statext_mcv_deserialize(bytea *data) ...@@ -899,19 +926,25 @@ statext_mcv_deserialize(bytea *data)
* original data - it may disappear while we're still using the MCV list, * original data - it may disappear while we're still using the MCV list,
* e.g. due to catcache release. Only needed for by-ref types. * e.g. due to catcache release. Only needed for by-ref types.
*/ */
mcvlen = offsetof(MCVList, items) + mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
+(sizeof(MCVItem) * nitems) /* array of MCVItem */
+ ((sizeof(Datum) + sizeof(bool)) * ndims * nitems) + /* arrays of values and isnull flags for all MCV items */
+datalen; /* by-ref data */ mcvlen += MAXALIGN(sizeof(Datum) * ndims * nitems);
mcvlen += MAXALIGN(sizeof(bool) * ndims * nitems);
/* we don't quite need to align this, but it makes some assers easier */
mcvlen += MAXALIGN(datalen);
/* now resize the deserialized MCV list, and compute pointers to parts */
mcvlist = repalloc(mcvlist, mcvlen); mcvlist = repalloc(mcvlist, mcvlen);
/* pointer to the beginning of values/isnull space */ /* pointer to the beginning of values/isnull arrays */
valuesptr = (char *) mcvlist + offsetof(MCVList, items) valuesptr = (char *) mcvlist
+ (sizeof(MCVItem) * nitems); + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems));
isnullptr = valuesptr + (MAXALIGN(sizeof(Datum) * ndims * nitems));
/* get pointer where to store the data */ dataptr = isnullptr + (MAXALIGN(sizeof(bool) * ndims * nitems));
dataptr = (char *) mcvlist + (mcvlen - datalen);
/* /*
* Build mapping (index => value) for translating the serialized data into * Build mapping (index => value) for translating the serialized data into
...@@ -963,11 +996,11 @@ statext_mcv_deserialize(bytea *data) ...@@ -963,11 +996,11 @@ statext_mcv_deserialize(bytea *data)
Size len = VARSIZE_ANY(ptr); Size len = VARSIZE_ANY(ptr);
memcpy(dataptr, ptr, len); memcpy(dataptr, ptr, len);
ptr += len; ptr += MAXALIGN(len);
/* just point into the array */ /* just point into the array */
map[dim][i] = PointerGetDatum(dataptr); map[dim][i] = PointerGetDatum(dataptr);
dataptr += len; dataptr += MAXALIGN(len);
} }
} }
else if (info[dim].typlen == -2) else if (info[dim].typlen == -2)
...@@ -978,11 +1011,11 @@ statext_mcv_deserialize(bytea *data) ...@@ -978,11 +1011,11 @@ statext_mcv_deserialize(bytea *data)
Size len = (strlen(ptr) + 1); /* don't forget the \0 */ Size len = (strlen(ptr) + 1); /* don't forget the \0 */
memcpy(dataptr, ptr, len); memcpy(dataptr, ptr, len);
ptr += len; ptr += MAXALIGN(len);
/* just point into the array */ /* just point into the array */
map[dim][i] = PointerGetDatum(dataptr); map[dim][i] = PointerGetDatum(dataptr);
dataptr += len; dataptr += MAXALIGN(len);
} }
} }
...@@ -995,6 +1028,9 @@ statext_mcv_deserialize(bytea *data) ...@@ -995,6 +1028,9 @@ statext_mcv_deserialize(bytea *data)
/* check we consumed input data for this dimension exactly */ /* check we consumed input data for this dimension exactly */
Assert(ptr == (start + info[dim].nbytes)); Assert(ptr == (start + info[dim].nbytes));
/* ensure proper alignment of the data */
ptr = raw + MAXALIGN(ptr - raw);
} }
/* we should have also filled the MCV list exactly */ /* we should have also filled the MCV list exactly */
...@@ -1027,16 +1063,18 @@ statext_mcv_deserialize(bytea *data) ...@@ -1027,16 +1063,18 @@ statext_mcv_deserialize(bytea *data)
ptr += ITEM_SIZE(ndims); ptr += ITEM_SIZE(ndims);
/* check we're not overflowing the input */ /* check we're not overflowing the input */
Assert(ptr <= (char *) data + VARSIZE_ANY(data)); Assert(ptr <= (char *) raw + VARSIZE_ANY_EXHDR(data));
} }
/* check that we processed all the data */ /* check that we processed all the data */
Assert(ptr == (char *) data + VARSIZE_ANY(data)); Assert(ptr == raw + VARSIZE_ANY_EXHDR(data));
/* release the buffers used for mapping */ /* release the buffers used for mapping */
for (dim = 0; dim < ndims; dim++) for (dim = 0; dim < ndims; dim++)
pfree(map[dim]); pfree(map[dim]);
pfree(map); pfree(map);
pfree(raw);
return mcvlist; return mcvlist;
} }
......
...@@ -53,6 +53,6 @@ ...@@ -53,6 +53,6 @@
*/ */
/* yyyymmddN */ /* yyyymmddN */
#define CATALOG_VERSION_NO 201903271 #define CATALOG_VERSION_NO 201903291
#endif #endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment