From d85e0f366a347633f255b8d1031ab34733c5e147 Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Fri, 29 Mar 2019 18:50:51 +0100 Subject: [PATCH] Fix memory alignment in pg_mcv_list serialization Blind attempt at fixing ia64, hppa an sparc builds. The serialized representation of MCV lists did not enforce proper memory alignment for internal fields, resulting in deserialization issues on platforms that are more sensitive to this (ia64, sparc and hppa). This forces a catalog version bump, because the layout of serialized pg_mcv_list changes. Broken since 7300a699. --- src/backend/statistics/mcv.c | 114 ++++++++++++++++++++----------- src/include/catalog/catversion.h | 2 +- 2 files changed, 77 insertions(+), 39 deletions(-) diff --git a/src/backend/statistics/mcv.c b/src/backend/statistics/mcv.c index ee58127832..e90a03fdf7 100644 --- a/src/backend/statistics/mcv.c +++ b/src/backend/statistics/mcv.c @@ -451,9 +451,9 @@ statext_mcv_load(Oid mvoid) * * The overall structure of the serialized representation looks like this: * - * +--------+----------------+---------------------+-------+ - * | header | dimension info | deduplicated values | items | - * +--------+----------------+---------------------+-------+ + * +---------------+----------------+---------------------+-------+ + * | header fields | dimension info | deduplicated values | items | + * +---------------+----------------+---------------------+-------+ * * Where dimension info stores information about type of K-th attribute (e.g. * typlen, typbyval and length of deduplicated values). Deduplicated values @@ -492,6 +492,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) /* serialized items (indexes into arrays, etc.) */ bytea *output; + char *raw; char *ptr; /* values per dimension (and number of non-NULL values) */ @@ -593,8 +594,12 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) info[dim].nbytes = 0; for (i = 0; i < info[dim].nvalues; i++) { + Size len; + values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i])); - info[dim].nbytes += VARSIZE_ANY(values[dim][i]); + + len = VARSIZE_ANY(values[dim][i]); + info[dim].nbytes += MAXALIGN(len); } } else if (info[dim].typlen == -2) /* cstring */ @@ -602,9 +607,13 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) info[dim].nbytes = 0; for (i = 0; i < info[dim].nvalues; i++) { + Size len; + /* c-strings include terminator, so +1 byte */ values[dim][i] = PointerGetDatum(PG_DETOAST_DATUM(values[dim][i])); - info[dim].nbytes += strlen(DatumGetCString(values[dim][i])) + 1; + + len = strlen(DatumGetCString(values[dim][i])) + 1; + info[dim].nbytes += MAXALIGN(len); } } @@ -617,20 +626,22 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) * whole serialized MCV list (varlena header, MCV header, dimension info * for each attribute, deduplicated values and items). */ - total_length = VARHDRSZ + offsetof(MCVList, items) - + (ndims * sizeof(DimensionInfo)) - + (mcvlist->nitems * itemsize); + total_length = offsetof(MCVList, items) + + MAXALIGN(ndims * sizeof(DimensionInfo)); /* add space for the arrays of deduplicated values */ for (i = 0; i < ndims; i++) - total_length += info[i].nbytes; + total_length += MAXALIGN(info[i].nbytes); - /* allocate space for the whole serialized MCV list */ - output = (bytea *) palloc(total_length); - SET_VARSIZE(output, total_length); + /* and finally the items (no additional alignment needed) */ + total_length += mcvlist->nitems * itemsize; - /* 'ptr' points to the current position in the output buffer */ - ptr = VARDATA(output); + /* + * Allocate space for the whole serialized MCV list (we'll skip bytes, + * so we set them to zero to make the result more compressible). + */ + raw = palloc0(total_length); + ptr = raw; /* copy the MCV list header */ memcpy(ptr, mcvlist, offsetof(MCVList, items)); @@ -638,7 +649,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) /* store information about the attributes */ memcpy(ptr, info, sizeof(DimensionInfo) * ndims); - ptr += sizeof(DimensionInfo) * ndims; + ptr += MAXALIGN(sizeof(DimensionInfo) * ndims); /* Copy the deduplicated values for all attributes to the output. */ for (dim = 0; dim < ndims; dim++) @@ -670,6 +681,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) } else if (info[dim].typlen > 0) /* pased by reference */ { + /* no special alignment needed, treated as char array */ memcpy(ptr, DatumGetPointer(value), info[dim].typlen); ptr += info[dim].typlen; } @@ -678,14 +690,14 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) int len = VARSIZE_ANY(value); memcpy(ptr, DatumGetPointer(value), len); - ptr += len; + ptr += MAXALIGN(len); } else if (info[dim].typlen == -2) /* cstring */ { Size len = strlen(DatumGetCString(value)) + 1; /* terminator */ memcpy(ptr, DatumGetCString(value), len); - ptr += len; + ptr += MAXALIGN(len); } /* no underflows or overflows */ @@ -694,6 +706,9 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) /* we should get exactly nbytes of data for this dimension */ Assert((ptr - start) == info[dim].nbytes); + + /* make sure the pointer is aligned correctly after each dimension */ + ptr = raw + MAXALIGN(ptr - raw); } /* Serialize the items, with uint16 indexes instead of the values. */ @@ -702,7 +717,7 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) MCVItem *mcvitem = &mcvlist->items[i]; /* don't write beyond the allocated space */ - Assert(ptr <= (char *) output + total_length - itemsize); + Assert(ptr <= raw + total_length - itemsize); /* reset the item (we only allocate it once and reuse it) */ memset(item, 0, itemsize); @@ -741,12 +756,19 @@ statext_mcv_serialize(MCVList * mcvlist, VacAttrStats **stats) } /* at this point we expect to match the total_length exactly */ - Assert((ptr - (char *) output) == total_length); + Assert((ptr - raw) == total_length); pfree(item); pfree(values); pfree(counts); + output = (bytea *) palloc(VARHDRSZ + total_length); + SET_VARSIZE(output, VARHDRSZ + total_length); + + memcpy(VARDATA_ANY(output), raw, total_length); + + pfree(raw); + return output; } @@ -764,6 +786,7 @@ statext_mcv_deserialize(bytea *data) i; Size expected_size; MCVList *mcvlist; + char *raw; char *ptr; int ndims, @@ -781,6 +804,7 @@ statext_mcv_deserialize(bytea *data) Size datalen; char *dataptr; char *valuesptr; + char *isnullptr; if (data == NULL) return NULL; @@ -797,7 +821,10 @@ statext_mcv_deserialize(bytea *data) mcvlist = (MCVList *) palloc0(offsetof(MCVList, items)); /* initialize pointer to the data part (skip the varlena header) */ - ptr = VARDATA_ANY(data); + raw = palloc(VARSIZE_ANY_EXHDR(data)); + ptr = raw; + + memcpy(raw, VARDATA_ANY(data), VARSIZE_ANY_EXHDR(data)); /* get the header and perform further sanity checks */ memcpy(mcvlist, ptr, offsetof(MCVList, items)); @@ -848,7 +875,7 @@ statext_mcv_deserialize(bytea *data) /* Now it's safe to access the dimension info. */ info = (DimensionInfo *) ptr; - ptr += ndims * sizeof(DimensionInfo); + ptr += MAXALIGN(ndims * sizeof(DimensionInfo)); /* account for the value arrays */ for (dim = 0; dim < ndims; dim++) @@ -860,7 +887,7 @@ statext_mcv_deserialize(bytea *data) Assert(info[dim].nvalues >= 0); Assert(info[dim].nbytes >= 0); - expected_size += info[dim].nbytes; + expected_size += MAXALIGN(info[dim].nbytes); } /* @@ -890,7 +917,7 @@ statext_mcv_deserialize(bytea *data) /* space needed for a copy of data for by-ref types */ if (!info[dim].typbyval) - datalen += info[dim].nbytes; + datalen += MAXALIGN(info[dim].nbytes); } /* @@ -899,19 +926,25 @@ statext_mcv_deserialize(bytea *data) * original data - it may disappear while we're still using the MCV list, * e.g. due to catcache release. Only needed for by-ref types. */ - mcvlen = offsetof(MCVList, items) + - +(sizeof(MCVItem) * nitems) /* array of MCVItem */ - + ((sizeof(Datum) + sizeof(bool)) * ndims * nitems) + - +datalen; /* by-ref data */ + mcvlen = MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems)); + + /* arrays of values and isnull flags for all MCV items */ + mcvlen += MAXALIGN(sizeof(Datum) * ndims * nitems); + mcvlen += MAXALIGN(sizeof(bool) * ndims * nitems); + /* we don't quite need to align this, but it makes some assers easier */ + mcvlen += MAXALIGN(datalen); + + /* now resize the deserialized MCV list, and compute pointers to parts */ mcvlist = repalloc(mcvlist, mcvlen); - /* pointer to the beginning of values/isnull space */ - valuesptr = (char *) mcvlist + offsetof(MCVList, items) - + (sizeof(MCVItem) * nitems); + /* pointer to the beginning of values/isnull arrays */ + valuesptr = (char *) mcvlist + + MAXALIGN(offsetof(MCVList, items) + (sizeof(MCVItem) * nitems)); + + isnullptr = valuesptr + (MAXALIGN(sizeof(Datum) * ndims * nitems)); - /* get pointer where to store the data */ - dataptr = (char *) mcvlist + (mcvlen - datalen); + dataptr = isnullptr + (MAXALIGN(sizeof(bool) * ndims * nitems)); /* * Build mapping (index => value) for translating the serialized data into @@ -963,11 +996,11 @@ statext_mcv_deserialize(bytea *data) Size len = VARSIZE_ANY(ptr); memcpy(dataptr, ptr, len); - ptr += len; + ptr += MAXALIGN(len); /* just point into the array */ map[dim][i] = PointerGetDatum(dataptr); - dataptr += len; + dataptr += MAXALIGN(len); } } else if (info[dim].typlen == -2) @@ -978,11 +1011,11 @@ statext_mcv_deserialize(bytea *data) Size len = (strlen(ptr) + 1); /* don't forget the \0 */ memcpy(dataptr, ptr, len); - ptr += len; + ptr += MAXALIGN(len); /* just point into the array */ map[dim][i] = PointerGetDatum(dataptr); - dataptr += len; + dataptr += MAXALIGN(len); } } @@ -995,6 +1028,9 @@ statext_mcv_deserialize(bytea *data) /* check we consumed input data for this dimension exactly */ Assert(ptr == (start + info[dim].nbytes)); + + /* ensure proper alignment of the data */ + ptr = raw + MAXALIGN(ptr - raw); } /* we should have also filled the MCV list exactly */ @@ -1027,16 +1063,18 @@ statext_mcv_deserialize(bytea *data) ptr += ITEM_SIZE(ndims); /* check we're not overflowing the input */ - Assert(ptr <= (char *) data + VARSIZE_ANY(data)); + Assert(ptr <= (char *) raw + VARSIZE_ANY_EXHDR(data)); } /* check that we processed all the data */ - Assert(ptr == (char *) data + VARSIZE_ANY(data)); + Assert(ptr == raw + VARSIZE_ANY_EXHDR(data)); /* release the buffers used for mapping */ for (dim = 0; dim < ndims; dim++) pfree(map[dim]); + pfree(map); + pfree(raw); return mcvlist; } diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index f01087614e..485cf422d9 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201903271 +#define CATALOG_VERSION_NO 201903291 #endif -- 2.40.0