From: Tom Lane Date: Wed, 3 Jan 2018 02:23:02 +0000 (-0500) Subject: Ensure proper alignment of tuples in HashMemoryChunkData buffers. X-Git-Tag: REL_11_BETA1~1014 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5dc692f78d3bee1e86d095a9e8d9242b44f78b01;p=postgresql Ensure proper alignment of tuples in HashMemoryChunkData buffers. The previous coding relied (without any documentation) on the data[] member of HashMemoryChunkData being at a MAXALIGN'ed offset. If it was not, the tuples would not be maxaligned either, leading to failures on alignment-picky machines. While there seems to be no live bug on any platform we support, this is clearly pretty fragile: any addition to or rearrangement of the fields in HashMemoryChunkData could break it. Let's remove the hazard by getting rid of the data[] member and instead using pointer arithmetic with an explicitly maxalign'ed offset. Discussion: https://postgr.es/m/14483.1514938129@sss.pgh.pa.us --- diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c index 4e1a2806b5..38a84cc14c 100644 --- a/src/backend/executor/nodeHash.c +++ b/src/backend/executor/nodeHash.c @@ -979,7 +979,7 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable) /* process all tuples stored in this chunk (and then free it) */ while (idx < oldchunks->used) { - HashJoinTuple hashTuple = (HashJoinTuple) (oldchunks->data + idx); + HashJoinTuple hashTuple = (HashJoinTuple) (HASH_CHUNK_DATA(oldchunks) + idx); MinimalTuple tuple = HJTUPLE_MINTUPLE(hashTuple); int hashTupleSize = (HJTUPLE_OVERHEAD + tuple->t_len); int bucketno; @@ -1285,7 +1285,7 @@ ExecParallelHashRepartitionFirst(HashJoinTable hashtable) /* Repartition all tuples in this chunk. */ while (idx < chunk->used) { - HashJoinTuple hashTuple = (HashJoinTuple) (chunk->data + idx); + HashJoinTuple hashTuple = (HashJoinTuple) (HASH_CHUNK_DATA(chunk) + idx); MinimalTuple tuple = HJTUPLE_MINTUPLE(hashTuple); HashJoinTuple copyTuple; dsa_pointer shared; @@ -1469,7 +1469,7 @@ ExecHashIncreaseNumBuckets(HashJoinTable hashtable) while (idx < chunk->used) { - HashJoinTuple hashTuple = (HashJoinTuple) (chunk->data + idx); + HashJoinTuple hashTuple = (HashJoinTuple) (HASH_CHUNK_DATA(chunk) + idx); int bucketno; int batchno; @@ -1552,7 +1552,7 @@ ExecParallelHashIncreaseNumBuckets(HashJoinTable hashtable) while (idx < chunk->used) { - HashJoinTuple hashTuple = (HashJoinTuple) (chunk->data + idx); + HashJoinTuple hashTuple = (HashJoinTuple) (HASH_CHUNK_DATA(chunk) + idx); dsa_pointer shared = chunk_s + HASH_CHUNK_HEADER_SIZE + idx; int bucketno; int batchno; @@ -2651,17 +2651,16 @@ dense_alloc(HashJoinTable hashtable, Size size) size = MAXALIGN(size); /* - * If tuple size is larger than of 1/4 of chunk size, allocate a separate - * chunk. + * If tuple size is larger than threshold, allocate a separate chunk. */ if (size > HASH_CHUNK_THRESHOLD) { /* allocate new chunk and put it at the beginning of the list */ newChunk = (HashMemoryChunk) MemoryContextAlloc(hashtable->batchCxt, - offsetof(HashMemoryChunkData, data) + size); + HASH_CHUNK_HEADER_SIZE + size); newChunk->maxlen = size; - newChunk->used = 0; - newChunk->ntuples = 0; + newChunk->used = size; + newChunk->ntuples = 1; /* * Add this chunk to the list after the first existing chunk, so that @@ -2678,10 +2677,7 @@ dense_alloc(HashJoinTable hashtable, Size size) hashtable->chunks = newChunk; } - newChunk->used += size; - newChunk->ntuples += 1; - - return newChunk->data; + return HASH_CHUNK_DATA(newChunk); } /* @@ -2693,7 +2689,7 @@ dense_alloc(HashJoinTable hashtable, Size size) { /* allocate new chunk and put it at the beginning of the list */ newChunk = (HashMemoryChunk) MemoryContextAlloc(hashtable->batchCxt, - offsetof(HashMemoryChunkData, data) + HASH_CHUNK_SIZE); + HASH_CHUNK_HEADER_SIZE + HASH_CHUNK_SIZE); newChunk->maxlen = HASH_CHUNK_SIZE; newChunk->used = size; @@ -2702,11 +2698,11 @@ dense_alloc(HashJoinTable hashtable, Size size) newChunk->next.unshared = hashtable->chunks; hashtable->chunks = newChunk; - return newChunk->data; + return HASH_CHUNK_DATA(newChunk); } /* There is enough space in the current chunk, let's add the tuple */ - ptr = hashtable->chunks->data + hashtable->chunks->used; + ptr = HASH_CHUNK_DATA(hashtable->chunks) + hashtable->chunks->used; hashtable->chunks->used += size; hashtable->chunks->ntuples += 1; @@ -2751,7 +2747,7 @@ ExecParallelHashTupleAlloc(HashJoinTable hashtable, size_t size, chunk_shared = hashtable->current_chunk_shared; Assert(chunk == dsa_get_address(hashtable->area, chunk_shared)); *shared = chunk_shared + HASH_CHUNK_HEADER_SIZE + chunk->used; - result = (HashJoinTuple) (chunk->data + chunk->used); + result = (HashJoinTuple) (HASH_CHUNK_DATA(chunk) + chunk->used); chunk->used += size; Assert(chunk->used <= chunk->maxlen); @@ -2859,8 +2855,8 @@ ExecParallelHashTupleAlloc(HashJoinTable hashtable, size_t size, } LWLockRelease(&pstate->lock); - Assert(chunk->data == dsa_get_address(hashtable->area, *shared)); - result = (HashJoinTuple) chunk->data; + Assert(HASH_CHUNK_DATA(chunk) == dsa_get_address(hashtable->area, *shared)); + result = (HashJoinTuple) HASH_CHUNK_DATA(chunk); return result; } diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h index d8c82d4e7c..be83500b9d 100644 --- a/src/include/executor/hashjoin.h +++ b/src/include/executor/hashjoin.h @@ -117,7 +117,7 @@ typedef struct HashSkewBucket typedef struct HashMemoryChunkData { int ntuples; /* number of tuples stored in this chunk */ - size_t maxlen; /* size of the buffer holding the tuples */ + size_t maxlen; /* size of the chunk's tuple buffer */ size_t used; /* number of buffer bytes already used */ /* pointer to the next chunk (linked list) */ @@ -127,13 +127,19 @@ typedef struct HashMemoryChunkData dsa_pointer shared; } next; - char data[FLEXIBLE_ARRAY_MEMBER]; /* buffer allocated at the end */ + /* + * The chunk's tuple buffer starts after the HashMemoryChunkData struct, + * at offset HASH_CHUNK_HEADER_SIZE (which must be maxaligned). Note that + * that offset is not included in "maxlen" or "used". + */ } HashMemoryChunkData; typedef struct HashMemoryChunkData *HashMemoryChunk; #define HASH_CHUNK_SIZE (32 * 1024L) -#define HASH_CHUNK_HEADER_SIZE (offsetof(HashMemoryChunkData, data)) +#define HASH_CHUNK_HEADER_SIZE MAXALIGN(sizeof(HashMemoryChunkData)) +#define HASH_CHUNK_DATA(hc) (((char *) (hc)) + HASH_CHUNK_HEADER_SIZE) +/* tuples exceeding HASH_CHUNK_THRESHOLD bytes are put in their own chunk */ #define HASH_CHUNK_THRESHOLD (HASH_CHUNK_SIZE / 4) /*