]> granicus.if.org Git - postgresql/commitdiff
Avoid using potentially-under-aligned page buffers.
authorTom Lane <tgl@sss.pgh.pa.us>
Sat, 1 Sep 2018 19:27:12 +0000 (15:27 -0400)
committerTom Lane <tgl@sss.pgh.pa.us>
Sat, 1 Sep 2018 19:27:17 +0000 (15:27 -0400)
There's a project policy against using plain "char buf[BLCKSZ]" local
or static variables as page buffers; preferred style is to palloc or
malloc each buffer to ensure it is MAXALIGN'd.  However, that policy's
been ignored in an increasing number of places.  We've apparently got
away with it so far, probably because (a) relatively few people use
platforms on which misalignment causes core dumps and/or (b) the
variables chance to be sufficiently aligned anyway.  But this is not
something to rely on.  Moreover, even if we don't get a core dump,
we might be paying a lot of cycles for misaligned accesses.

To fix, invent new union types PGAlignedBlock and PGAlignedXLogBlock
that the compiler must allocate with sufficient alignment, and use
those in place of plain char arrays.

I used these types even for variables where there's no risk of a
misaligned access, since ensuring proper alignment should make
kernel data transfers faster.  I also changed some places where
we had been palloc'ing short-lived buffers, for coding style
uniformity and to save palloc/pfree overhead.

Since this seems to be a live portability hazard (despite the lack
of field reports), back-patch to all supported versions.

Patch by me; thanks to Michael Paquier for review.

Discussion: https://postgr.es/m/1535618100.1286.3.camel@credativ.de

24 files changed:
contrib/bloom/blinsert.c
contrib/pg_prewarm/pg_prewarm.c
contrib/pg_standby/pg_standby.c
src/backend/access/gin/ginentrypage.c
src/backend/access/gin/ginfast.c
src/backend/access/hash/hashpage.c
src/backend/access/heap/heapam.c
src/backend/access/heap/visibilitymap.c
src/backend/access/transam/generic_xlog.c
src/backend/access/transam/xlog.c
src/backend/access/transam/xloginsert.c
src/backend/access/transam/xlogreader.c
src/backend/commands/tablecmds.c
src/backend/replication/walsender.c
src/backend/storage/file/buffile.c
src/backend/storage/freespace/freespace.c
src/backend/utils/sort/logtape.c
src/bin/pg_basebackup/walmethods.c
src/bin/pg_resetwal/pg_resetwal.c
src/bin/pg_rewind/copy_fetch.c
src/bin/pg_upgrade/file.c
src/bin/pg_verify_checksums/pg_verify_checksums.c
src/bin/pg_waldump/pg_waldump.c
src/include/c.h

index 4afdea7c9a302fc5fd50e33c2f0e29a7da4aa9f0..9f223d3b2a7bd4c4247016b11557850b8c25b9df 100644 (file)
@@ -36,7 +36,7 @@ typedef struct
        int64           indtuples;              /* total number of tuples indexed */
        MemoryContext tmpCtx;           /* temporary memory context reset after each
                                                                 * tuple */
-       char            data[BLCKSZ];   /* cached page */
+       PGAlignedBlock data;            /* cached page */
        int                     count;                  /* number of tuples in cached page */
 } BloomBuildState;
 
@@ -52,7 +52,7 @@ flushCachedPage(Relation index, BloomBuildState *buildstate)
 
        state = GenericXLogStart(index);
        page = GenericXLogRegisterBuffer(state, buffer, GENERIC_XLOG_FULL_IMAGE);
-       memcpy(page, buildstate->data, BLCKSZ);
+       memcpy(page, buildstate->data.data, BLCKSZ);
        GenericXLogFinish(state);
        UnlockReleaseBuffer(buffer);
 }
@@ -63,8 +63,8 @@ flushCachedPage(Relation index, BloomBuildState *buildstate)
 static void
 initCachedPage(BloomBuildState *buildstate)
 {
-       memset(buildstate->data, 0, BLCKSZ);
-       BloomInitPage(buildstate->data, 0);
+       memset(buildstate->data.data, 0, BLCKSZ);
+       BloomInitPage(buildstate->data.data, 0);
        buildstate->count = 0;
 }
 
@@ -84,7 +84,7 @@ bloomBuildCallback(Relation index, HeapTuple htup, Datum *values,
        itup = BloomFormTuple(&buildstate->blstate, &htup->t_self, values, isnull);
 
        /* Try to add next item to cached page */
-       if (BloomPageAddItem(&buildstate->blstate, buildstate->data, itup))
+       if (BloomPageAddItem(&buildstate->blstate, buildstate->data.data, itup))
        {
                /* Next item was added successfully */
                buildstate->count++;
@@ -98,7 +98,7 @@ bloomBuildCallback(Relation index, HeapTuple htup, Datum *values,
 
                initCachedPage(buildstate);
 
-               if (!BloomPageAddItem(&buildstate->blstate, buildstate->data, itup))
+               if (!BloomPageAddItem(&buildstate->blstate, buildstate->data.data, itup))
                {
                        /* We shouldn't be here since we're inserting to the empty page */
                        elog(ERROR, "could not add new bloom tuple to empty page");
index 3cbb7c2b889ac3953727840beea61e37ee9df42f..1f4bfb8c0d4b56fec8bcfa93dfaea350a3e95c2e 100644 (file)
@@ -36,7 +36,7 @@ typedef enum
        PREWARM_BUFFER
 } PrewarmType;
 
-static char blockbuffer[BLCKSZ];
+static PGAlignedBlock blockbuffer;
 
 /*
  * pg_prewarm(regclass, mode text, fork text,
@@ -178,7 +178,7 @@ pg_prewarm(PG_FUNCTION_ARGS)
                for (block = first_block; block <= last_block; ++block)
                {
                        CHECK_FOR_INTERRUPTS();
-                       smgrread(rel->rd_smgr, forkNumber, block, blockbuffer);
+                       smgrread(rel->rd_smgr, forkNumber, block, blockbuffer.data);
                        ++blocks_done;
                }
        }
index d840940e410fe043603deade3d433b71a987c175..ee1fbd7b3327924abc2a30583d8d56bb291c0812 100644 (file)
@@ -401,9 +401,7 @@ SetWALSegSize(void)
 {
        bool            ret_val = false;
        int                     fd;
-
-       /* malloc this buffer to ensure sufficient alignment: */
-       char       *buf = (char *) pg_malloc(XLOG_BLCKSZ);
+       PGAlignedXLogBlock buf;
 
        Assert(WalSegSz == -1);
 
@@ -411,14 +409,13 @@ SetWALSegSize(void)
        {
                fprintf(stderr, "%s: could not open WAL file \"%s\": %s\n",
                                progname, WALFilePath, strerror(errno));
-               pg_free(buf);
                return false;
        }
 
        errno = 0;
-       if (read(fd, buf, XLOG_BLCKSZ) == XLOG_BLCKSZ)
+       if (read(fd, buf.data, XLOG_BLCKSZ) == XLOG_BLCKSZ)
        {
-               XLogLongPageHeader longhdr = (XLogLongPageHeader) buf;
+               XLogLongPageHeader longhdr = (XLogLongPageHeader) buf.data;
 
                WalSegSz = longhdr->xlp_seg_size;
 
@@ -455,7 +452,6 @@ SetWALSegSize(void)
        fflush(stderr);
 
        close(fd);
-       pg_free(buf);
        return ret_val;
 }
 
index 810769718f4e63cfbe480425944dee143b154600..184cc0af3e69b342db36e602b6de7ce27bf2674b 100644 (file)
@@ -616,7 +616,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
        Page            lpage = PageGetTempPageCopy(BufferGetPage(origbuf));
        Page            rpage = PageGetTempPageCopy(BufferGetPage(origbuf));
        Size            pageSize = PageGetPageSize(lpage);
-       char            tupstore[2 * BLCKSZ];
+       PGAlignedBlock tupstore[2]; /* could need 2 pages' worth of tuples */
 
        entryPreparePage(btree, lpage, off, insertData, updateblkno);
 
@@ -625,7 +625,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
         * one after another in a temporary workspace.
         */
        maxoff = PageGetMaxOffsetNumber(lpage);
-       ptr = tupstore;
+       ptr = tupstore[0].data;
        for (i = FirstOffsetNumber; i <= maxoff; i++)
        {
                if (i == off)
@@ -658,7 +658,7 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
        GinInitPage(rpage, GinPageGetOpaque(lpage)->flags, pageSize);
        GinInitPage(lpage, GinPageGetOpaque(rpage)->flags, pageSize);
 
-       ptr = tupstore;
+       ptr = tupstore[0].data;
        maxoff++;
        lsize = 0;
 
index e32807e62ac194a4c3dde6bd73bc4688414790bf..ca2a32bd257c4f49a1afac83a2cc8dbdcaee3e50 100644 (file)
@@ -64,18 +64,15 @@ writeListPage(Relation index, Buffer buffer,
                                size = 0;
        OffsetNumber l,
                                off;
-       char       *workspace;
+       PGAlignedBlock workspace;
        char       *ptr;
 
-       /* workspace could be a local array; we use palloc for alignment */
-       workspace = palloc(BLCKSZ);
-
        START_CRIT_SECTION();
 
        GinInitBuffer(buffer, GIN_LIST);
 
        off = FirstOffsetNumber;
-       ptr = workspace;
+       ptr = workspace.data;
 
        for (i = 0; i < ntuples; i++)
        {
@@ -127,7 +124,7 @@ writeListPage(Relation index, Buffer buffer,
                XLogRegisterData((char *) &data, sizeof(ginxlogInsertListPage));
 
                XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
-               XLogRegisterBufData(0, workspace, size);
+               XLogRegisterBufData(0, workspace.data, size);
 
                recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE);
                PageSetLSN(page, recptr);
@@ -140,8 +137,6 @@ writeListPage(Relation index, Buffer buffer,
 
        END_CRIT_SECTION();
 
-       pfree(workspace);
-
        return freesize;
 }
 
index 3ec29a535689f11cdf64ed665071446d9260a1b2..69a9c4f1feed3e2bf6f75fd04b59f63a3ef53a52 100644 (file)
@@ -1000,7 +1000,7 @@ static bool
 _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
 {
        BlockNumber lastblock;
-       char            zerobuf[BLCKSZ];
+       PGAlignedBlock zerobuf;
        Page            page;
        HashPageOpaque ovflopaque;
 
@@ -1013,7 +1013,7 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
        if (lastblock < firstblock || lastblock == InvalidBlockNumber)
                return false;
 
-       page = (Page) zerobuf;
+       page = (Page) zerobuf.data;
 
        /*
         * Initialize the page.  Just zeroing the page won't work; see
@@ -1034,11 +1034,11 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)
                log_newpage(&rel->rd_node,
                                        MAIN_FORKNUM,
                                        lastblock,
-                                       zerobuf,
+                                       zerobuf.data,
                                        true);
 
        RelationOpenSmgr(rel);
-       smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zerobuf, false);
+       smgrextend(rel->rd_smgr, MAIN_FORKNUM, lastblock, zerobuf.data, false);
 
        return true;
 }
index b8bfe23a8233c9d40bf3a0f25ec2ce2b91b5d4aa..56f1d82f962c43e44bc538f607b9b875972ca0e2 100644 (file)
@@ -2709,7 +2709,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
        HeapTuple  *heaptuples;
        int                     i;
        int                     ndone;
-       char       *scratch = NULL;
+       PGAlignedBlock scratch;
        Page            page;
        bool            needwal;
        Size            saveFreeSpace;
@@ -2726,14 +2726,6 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
                heaptuples[i] = heap_prepare_insert(relation, tuples[i],
                                                                                        xid, cid, options);
 
-       /*
-        * Allocate some memory to use for constructing the WAL record. Using
-        * palloc() within a critical section is not safe, so we allocate this
-        * beforehand.
-        */
-       if (needwal)
-               scratch = palloc(BLCKSZ);
-
        /*
         * We're about to do the actual inserts -- but check for conflict first,
         * to minimize the possibility of having to roll back work we've just
@@ -2826,7 +2818,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
                        uint8           info = XLOG_HEAP2_MULTI_INSERT;
                        char       *tupledata;
                        int                     totaldatalen;
-                       char       *scratchptr = scratch;
+                       char       *scratchptr = scratch.data;
                        bool            init;
                        int                     bufflags = 0;
 
@@ -2885,7 +2877,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
                                scratchptr += datalen;
                        }
                        totaldatalen = scratchptr - tupledata;
-                       Assert((scratchptr - scratch) < BLCKSZ);
+                       Assert((scratchptr - scratch.data) < BLCKSZ);
 
                        if (need_tuple_data)
                                xlrec->flags |= XLH_INSERT_CONTAINS_NEW_TUPLE;
@@ -2912,7 +2904,7 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
                                bufflags |= REGBUF_KEEP_DATA;
 
                        XLogBeginInsert();
-                       XLogRegisterData((char *) xlrec, tupledata - scratch);
+                       XLogRegisterData((char *) xlrec, tupledata - scratch.data);
                        XLogRegisterBuffer(0, buffer, REGBUF_STANDARD | bufflags);
 
                        XLogRegisterBufData(0, tupledata, totaldatalen);
index 239a10f5509730178c4c92bd7a80d7a468b6887f..695567b4b0d5edea59e800fb5754c420b6e2bbfb 100644 (file)
@@ -645,10 +645,9 @@ static void
 vm_extend(Relation rel, BlockNumber vm_nblocks)
 {
        BlockNumber vm_nblocks_now;
-       Page            pg;
+       PGAlignedBlock pg;
 
-       pg = (Page) palloc(BLCKSZ);
-       PageInit(pg, BLCKSZ, 0);
+       PageInit((Page) pg.data, BLCKSZ, 0);
 
        /*
         * We use the relation extension lock to lock out other backends trying to
@@ -679,10 +678,10 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
        /* Now extend the file */
        while (vm_nblocks_now < vm_nblocks)
        {
-               PageSetChecksumInplace(pg, vm_nblocks_now);
+               PageSetChecksumInplace((Page) pg.data, vm_nblocks_now);
 
                smgrextend(rel->rd_smgr, VISIBILITYMAP_FORKNUM, vm_nblocks_now,
-                                  (char *) pg, false);
+                                  pg.data, false);
                vm_nblocks_now++;
        }
 
@@ -699,6 +698,4 @@ vm_extend(Relation rel, BlockNumber vm_nblocks)
        rel->rd_smgr->smgr_vm_nblocks = vm_nblocks_now;
 
        UnlockRelationForExtension(rel, ExclusiveLock);
-
-       pfree(pg);
 }
index ce023548ae8d209841067089a4cf9c9865e9f55d..aa7ad725f4ec7ed936df5ad4717fe7e4569bc0c9 100644 (file)
@@ -61,14 +61,11 @@ typedef struct
 /* State of generic xlog record construction */
 struct GenericXLogState
 {
-       /*
-        * page's images. Should be first in this struct to have MAXALIGN'ed
-        * images addresses, because some code working with pages directly aligns
-        * addresses, not offsets from beginning of page
-        */
-       char            images[MAX_GENERIC_XLOG_PAGES * BLCKSZ];
+       /* Info about each page, see above */
        PageData        pages[MAX_GENERIC_XLOG_PAGES];
        bool            isLogged;
+       /* Page images (properly aligned) */
+       PGAlignedBlock images[MAX_GENERIC_XLOG_PAGES];
 };
 
 static void writeFragment(PageData *pageData, OffsetNumber offset,
@@ -251,12 +248,12 @@ computeDelta(PageData *pageData, Page curpage, Page targetpage)
 #ifdef WAL_DEBUG
        if (XLOG_DEBUG)
        {
-               char            tmp[BLCKSZ];
+               PGAlignedBlock tmp;
 
-               memcpy(tmp, curpage, BLCKSZ);
-               applyPageRedo(tmp, pageData->delta, pageData->deltaLen);
-               if (memcmp(tmp, targetpage, targetLower) != 0 ||
-                       memcmp(tmp + targetUpper, targetpage + targetUpper,
+               memcpy(tmp.data, curpage, BLCKSZ);
+               applyPageRedo(tmp.data, pageData->delta, pageData->deltaLen);
+               if (memcmp(tmp.data, targetpage, targetLower) != 0 ||
+                       memcmp(tmp.data + targetUpper, targetpage + targetUpper,
                                   BLCKSZ - targetUpper) != 0)
                        elog(ERROR, "result of generic xlog apply does not match");
        }
@@ -277,7 +274,7 @@ GenericXLogStart(Relation relation)
 
        for (i = 0; i < MAX_GENERIC_XLOG_PAGES; i++)
        {
-               state->pages[i].image = state->images + BLCKSZ * i;
+               state->pages[i].image = state->images[i].data;
                state->pages[i].buffer = InvalidBuffer;
        }
 
index 65db2e48d881d7421b6a2f26dede6ddbd9dd908a..85a7b285ec393033e0191cedcf10b69d6cd91c51 100644 (file)
@@ -3210,8 +3210,7 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
 {
        char            path[MAXPGPATH];
        char            tmppath[MAXPGPATH];
-       char            zbuffer_raw[XLOG_BLCKSZ + MAXIMUM_ALIGNOF];
-       char       *zbuffer;
+       PGAlignedXLogBlock zbuffer;
        XLogSegNo       installed_segno;
        XLogSegNo       max_segno;
        int                     fd;
@@ -3263,17 +3262,13 @@ XLogFileInit(XLogSegNo logsegno, bool *use_existent, bool use_lock)
         * fsync below) that all the indirect blocks are down on disk.  Therefore,
         * fdatasync(2) or O_DSYNC will be sufficient to sync future writes to the
         * log file.
-        *
-        * Note: ensure the buffer is reasonably well-aligned; this may save a few
-        * cycles transferring data to the kernel.
         */
-       zbuffer = (char *) MAXALIGN(zbuffer_raw);
-       memset(zbuffer, 0, XLOG_BLCKSZ);
+       memset(zbuffer.data, 0, XLOG_BLCKSZ);
        for (nbytes = 0; nbytes < wal_segment_size; nbytes += XLOG_BLCKSZ)
        {
                errno = 0;
                pgstat_report_wait_start(WAIT_EVENT_WAL_INIT_WRITE);
-               if ((int) write(fd, zbuffer, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
+               if ((int) write(fd, zbuffer.data, XLOG_BLCKSZ) != (int) XLOG_BLCKSZ)
                {
                        int                     save_errno = errno;
 
@@ -3380,7 +3375,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
 {
        char            path[MAXPGPATH];
        char            tmppath[MAXPGPATH];
-       char            buffer[XLOG_BLCKSZ];
+       PGAlignedXLogBlock buffer;
        int                     srcfd;
        int                     fd;
        int                     nbytes;
@@ -3423,7 +3418,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
                 * zeros.
                 */
                if (nread < sizeof(buffer))
-                       memset(buffer, 0, sizeof(buffer));
+                       memset(buffer.data, 0, sizeof(buffer));
 
                if (nread > 0)
                {
@@ -3432,7 +3427,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
                        if (nread > sizeof(buffer))
                                nread = sizeof(buffer);
                        pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_READ);
-                       r = read(srcfd, buffer, nread);
+                       r = read(srcfd, buffer.data, nread);
                        if (r != nread)
                        {
                                if (r < 0)
@@ -3450,7 +3445,7 @@ XLogFileCopy(XLogSegNo destsegno, TimeLineID srcTLI, XLogSegNo srcsegno,
                }
                errno = 0;
                pgstat_report_wait_start(WAIT_EVENT_WAL_COPY_WRITE);
-               if ((int) write(fd, buffer, sizeof(buffer)) != (int) sizeof(buffer))
+               if ((int) write(fd, buffer.data, sizeof(buffer)) != (int) sizeof(buffer))
                {
                        int                     save_errno = errno;
 
@@ -6540,8 +6535,11 @@ StartupXLOG(void)
        xlogreader->system_identifier = ControlFile->system_identifier;
 
        /*
-        * Allocate pages dedicated to WAL consistency checks, those had better be
-        * aligned.
+        * Allocate two page buffers dedicated to WAL consistency checks.  We do
+        * it this way, rather than just making static arrays, for two reasons:
+        * (1) no need to waste the storage in most instantiations of the backend;
+        * (2) a static char array isn't guaranteed to have any particular
+        * alignment, whereas palloc() will provide MAXALIGN'd storage.
         */
        replay_image_masked = (char *) palloc(BLCKSZ);
        master_image_masked = (char *) palloc(BLCKSZ);
index 5bea073a2b7e8af341a016d6e5c8b084efe0fbcc..34d4db42977e74f3d55987828696503d31e99b3d 100644 (file)
@@ -809,12 +809,12 @@ XLogCompressBackupBlock(char *page, uint16 hole_offset, uint16 hole_length,
        int32           len;
        int32           extra_bytes = 0;
        char       *source;
-       char            tmp[BLCKSZ];
+       PGAlignedBlock tmp;
 
        if (hole_length != 0)
        {
                /* must skip the hole */
-               source = tmp;
+               source = tmp.data;
                memcpy(source, page, hole_offset);
                memcpy(source + hole_offset,
                           page + (hole_offset + hole_length),
@@ -917,7 +917,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
        if (lsn <= RedoRecPtr)
        {
                int                     flags;
-               char            copied_buffer[BLCKSZ];
+               PGAlignedBlock copied_buffer;
                char       *origdata = (char *) BufferGetBlock(buffer);
                RelFileNode rnode;
                ForkNumber      forkno;
@@ -935,11 +935,11 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
                        uint16          lower = ((PageHeader) page)->pd_lower;
                        uint16          upper = ((PageHeader) page)->pd_upper;
 
-                       memcpy(copied_buffer, origdata, lower);
-                       memcpy(copied_buffer + upper, origdata + upper, BLCKSZ - upper);
+                       memcpy(copied_buffer.data, origdata, lower);
+                       memcpy(copied_buffer.data + upper, origdata + upper, BLCKSZ - upper);
                }
                else
-                       memcpy(copied_buffer, origdata, BLCKSZ);
+                       memcpy(copied_buffer.data, origdata, BLCKSZ);
 
                XLogBeginInsert();
 
@@ -948,7 +948,7 @@ XLogSaveBufferForHint(Buffer buffer, bool buffer_std)
                        flags |= REGBUF_STANDARD;
 
                BufferGetTag(buffer, &rnode, &forkno, &blkno);
-               XLogRegisterBlock(0, &rnode, forkno, blkno, copied_buffer, flags);
+               XLogRegisterBlock(0, &rnode, forkno, blkno, copied_buffer.data, flags);
 
                recptr = XLogInsert(RM_XLOG_ID, XLOG_FPI_FOR_HINT);
        }
index 4c633c6c49886d9be07ea942028412fc84eb493b..0768ca782263599f4d6dde6fb835d3922f0b6053 100644 (file)
@@ -1412,7 +1412,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
 {
        DecodedBkpBlock *bkpb;
        char       *ptr;
-       char            tmp[BLCKSZ];
+       PGAlignedBlock tmp;
 
        if (!record->blocks[block_id].in_use)
                return false;
@@ -1425,7 +1425,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
        if (bkpb->bimg_info & BKPIMAGE_IS_COMPRESSED)
        {
                /* If a backup block image is compressed, decompress it */
-               if (pglz_decompress(ptr, bkpb->bimg_len, tmp,
+               if (pglz_decompress(ptr, bkpb->bimg_len, tmp.data,
                                                        BLCKSZ - bkpb->hole_length) < 0)
                {
                        report_invalid_record(record, "invalid compressed image at %X/%X, block %d",
@@ -1434,7 +1434,7 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
                                                                  block_id);
                        return false;
                }
-               ptr = tmp;
+               ptr = tmp.data;
        }
 
        /* generate page, taking into account hole if necessary */
index 48743dbfa8b305394413cce85421f9fe81e502ec..f46af41b5625b3f2e535f0ba9f2d6fa329aeb844 100644 (file)
@@ -7220,7 +7220,8 @@ ATAddCheckConstraint(List **wqueue, AlteredTableInfo *tab, Relation rel,
                                                                                recursing | is_readd,   /* allow_merge */
                                                                                !recursing, /* is_local */
                                                                                is_readd,       /* is_internal */
-                                                                               NULL);          /* queryString not available here */
+                                                                               NULL);  /* queryString not available
+                                                                                                * here */
 
        /* we don't expect more than one constraint here */
        Assert(list_length(newcons) <= 1);
@@ -11276,21 +11277,14 @@ static void
 copy_relation_data(SMgrRelation src, SMgrRelation dst,
                                   ForkNumber forkNum, char relpersistence)
 {
-       char       *buf;
+       PGAlignedBlock buf;
        Page            page;
        bool            use_wal;
        bool            copying_initfork;
        BlockNumber nblocks;
        BlockNumber blkno;
 
-       /*
-        * palloc the buffer so that it's MAXALIGN'd.  If it were just a local
-        * char[] array, the compiler might align it on any byte boundary, which
-        * can seriously hurt transfer speed to and from the kernel; not to
-        * mention possibly making log_newpage's accesses to the page header fail.
-        */
-       buf = (char *) palloc(BLCKSZ);
-       page = (Page) buf;
+       page = (Page) buf.data;
 
        /*
         * The init fork for an unlogged relation in many respects has to be
@@ -11314,7 +11308,7 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
                /* If we got a cancel signal during the copy of the data, quit */
                CHECK_FOR_INTERRUPTS();
 
-               smgrread(src, forkNum, blkno, buf);
+               smgrread(src, forkNum, blkno, buf.data);
 
                if (!PageIsVerified(page, blkno))
                        ereport(ERROR,
@@ -11340,11 +11334,9 @@ copy_relation_data(SMgrRelation src, SMgrRelation dst,
                 * rel, because there's no need for smgr to schedule an fsync for this
                 * write; we'll do it ourselves below.
                 */
-               smgrextend(dst, forkNum, blkno, buf, true);
+               smgrextend(dst, forkNum, blkno, buf.data, true);
        }
 
-       pfree(buf);
-
        /*
         * If the rel is WAL-logged, must fsync before commit.  We use heap_sync
         * to ensure that the toast table gets fsync'd too.  (For a temp or
index 3e51cf3d3122acdb5992a12db2e5fc01e1e0637f..370429d746cb334fca1fd512a954f8c75d6337c1 100644 (file)
@@ -499,11 +499,11 @@ SendTimeLineHistory(TimeLineHistoryCmd *cmd)
        bytesleft = histfilelen;
        while (bytesleft > 0)
        {
-               char            rbuf[BLCKSZ];
+               PGAlignedBlock rbuf;
                int                     nread;
 
                pgstat_report_wait_start(WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ);
-               nread = read(fd, rbuf, sizeof(rbuf));
+               nread = read(fd, rbuf.data, sizeof(rbuf));
                pgstat_report_wait_end();
                if (nread < 0)
                        ereport(ERROR,
@@ -516,7 +516,7 @@ SendTimeLineHistory(TimeLineHistoryCmd *cmd)
                                         errmsg("could not read file \"%s\": read %d of %zu",
                                                        path, nread, (Size) bytesleft)));
 
-               pq_sendbytes(&buf, rbuf, nread);
+               pq_sendbytes(&buf, rbuf.data, nread);
                bytesleft -= nread;
        }
        CloseTransientFile(fd);
index efbede76297269947ceeee3ac03b122d9e1e4641..e93813d97371f32aa0f6cbe839fdbd34b7dfacbc 100644 (file)
@@ -96,7 +96,7 @@ struct BufFile
        off_t           curOffset;              /* offset part of current pos */
        int                     pos;                    /* next read/write position in buffer */
        int                     nbytes;                 /* total # of valid bytes in buffer */
-       char            buffer[BLCKSZ];
+       PGAlignedBlock buffer;
 };
 
 static BufFile *makeBufFileCommon(int nfiles);
@@ -437,7 +437,7 @@ BufFileLoadBuffer(BufFile *file)
         * Read whatever we can get, up to a full bufferload.
         */
        file->nbytes = FileRead(thisfile,
-                                                       file->buffer,
+                                                       file->buffer.data,
                                                        sizeof(file->buffer),
                                                        WAIT_EVENT_BUFFILE_READ);
        if (file->nbytes < 0)
@@ -502,7 +502,7 @@ BufFileDumpBuffer(BufFile *file)
                        file->offsets[file->curFile] = file->curOffset;
                }
                bytestowrite = FileWrite(thisfile,
-                                                                file->buffer + wpos,
+                                                                file->buffer.data + wpos,
                                                                 bytestowrite,
                                                                 WAIT_EVENT_BUFFILE_WRITE);
                if (bytestowrite <= 0)
@@ -572,7 +572,7 @@ BufFileRead(BufFile *file, void *ptr, size_t size)
                        nthistime = size;
                Assert(nthistime > 0);
 
-               memcpy(ptr, file->buffer + file->pos, nthistime);
+               memcpy(ptr, file->buffer.data + file->pos, nthistime);
 
                file->pos += nthistime;
                ptr = (void *) ((char *) ptr + nthistime);
@@ -621,7 +621,7 @@ BufFileWrite(BufFile *file, void *ptr, size_t size)
                        nthistime = size;
                Assert(nthistime > 0);
 
-               memcpy(file->buffer + file->pos, ptr, nthistime);
+               memcpy(file->buffer.data + file->pos, ptr, nthistime);
 
                file->dirty = true;
                file->pos += nthistime;
index 8d0ee7fc937d51edfcfb9d68b6a566c863c1bee3..7c4ad1c44947351dab3c713e2f2cfbf062e518fc 100644 (file)
@@ -615,10 +615,9 @@ static void
 fsm_extend(Relation rel, BlockNumber fsm_nblocks)
 {
        BlockNumber fsm_nblocks_now;
-       Page            pg;
+       PGAlignedBlock pg;
 
-       pg = (Page) palloc(BLCKSZ);
-       PageInit(pg, BLCKSZ, 0);
+       PageInit((Page) pg.data, BLCKSZ, 0);
 
        /*
         * We use the relation extension lock to lock out other backends trying to
@@ -648,10 +647,10 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks)
 
        while (fsm_nblocks_now < fsm_nblocks)
        {
-               PageSetChecksumInplace(pg, fsm_nblocks_now);
+               PageSetChecksumInplace((Page) pg.data, fsm_nblocks_now);
 
                smgrextend(rel->rd_smgr, FSM_FORKNUM, fsm_nblocks_now,
-                                  (char *) pg, false);
+                                  pg.data, false);
                fsm_nblocks_now++;
        }
 
@@ -659,8 +658,6 @@ fsm_extend(Relation rel, BlockNumber fsm_nblocks)
        rel->rd_smgr->smgr_fsm_nblocks = fsm_nblocks_now;
 
        UnlockRelationForExtension(rel, ExclusiveLock);
-
-       pfree(pg);
 }
 
 /*
index 62d7797e1e5583abd603231ae8a17ea911df0970..50a150b7136b59030f4b879570f869e9b875bb0f 100644 (file)
@@ -240,11 +240,11 @@ ltsWriteBlock(LogicalTapeSet *lts, long blocknum, void *buffer)
         */
        while (blocknum > lts->nBlocksWritten)
        {
-               char            zerobuf[BLCKSZ];
+               PGAlignedBlock zerobuf;
 
-               MemSet(zerobuf, 0, sizeof(zerobuf));
+               MemSet(zerobuf.data, 0, sizeof(zerobuf));
 
-               ltsWriteBlock(lts, lts->nBlocksWritten, zerobuf);
+               ltsWriteBlock(lts, lts->nBlocksWritten, zerobuf.data);
        }
 
        /* Write the requested block */
index 42e3f3023a0f22378c4d6a082de9b6379176bf8b..68ef7fa6d770c1d596709f8df17cc6845f403d64 100644 (file)
@@ -116,18 +116,17 @@ dir_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
        /* Do pre-padding on non-compressed files */
        if (pad_to_size && dir_data->compression == 0)
        {
-               char       *zerobuf;
+               PGAlignedXLogBlock zerobuf;
                int                     bytes;
 
-               zerobuf = pg_malloc0(XLOG_BLCKSZ);
+               memset(zerobuf.data, 0, XLOG_BLCKSZ);
                for (bytes = 0; bytes < pad_to_size; bytes += XLOG_BLCKSZ)
                {
                        errno = 0;
-                       if (write(fd, zerobuf, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+                       if (write(fd, zerobuf.data, XLOG_BLCKSZ) != XLOG_BLCKSZ)
                        {
                                int                     save_errno = errno;
 
-                               pg_free(zerobuf);
                                close(fd);
 
                                /*
@@ -137,7 +136,6 @@ dir_open_for_write(const char *pathname, const char *temp_suffix, size_t pad_to_
                                return NULL;
                        }
                }
-               pg_free(zerobuf);
 
                if (lseek(fd, 0, SEEK_SET) != 0)
                {
@@ -513,24 +511,20 @@ tar_write(Walfile f, const void *buf, size_t count)
 static bool
 tar_write_padding_data(TarMethodFile *f, size_t bytes)
 {
-       char       *zerobuf = pg_malloc0(XLOG_BLCKSZ);
+       PGAlignedXLogBlock zerobuf;
        size_t          bytesleft = bytes;
 
+       memset(zerobuf.data, 0, XLOG_BLCKSZ);
        while (bytesleft)
        {
-               size_t          bytestowrite = bytesleft > XLOG_BLCKSZ ? XLOG_BLCKSZ : bytesleft;
-
-               ssize_t         r = tar_write(f, zerobuf, bytestowrite);
+               size_t          bytestowrite = Min(bytesleft, XLOG_BLCKSZ);
+               ssize_t         r = tar_write(f, zerobuf.data, bytestowrite);
 
                if (r < 0)
-               {
-                       pg_free(zerobuf);
                        return false;
-               }
                bytesleft -= r;
        }
 
-       pg_free(zerobuf);
        return true;
 }
 
index d63a3a27f60c4acb8144f7ae24fc091c4ee7f5a4..6fb403a5a8a04b35c7e415c464bfcc89267ad344 100644 (file)
@@ -1209,7 +1209,7 @@ KillExistingArchiveStatus(void)
 static void
 WriteEmptyXLOG(void)
 {
-       char       *buffer;
+       PGAlignedXLogBlock buffer;
        XLogPageHeader page;
        XLogLongPageHeader longpage;
        XLogRecord *record;
@@ -1219,12 +1219,10 @@ WriteEmptyXLOG(void)
        int                     nbytes;
        char       *recptr;
 
-       /* Use malloc() to ensure buffer is MAXALIGNED */
-       buffer = (char *) pg_malloc(XLOG_BLCKSZ);
-       page = (XLogPageHeader) buffer;
-       memset(buffer, 0, XLOG_BLCKSZ);
+       memset(buffer.data, 0, XLOG_BLCKSZ);
 
        /* Set up the XLOG page header */
+       page = (XLogPageHeader) buffer.data;
        page->xlp_magic = XLOG_PAGE_MAGIC;
        page->xlp_info = XLP_LONG_HEADER;
        page->xlp_tli = ControlFile.checkPointCopy.ThisTimeLineID;
@@ -1271,7 +1269,7 @@ WriteEmptyXLOG(void)
        }
 
        errno = 0;
-       if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+       if (write(fd, buffer.data, XLOG_BLCKSZ) != XLOG_BLCKSZ)
        {
                /* if write didn't set errno, assume problem is no disk space */
                if (errno == 0)
@@ -1282,11 +1280,11 @@ WriteEmptyXLOG(void)
        }
 
        /* Fill the rest of the file with zeroes */
-       memset(buffer, 0, XLOG_BLCKSZ);
+       memset(buffer.data, 0, XLOG_BLCKSZ);
        for (nbytes = XLOG_BLCKSZ; nbytes < WalSegSz; nbytes += XLOG_BLCKSZ)
        {
                errno = 0;
-               if (write(fd, buffer, XLOG_BLCKSZ) != XLOG_BLCKSZ)
+               if (write(fd, buffer.data, XLOG_BLCKSZ) != XLOG_BLCKSZ)
                {
                        if (errno == 0)
                                errno = ENOSPC;
index 160a91284771bdfbb34ccf0445acd28372568158..36d48899f743d32ee40a4036d435856573f87086 100644 (file)
@@ -156,7 +156,7 @@ recurse_dir(const char *datadir, const char *parentpath,
 static void
 rewind_copy_file_range(const char *path, off_t begin, off_t end, bool trunc)
 {
-       char            buf[BLCKSZ];
+       PGAlignedBlock buf;
        char            srcpath[MAXPGPATH];
        int                     srcfd;
 
@@ -182,7 +182,7 @@ rewind_copy_file_range(const char *path, off_t begin, off_t end, bool trunc)
                else
                        len = end - begin;
 
-               readlen = read(srcfd, buf, len);
+               readlen = read(srcfd, buf.data, len);
 
                if (readlen < 0)
                        pg_fatal("could not read file \"%s\": %s\n",
@@ -190,7 +190,7 @@ rewind_copy_file_range(const char *path, off_t begin, off_t end, bool trunc)
                else if (readlen == 0)
                        pg_fatal("unexpected EOF while reading file \"%s\"\n", srcpath);
 
-               write_target_range(buf, begin, readlen);
+               write_target_range(buf.data, begin, readlen);
                begin += readlen;
        }
 
index f68211aa20ad876aa1c584e0990084db6e9c49ba..c27cc93dc2e8a66bf00d8908c0864e915838bc7f 100644 (file)
@@ -132,8 +132,8 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile,
 {
        int                     src_fd;
        int                     dst_fd;
-       char       *buffer;
-       char       *new_vmbuf;
+       PGAlignedBlock buffer;
+       PGAlignedBlock new_vmbuf;
        ssize_t         totalBytesRead = 0;
        ssize_t         src_filesize;
        int                     rewriteVmBytesPerPage;
@@ -159,13 +159,6 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile,
        /* Save old file size */
        src_filesize = statbuf.st_size;
 
-       /*
-        * Malloc the work buffers, rather than making them local arrays, to
-        * ensure adequate alignment.
-        */
-       buffer = (char *) pg_malloc(BLCKSZ);
-       new_vmbuf = (char *) pg_malloc(BLCKSZ);
-
        /*
         * Turn each visibility map page into 2 pages one by one. Each new page
         * has the same page header as the old one.  If the last section of the
@@ -181,7 +174,7 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile,
                PageHeaderData pageheader;
                bool            old_lastblk;
 
-               if ((bytesRead = read(src_fd, buffer, BLCKSZ)) != BLCKSZ)
+               if ((bytesRead = read(src_fd, buffer.data, BLCKSZ)) != BLCKSZ)
                {
                        if (bytesRead < 0)
                                pg_fatal("error while copying relation \"%s.%s\": could not read file \"%s\": %s\n",
@@ -195,7 +188,7 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile,
                old_lastblk = (totalBytesRead == src_filesize);
 
                /* Save the page header data */
-               memcpy(&pageheader, buffer, SizeOfPageHeaderData);
+               memcpy(&pageheader, buffer.data, SizeOfPageHeaderData);
 
                /*
                 * These old_* variables point to old visibility map page. old_cur
@@ -203,8 +196,8 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile,
                 * old block.  old_break is the end+1 position on the old page for the
                 * data that will be transferred to the current new page.
                 */
-               old_cur = buffer + SizeOfPageHeaderData;
-               old_blkend = buffer + bytesRead;
+               old_cur = buffer.data + SizeOfPageHeaderData;
+               old_blkend = buffer.data + bytesRead;
                old_break = old_cur + rewriteVmBytesPerPage;
 
                while (old_break <= old_blkend)
@@ -214,12 +207,12 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile,
                        bool            old_lastpart;
 
                        /* First, copy old page header to new page */
-                       memcpy(new_vmbuf, &pageheader, SizeOfPageHeaderData);
+                       memcpy(new_vmbuf.data, &pageheader, SizeOfPageHeaderData);
 
                        /* Rewriting the last part of the last old page? */
                        old_lastpart = old_lastblk && (old_break == old_blkend);
 
-                       new_cur = new_vmbuf + SizeOfPageHeaderData;
+                       new_cur = new_vmbuf.data + SizeOfPageHeaderData;
 
                        /* Process old page bytes one by one, and turn it into new page. */
                        while (old_cur < old_break)
@@ -253,11 +246,11 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile,
 
                        /* Set new checksum for visibility map page, if enabled */
                        if (new_cluster.controldata.data_checksum_version != 0)
-                               ((PageHeader) new_vmbuf)->pd_checksum =
-                                       pg_checksum_page(new_vmbuf, new_blkno);
+                               ((PageHeader) new_vmbuf.data)->pd_checksum =
+                                       pg_checksum_page(new_vmbuf.data, new_blkno);
 
                        errno = 0;
-                       if (write(dst_fd, new_vmbuf, BLCKSZ) != BLCKSZ)
+                       if (write(dst_fd, new_vmbuf.data, BLCKSZ) != BLCKSZ)
                        {
                                /* if write didn't set errno, assume problem is no disk space */
                                if (errno == 0)
@@ -273,8 +266,6 @@ rewriteVisibilityMap(const char *fromfile, const char *tofile,
        }
 
        /* Clean up */
-       pg_free(buffer);
-       pg_free(new_vmbuf);
        close(dst_fd);
        close(src_fd);
 }
index bf7feedf3462240e6d0f2c761e0a5c8d9319b62c..d46bac2cd6593138e414160f3f1f7b110cc8e095 100644 (file)
@@ -75,8 +75,8 @@ skipfile(const char *fn)
 static void
 scan_file(const char *fn, BlockNumber segmentno)
 {
-       char            buf[BLCKSZ];
-       PageHeader      header = (PageHeader) buf;
+       PGAlignedBlock buf;
+       PageHeader      header = (PageHeader) buf.data;
        int                     f;
        BlockNumber blockno;
 
@@ -93,7 +93,7 @@ scan_file(const char *fn, BlockNumber segmentno)
        for (blockno = 0;; blockno++)
        {
                uint16          csum;
-               int                     r = read(f, buf, BLCKSZ);
+               int                     r = read(f, buf.data, BLCKSZ);
 
                if (r == 0)
                        break;
@@ -109,7 +109,7 @@ scan_file(const char *fn, BlockNumber segmentno)
                if (PageIsNew(header))
                        continue;
 
-               csum = pg_checksum_page(buf, blockno + segmentno * RELSEG_SIZE);
+               csum = pg_checksum_page(buf.data, blockno + segmentno * RELSEG_SIZE);
                if (csum != header->pd_checksum)
                {
                        if (ControlFile->data_checksum_version == PG_DATA_CHECKSUM_VERSION)
index 2a557b37e5de3a5ab5b4eaadc7e9fc259e76fe2c..ad28333a1e917be9d6d39ee21579bf935cea49c0 100644 (file)
@@ -209,13 +209,13 @@ search_directory(const char *directory, const char *fname)
        /* set WalSegSz if file is successfully opened */
        if (fd >= 0)
        {
-               char            buf[XLOG_BLCKSZ];
+               PGAlignedXLogBlock buf;
                int                     r;
 
-               r = read(fd, buf, XLOG_BLCKSZ);
+               r = read(fd, buf.data, XLOG_BLCKSZ);
                if (r == XLOG_BLCKSZ)
                {
-                       XLogLongPageHeader longhdr = (XLogLongPageHeader) buf;
+                       XLogLongPageHeader longhdr = (XLogLongPageHeader) buf.data;
 
                        WalSegSz = longhdr->xlp_seg_size;
 
index 1e50103095b0555fcdb485b5acb062e96d900828..901d7911980f4567d86c19bd4b4c48117998f86b 100644 (file)
@@ -989,6 +989,32 @@ extern void ExceptionalCondition(const char *conditionName,
  * ----------------------------------------------------------------
  */
 
+/*
+ * Use this, not "char buf[BLCKSZ]", to declare a field or local variable
+ * holding a page buffer, if that page might be accessed as a page and not
+ * just a string of bytes.  Otherwise the variable might be under-aligned,
+ * causing problems on alignment-picky hardware.  (In some places, we use
+ * this to declare buffers even though we only pass them to read() and
+ * write(), because copying to/from aligned buffers is usually faster than
+ * using unaligned buffers.)  We include both "double" and "int64" in the
+ * union to ensure that the compiler knows the value must be MAXALIGN'ed
+ * (cf. configure's computation of MAXIMUM_ALIGNOF).
+ */
+typedef union PGAlignedBlock
+{
+       char            data[BLCKSZ];
+       double          force_align_d;
+       int64           force_align_i64;
+} PGAlignedBlock;
+
+/* Same, but for an XLOG_BLCKSZ-sized buffer */
+typedef union PGAlignedXLogBlock
+{
+       char            data[XLOG_BLCKSZ];
+       double          force_align_d;
+       int64           force_align_i64;
+} PGAlignedXLogBlock;
+
 /* msb for char */
 #define HIGHBIT                                        (0x80)
 #define IS_HIGHBIT_SET(ch)             ((unsigned char)(ch) & HIGHBIT)