Improve bulk-insert performance by keeping the current target buffer pinned
author     Tom Lane <tgl@sss.pgh.pa.us>
Thu, 6 Nov 2008 20:51:15 +0000 (20:51 +0000)
committer  Tom Lane <tgl@sss.pgh.pa.us>
Thu, 6 Nov 2008 20:51:15 +0000 (20:51 +0000)
(but not locked, as that would risk deadlocks).  Also, make it work in a small
ring of buffers to avoid having bulk inserts trash the whole buffer arena.

Robert Haas, after an idea of Simon Riggs'.
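For orientation, here is a minimal sketch of the calling convention this patch introduces, modeled on the CopyFrom() and intorel changes below.  The identifiers (GetBulkInsertState, heap_insert options, FreeBulkInsertState, heap_sync) are from the patch itself; "rel", "tuple", and the relation_is_new condition are placeholders standing in for caller context, and the per-tuple loop is elided.

    CommandId       mycid = GetCurrentCommandId(true);
    int             hi_options = 0;                 /* default heap_insert options */
    BulkInsertState bistate = GetBulkInsertState();

    if (relation_is_new)                            /* placeholder condition */
    {
        hi_options |= HEAP_INSERT_SKIP_FSM;
        if (!XLogArchivingActive())
            hi_options |= HEAP_INSERT_SKIP_WAL;
    }

    /* for each tuple to be loaded: */
    heap_insert(rel, tuple, mycid, hi_options, bistate);

    FreeBulkInsertState(bistate);

    /* if WAL logging was skipped, the heap must be fsync'd before commit */
    if (hi_options & HEAP_INSERT_SKIP_WAL)
        heap_sync(rel);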

12 files changed:
src/backend/access/heap/heapam.c
src/backend/access/heap/hio.c
src/backend/access/heap/rewriteheap.c
src/backend/access/heap/tuptoaster.c
src/backend/commands/copy.c
src/backend/executor/execMain.c
src/backend/storage/buffer/README
src/backend/storage/buffer/freelist.c
src/include/access/heapam.h
src/include/access/hio.h
src/include/access/tuptoaster.h
src/include/storage/bufmgr.h

src/backend/access/heap/heapam.c
index 49bca5b329946c1eebc10ed6ad6c2c9b77ac119f..7139b03471ea13d4bec72a91c9a1fa4e3cd7a139 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.268 2008/10/31 19:40:26 heikki Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.269 2008/11/06 20:51:14 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -1799,23 +1799,53 @@ UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
 }
 
 
+/*
+ * GetBulkInsertState - prepare status object for a bulk insert
+ */
+BulkInsertState
+GetBulkInsertState(void)
+{
+       BulkInsertState bistate;
+
+       bistate = (BulkInsertState) palloc(sizeof(BulkInsertStateData));
+       bistate->strategy = GetAccessStrategy(BAS_BULKWRITE);
+       bistate->current_buf = InvalidBuffer;
+       return bistate;
+}
+
+/*
+ * FreeBulkInsertState - clean up after finishing a bulk insert
+ */
+void
+FreeBulkInsertState(BulkInsertState bistate)
+{
+       if (bistate->current_buf != InvalidBuffer)
+               ReleaseBuffer(bistate->current_buf);
+       FreeAccessStrategy(bistate->strategy);
+       pfree(bistate);
+}
+
+
 /*
  *     heap_insert             - insert tuple into a heap
  *
  * The new tuple is stamped with current transaction ID and the specified
  * command ID.
  *
- * If use_wal is false, the new tuple is not logged in WAL, even for a
- * non-temp relation.  Safe usage of this behavior requires that we arrange
- * that all new tuples go into new pages not containing any tuples from other
- * transactions, and that the relation gets fsync'd before commit.
- * (See also heap_sync() comments)
+ * If the HEAP_INSERT_SKIP_WAL option is specified, the new tuple is not
+ * logged in WAL, even for a non-temp relation.  Safe usage of this behavior
+ * requires that we arrange that all new tuples go into new pages not
+ * containing any tuples from other transactions, and that the relation gets
+ * fsync'd before commit.  (See also heap_sync() comments)
+ *
+ * The HEAP_INSERT_SKIP_FSM option is passed directly to
+ * RelationGetBufferForTuple, which see for more info.
  *
- * use_fsm is passed directly to RelationGetBufferForTuple, which see for
- * more info.
+ * Note that these options will be applied when inserting into the heap's
+ * TOAST table, too, if the tuple requires any out-of-line data.
  *
- * Note that use_wal and use_fsm will be applied when inserting into the
- * heap's TOAST table, too, if the tuple requires any out-of-line data.
+ * The BulkInsertState object (if any; bistate can be NULL for default
+ * behavior) is also just passed through to RelationGetBufferForTuple.
  *
  * The return value is the OID assigned to the tuple (either here or by the
  * caller), or InvalidOid if no OID.  The header fields of *tup are updated
@@ -1825,7 +1855,7 @@ UpdateXmaxHintBits(HeapTupleHeader tuple, Buffer buffer, TransactionId xid)
  */
 Oid
 heap_insert(Relation relation, HeapTuple tup, CommandId cid,
-                       bool use_wal, bool use_fsm)
+                       int options, BulkInsertState bistate)
 {
        TransactionId xid = GetCurrentTransactionId();
        HeapTuple       heaptup;
@@ -1877,14 +1907,13 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
                heaptup = tup;
        }
        else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
-               heaptup = toast_insert_or_update(relation, tup, NULL,
-                                                                                use_wal, use_fsm);
+               heaptup = toast_insert_or_update(relation, tup, NULL, options);
        else
                heaptup = tup;
 
        /* Find buffer to insert this tuple into */
        buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
-                                                                          InvalidBuffer, use_fsm);
+                                                                          InvalidBuffer, options, bistate);
 
        /* NO EREPORT(ERROR) from here till changes are logged */
        START_CRIT_SECTION();
@@ -1905,7 +1934,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
        MarkBufferDirty(buffer);
 
        /* XLOG stuff */
-       if (use_wal && !relation->rd_istemp)
+       if (!(options & HEAP_INSERT_SKIP_WAL) && !relation->rd_istemp)
        {
                xl_heap_insert xlrec;
                xl_heap_header xlhdr;
@@ -2000,7 +2029,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
 Oid
 simple_heap_insert(Relation relation, HeapTuple tup)
 {
-       return heap_insert(relation, tup, GetCurrentCommandId(true), true, true);
+       return heap_insert(relation, tup, GetCurrentCommandId(true), 0, NULL);
 }
 
 /*
@@ -2595,8 +2624,7 @@ l2:
                if (need_toast)
                {
                        /* Note we always use WAL and FSM during updates */
-                       heaptup = toast_insert_or_update(relation, newtup, &oldtup,
-                                                                                        true, true);
+                       heaptup = toast_insert_or_update(relation, newtup, &oldtup, 0);
                        newtupsize = MAXALIGN(heaptup->t_len);
                }
                else
@@ -2623,7 +2651,7 @@ l2:
                {
                        /* Assume there's no chance to put heaptup on same page. */
                        newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
-                                                                                          buffer, true);
+                                                                                          buffer, 0, NULL);
                }
                else
                {
@@ -2640,7 +2668,7 @@ l2:
                                 */
                                LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
                                newbuf = RelationGetBufferForTuple(relation, heaptup->t_len,
-                                                                                                  buffer, true);
+                                                                                                  buffer, 0, NULL);
                        }
                        else
                        {
src/backend/access/heap/hio.c
index 3723977fe09c05097165a16fdf4eab259018bac7..5cfd150b8ef931b954156512c25a4f288a2b640d 100644 (file)
@@ -8,13 +8,14 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.73 2008/09/30 10:52:10 heikki Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.74 2008/11/06 20:51:14 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 #include "postgres.h"
 
+#include "access/heapam.h"
 #include "access/hio.h"
 #include "storage/bufmgr.h"
 #include "storage/freespace.h"
@@ -56,6 +57,43 @@ RelationPutHeapTuple(Relation relation,
        ((HeapTupleHeader) item)->t_ctid = tuple->t_self;
 }
 
+/*
+ * Read in a buffer, using bulk-insert strategy if bistate isn't NULL.
+ */
+static Buffer
+ReadBufferBI(Relation relation, BlockNumber targetBlock,
+                        BulkInsertState bistate)
+{
+       Buffer buffer;
+
+       /* If not bulk-insert, exactly like ReadBuffer */
+       if (!bistate)
+               return ReadBuffer(relation, targetBlock);
+
+       /* If we have the desired block already pinned, re-pin and return it */
+       if (bistate->current_buf != InvalidBuffer)
+       {
+               if (BufferGetBlockNumber(bistate->current_buf) == targetBlock)
+               {
+                       IncrBufferRefCount(bistate->current_buf);
+                       return bistate->current_buf;
+               }
+               /* ... else drop the old buffer */
+               ReleaseBuffer(bistate->current_buf);
+               bistate->current_buf = InvalidBuffer;
+       }
+
+       /* Perform a read using the buffer strategy */
+       buffer = ReadBufferExtended(relation, MAIN_FORKNUM, targetBlock,
+                                                               RBM_NORMAL, bistate->strategy);
+
+       /* Save the selected block as target for future inserts */
+       IncrBufferRefCount(buffer);
+       bistate->current_buf = buffer;
+
+       return buffer;
+}
+
 /*
  * RelationGetBufferForTuple
  *
@@ -80,13 +118,13 @@ RelationPutHeapTuple(Relation relation,
  *     happen if space is freed in that page after heap_update finds there's not
  *     enough there).  In that case, the page will be pinned and locked only once.
  *
- *     If use_fsm is true (the normal case), we use FSM to help us find free
- *     space.  If use_fsm is false, we always append a new empty page to the
- *     end of the relation if the tuple won't fit on the current target page.
+ *     We normally use FSM to help us find free space.  However,
+ *     if HEAP_INSERT_SKIP_FSM is specified, we just append a new empty page to
+ *     the end of the relation if the tuple won't fit on the current target page.
  *     This can save some cycles when we know the relation is new and doesn't
  *     contain useful amounts of free space.
  *
- *     The use_fsm = false case is also useful for non-WAL-logged additions to a
+ *     HEAP_INSERT_SKIP_FSM is also useful for non-WAL-logged additions to a
  *     relation, if the caller holds exclusive lock and is careful to invalidate
  *     relation->rd_targblock before the first insertion --- that ensures that
  *     all insertions will occur into newly added pages and not be intermixed
@@ -94,6 +132,12 @@ RelationPutHeapTuple(Relation relation,
  *     any committed data of other transactions.  (See heap_insert's comments
  *     for additional constraints needed for safe usage of this behavior.)
  *
+ *     The caller can also provide a BulkInsertState object to optimize many
+ *     insertions into the same relation.  This keeps a pin on the current
+ *     insertion target page (to save pin/unpin cycles) and also passes a
+ *     BULKWRITE buffer selection strategy object to the buffer manager.
+ *     Passing NULL for bistate selects the default behavior.
+ *
  *     We always try to avoid filling existing pages further than the fillfactor.
  *     This is OK since this routine is not consulted when updating a tuple and
  *     keeping it on the same page, which is the scenario fillfactor is meant
@@ -104,8 +148,10 @@ RelationPutHeapTuple(Relation relation,
  */
 Buffer
 RelationGetBufferForTuple(Relation relation, Size len,
-                                                 Buffer otherBuffer, bool use_fsm)
+                                                 Buffer otherBuffer, int options,
+                                                 struct BulkInsertStateData *bistate)
 {
+       bool            use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
        Buffer          buffer = InvalidBuffer;
        Page            page;
        Size            pageFreeSpace,
@@ -116,6 +162,9 @@ RelationGetBufferForTuple(Relation relation, Size len,
 
        len = MAXALIGN(len);            /* be conservative */
 
+       /* Bulk insert is not supported for updates, only inserts. */
+       Assert(otherBuffer == InvalidBuffer || !bistate);
+
        /*
         * If we're gonna fail for oversize tuple, do it right away
         */
@@ -137,25 +186,27 @@ RelationGetBufferForTuple(Relation relation, Size len,
 
        /*
         * We first try to put the tuple on the same page we last inserted a tuple
-        * on, as cached in the relcache entry.  If that doesn't work, we ask the
-        * shared Free Space Map to locate a suitable page.  Since the FSM's info
-        * might be out of date, we have to be prepared to loop around and retry
-        * multiple times.      (To insure this isn't an infinite loop, we must update
-        * the FSM with the correct amount of free space on each page that proves
-        * not to be suitable.)  If the FSM has no record of a page with enough
-        * free space, we give up and extend the relation.
+        * on, as cached in the BulkInsertState or relcache entry.  If that
+        * doesn't work, we ask the Free Space Map to locate a suitable page.
+        * Since the FSM's info might be out of date, we have to be prepared to
+        * loop around and retry multiple times. (To insure this isn't an infinite
+        * loop, we must update the FSM with the correct amount of free space on
+        * each page that proves not to be suitable.)  If the FSM has no record of
+        * a page with enough free space, we give up and extend the relation.
         *
         * When use_fsm is false, we either put the tuple onto the existing target
         * page or extend the relation.
         */
-       if (len + saveFreeSpace <= MaxHeapTupleSize)
-               targetBlock = relation->rd_targblock;
-       else
+       if (len + saveFreeSpace > MaxHeapTupleSize)
        {
-               /* can't fit, don't screw up FSM request tracking by trying */
+               /* can't fit, don't bother asking FSM */
                targetBlock = InvalidBlockNumber;
                use_fsm = false;
        }
+       else if (bistate && bistate->current_buf != InvalidBuffer)
+               targetBlock = BufferGetBlockNumber(bistate->current_buf);
+       else
+               targetBlock = relation->rd_targblock;
 
        if (targetBlock == InvalidBlockNumber && use_fsm)
        {
@@ -189,7 +240,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
                if (otherBuffer == InvalidBuffer)
                {
                        /* easy case */
-                       buffer = ReadBuffer(relation, targetBlock);
+                       buffer = ReadBufferBI(relation, targetBlock, bistate);
                        LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
                }
                else if (otherBlock == targetBlock)
@@ -274,7 +325,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
         * it worth keeping an accurate file length in shared memory someplace,
         * rather than relying on the kernel to do it for us?
         */
-       buffer = ReadBuffer(relation, P_NEW);
+       buffer = ReadBufferBI(relation, P_NEW, bistate);
 
        /*
         * We can be certain that locking the otherBuffer first is OK, since it
src/backend/access/heap/rewriteheap.c
index cd7302bd5d718b84ba7efa6e77bf9d9881747b10..18c7a72d64fcd5ed860d58c41d980278f080ff0c 100644 (file)
@@ -96,7 +96,7 @@
  * Portions Copyright (c) 1994-5, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.15 2008/08/11 11:05:10 heikki Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.16 2008/11/06 20:51:14 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -575,7 +575,9 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
        }
        else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
                heaptup = toast_insert_or_update(state->rs_new_rel, tup, NULL,
-                                                                                state->rs_use_wal, false);
+                                                                                HEAP_INSERT_SKIP_FSM |
+                                                                                (state->rs_use_wal ?
+                                                                            0 : HEAP_INSERT_SKIP_WAL));
        else
                heaptup = tup;
 
src/backend/access/heap/tuptoaster.c
index e3014e288ab14a37fad7533579c3a67e0252f9b7..f8bb77bd0a9a6ab5bf8d1adc2e7507add28de58e 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.90 2008/11/02 01:45:27 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.91 2008/11/06 20:51:14 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -74,8 +74,7 @@ do { \
 
 
 static void toast_delete_datum(Relation rel, Datum value);
-static Datum toast_save_datum(Relation rel, Datum value,
-                                bool use_wal, bool use_fsm);
+static Datum toast_save_datum(Relation rel, Datum value, int options);
 static struct varlena *toast_fetch_datum(struct varlena * attr);
 static struct varlena *toast_fetch_datum_slice(struct varlena * attr,
                                                int32 sliceoffset, int32 length);
@@ -400,7 +399,7 @@ toast_delete(Relation rel, HeapTuple oldtup)
  * Inputs:
  *     newtup: the candidate new tuple to be inserted
  *     oldtup: the old row version for UPDATE, or NULL for INSERT
- *     use_wal, use_fsm: flags to be passed to heap_insert() for toast rows
+ *     options: options to be passed to heap_insert() for toast rows
  * Result:
  *     either newtup if no toasting is needed, or a palloc'd modified tuple
  *     that is what should actually get stored
@@ -411,7 +410,7 @@ toast_delete(Relation rel, HeapTuple oldtup)
  */
 HeapTuple
 toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
-                                          bool use_wal, bool use_fsm)
+                                          int options)
 {
        HeapTuple       result_tuple;
        TupleDesc       tupleDesc;
@@ -677,8 +676,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
                {
                        old_value = toast_values[i];
                        toast_action[i] = 'p';
-                       toast_values[i] = toast_save_datum(rel, toast_values[i],
-                                                                                          use_wal, use_fsm);
+                       toast_values[i] = toast_save_datum(rel, toast_values[i], options);
                        if (toast_free[i])
                                pfree(DatumGetPointer(old_value));
                        toast_free[i] = true;
@@ -728,8 +726,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
                i = biggest_attno;
                old_value = toast_values[i];
                toast_action[i] = 'p';
-               toast_values[i] = toast_save_datum(rel, toast_values[i],
-                                                                                  use_wal, use_fsm);
+               toast_values[i] = toast_save_datum(rel, toast_values[i], options);
                if (toast_free[i])
                        pfree(DatumGetPointer(old_value));
                toast_free[i] = true;
@@ -838,8 +835,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
                i = biggest_attno;
                old_value = toast_values[i];
                toast_action[i] = 'p';
-               toast_values[i] = toast_save_datum(rel, toast_values[i],
-                                                                                  use_wal, use_fsm);
+               toast_values[i] = toast_save_datum(rel, toast_values[i], options);
                if (toast_free[i])
                        pfree(DatumGetPointer(old_value));
                toast_free[i] = true;
@@ -1120,8 +1116,7 @@ toast_compress_datum(Datum value)
  * ----------
  */
 static Datum
-toast_save_datum(Relation rel, Datum value,
-                                bool use_wal, bool use_fsm)
+toast_save_datum(Relation rel, Datum value, int options)
 {
        Relation        toastrel;
        Relation        toastidx;
@@ -1218,7 +1213,7 @@ toast_save_datum(Relation rel, Datum value,
                memcpy(VARDATA(&chunk_data), data_p, chunk_size);
                toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);
 
-               heap_insert(toastrel, toasttup, mycid, use_wal, use_fsm);
+               heap_insert(toastrel, toasttup, mycid, options, NULL);
 
                /*
                 * Create the index entry.      We cheat a little here by not using
src/backend/commands/copy.c
index 4d6fd988d77a90390ee47d5894d98d2576d43682..2c680008a7af9f4cb5b5d7f85d6085eea52afbe5 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.300 2008/11/02 01:45:27 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.301 2008/11/06 20:51:14 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1653,8 +1653,8 @@ CopyFrom(CopyState cstate)
        MemoryContext oldcontext = CurrentMemoryContext;
        ErrorContextCallback errcontext;
        CommandId       mycid = GetCurrentCommandId(true);
-       bool            use_wal = true; /* by default, use WAL logging */
-       bool            use_fsm = true; /* by default, use FSM for free space */
+       int                     hi_options = 0; /* start with default heap_insert options */
+       BulkInsertState bistate;
 
        Assert(cstate->rel);
 
@@ -1707,9 +1707,9 @@ CopyFrom(CopyState cstate)
        if (cstate->rel->rd_createSubid != InvalidSubTransactionId ||
                cstate->rel->rd_newRelfilenodeSubid != InvalidSubTransactionId)
        {
-               use_fsm = false;
+               hi_options |= HEAP_INSERT_SKIP_FSM;
                if (!XLogArchivingActive())
-                       use_wal = false;
+                       hi_options |= HEAP_INSERT_SKIP_WAL;
        }
 
        if (pipe)
@@ -1886,6 +1886,8 @@ CopyFrom(CopyState cstate)
        cstate->cur_attname = NULL;
        cstate->cur_attval = NULL;
 
+       bistate = GetBulkInsertState();
+
        /* Set up callback to identify error line number */
        errcontext.callback = copy_in_error_callback;
        errcontext.arg = (void *) cstate;
@@ -2108,7 +2110,7 @@ CopyFrom(CopyState cstate)
                                ExecConstraints(resultRelInfo, slot, estate);
 
                        /* OK, store the tuple and create index entries for it */
-                       heap_insert(cstate->rel, tuple, mycid, use_wal, use_fsm);
+                       heap_insert(cstate->rel, tuple, mycid, hi_options, bistate);
 
                        if (resultRelInfo->ri_NumIndices > 0)
                                ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);
@@ -2128,6 +2130,8 @@ CopyFrom(CopyState cstate)
        /* Done, clean up */
        error_context_stack = errcontext.previous;
 
+       FreeBulkInsertState(bistate);
+
        MemoryContextSwitchTo(oldcontext);
 
        /* Execute AFTER STATEMENT insertion triggers */
@@ -2164,7 +2168,7 @@ CopyFrom(CopyState cstate)
         * If we skipped writing WAL, then we need to sync the heap (but not
         * indexes since those use WAL anyway)
         */
-       if (!use_wal)
+       if (hi_options & HEAP_INSERT_SKIP_WAL)
                heap_sync(cstate->rel);
 }
 
src/backend/executor/execMain.c
index 47840d42ebc5fa08c242124af62a82df6f027d3b..350381ad4b514053397dfe74c96fb303a8e995d2 100644 (file)
@@ -26,7 +26,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.314 2008/10/31 21:07:54 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.315 2008/11/06 20:51:14 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1623,8 +1623,7 @@ ExecInsert(TupleTableSlot *slot,
         * t_self field.
         */
        newId = heap_insert(resultRelationDesc, tuple,
-                                               estate->es_output_cid,
-                                               true, true);
+                                               estate->es_output_cid, 0, NULL);
 
        IncrAppended();
        (estate->es_processed)++;
@@ -2621,7 +2620,8 @@ typedef struct
        DestReceiver pub;                       /* publicly-known function pointers */
        EState     *estate;                     /* EState we are working with */
        Relation        rel;                    /* Relation to write to */
-       bool            use_wal;                /* do we need to WAL-log our writes? */
+       int                     hi_options;             /* heap_insert performance options */
+       BulkInsertState bistate;        /* bulk insert state */
 } DR_intorel;
 
 /*
@@ -2753,14 +2753,17 @@ OpenIntoRel(QueryDesc *queryDesc)
        myState = (DR_intorel *) queryDesc->dest;
        Assert(myState->pub.mydest == DestIntoRel);
        myState->estate = estate;
+       myState->rel = intoRelationDesc;
 
        /*
-        * We can skip WAL-logging the insertions, unless PITR is in use.
+        * We can skip WAL-logging the insertions, unless PITR is in use.  We
+        * can skip the FSM in any case.
         */
-       myState->use_wal = XLogArchivingActive();
-       myState->rel = intoRelationDesc;
+       myState->hi_options = HEAP_INSERT_SKIP_FSM |
+               (XLogArchivingActive() ? 0 : HEAP_INSERT_SKIP_WAL);
+       myState->bistate = GetBulkInsertState();
 
-       /* use_wal off requires rd_targblock be initially invalid */
+       /* Not using WAL requires rd_targblock be initially invalid */
        Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber);
 }
 
@@ -2775,8 +2778,10 @@ CloseIntoRel(QueryDesc *queryDesc)
        /* OpenIntoRel might never have gotten called */
        if (myState && myState->pub.mydest == DestIntoRel && myState->rel)
        {
+               FreeBulkInsertState(myState->bistate);
+
                /* If we skipped using WAL, must heap_sync before commit */
-               if (!myState->use_wal)
+               if (myState->hi_options & HEAP_INSERT_SKIP_WAL)
                        heap_sync(myState->rel);
 
                /* close rel, but keep lock until commit */
@@ -2834,8 +2839,8 @@ intorel_receive(TupleTableSlot *slot, DestReceiver *self)
        heap_insert(myState->rel,
                                tuple,
                                myState->estate->es_output_cid,
-                               myState->use_wal,
-                               false);                 /* never any point in using FSM */
+                               myState->hi_options,
+                               myState->bistate);
 
        /* We know this is a newly created relation, so there are no indexes */
 
src/backend/storage/buffer/README
index 057f817b7e41a0f8ba285c7bbbc63e5b5e010a0a..696e5e8c3052ce169d58c2a0c18a3eb4516c5a76 100644 (file)
@@ -1,4 +1,4 @@
-$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.14 2008/03/21 13:23:28 momjian Exp $
+$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.15 2008/11/06 20:51:14 tgl Exp $
 
 Notes About Shared Buffer Access Rules
 ======================================
@@ -235,6 +235,10 @@ buffers were sent to the freelist, which was effectively a buffer ring of 1
 buffer, resulting in excessive WAL flushing.  Allowing VACUUM to update
 256KB between WAL flushes should be more efficient.
 
+Bulk writes work similarly to VACUUM.  Currently this applies only to
+COPY IN and CREATE TABLE AS SELECT.  (Might it be interesting to make
+seqscan UPDATE and DELETE use the bulkwrite strategy?)
+
 
 Background Writer's Processing
 ------------------------------
src/backend/storage/buffer/freelist.c
index 4e55db9adc22f9a13ba50af8d2394b1d36b202b7..5f4c05cef6d412a5f621c38099ba1ec616f6fd51 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.64 2008/01/01 19:45:51 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.65 2008/11/06 20:51:15 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -384,6 +384,9 @@ GetAccessStrategy(BufferAccessStrategyType btype)
                case BAS_BULKREAD:
                        ring_size = 256 * 1024 / BLCKSZ;
                        break;
+               case BAS_BULKWRITE:
+                       ring_size = 256 * 1024 / BLCKSZ;
+                       break;
                case BAS_VACUUM:
                        ring_size = 256 * 1024 / BLCKSZ;
                        break;
src/include/access/heapam.h
index b24edea6e40f29a1babb08dfee6fdf5600ca6aa9..1b78b40cc594279ea8eafc16167a5e91dbff50a2 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.139 2008/10/08 01:14:44 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.140 2008/11/06 20:51:15 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "utils/snapshot.h"
 
 
+/* "options" flag bits for heap_insert */
+#define HEAP_INSERT_SKIP_WAL   0x0001
+#define HEAP_INSERT_SKIP_FSM   0x0002
+
+typedef struct BulkInsertStateData *BulkInsertState;
+
 typedef enum
 {
        LockTupleShared,
@@ -86,8 +92,11 @@ extern void heap_get_latest_tid(Relation relation, Snapshot snapshot,
                                        ItemPointer tid);
 extern void setLastTid(const ItemPointer tid);
 
+extern BulkInsertState GetBulkInsertState(void);
+extern void FreeBulkInsertState(BulkInsertState);
+
 extern Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid,
-                       bool use_wal, bool use_fsm);
+                       int options, BulkInsertState bistate);
 extern HTSU_Result heap_delete(Relation relation, ItemPointer tid,
                        ItemPointer ctid, TransactionId *update_xmax,
                        CommandId cid, Snapshot crosscheck, bool wait);
src/include/access/hio.h
index a089bddbf3f4d8807f98bfc1f533a6c100038743..813347dccbee827291888cfc93d1f473936ce974 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/hio.h,v 1.36 2008/06/19 00:46:06 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/access/hio.h,v 1.37 2008/11/06 20:51:15 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "utils/relcache.h"
 #include "storage/buf.h"
 
+
+/*
+ * state for bulk inserts --- private to heapam.c and hio.c
+ *
+ * If current_buf isn't InvalidBuffer, then we are holding an extra pin
+ * on that buffer.
+ *
+ * "typedef struct BulkInsertStateData *BulkInsertState" is in heapam.h
+ */
+typedef struct BulkInsertStateData
+{
+       BufferAccessStrategy strategy;  /* our BULKWRITE strategy object */
+       Buffer current_buf;                             /* current insertion target page */
+} BulkInsertStateData;
+
+
 extern void RelationPutHeapTuple(Relation relation, Buffer buffer,
                                         HeapTuple tuple);
 extern Buffer RelationGetBufferForTuple(Relation relation, Size len,
-                                                 Buffer otherBuffer, bool use_fsm);
+                                                 Buffer otherBuffer, int options,
+                                                 struct BulkInsertStateData *bistate);
 
 #endif   /* HIO_H */
src/include/access/tuptoaster.h
index a87aee62dfc8ee3870a40eca9697456b85674aca..3cf7aad2b8714ce252de7fb53111d95548d96149 100644 (file)
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 2000-2008, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/access/tuptoaster.h,v 1.41 2008/07/13 20:45:47 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/tuptoaster.h,v 1.42 2008/11/06 20:51:15 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -93,7 +93,7 @@
  */
 extern HeapTuple toast_insert_or_update(Relation rel,
                                           HeapTuple newtup, HeapTuple oldtup,
-                                          bool use_wal, bool use_fsm);
+                                          int options);
 
 /* ----------
  * toast_delete -
src/include/storage/bufmgr.h
index 56f584a78ba8b4251ae93b8d8c02de9d3f967779..f2252c8f46040fd6ed72cd1829eed3919868a5ae 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.116 2008/10/31 15:05:00 heikki Exp $
+ * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.117 2008/11/06 20:51:15 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -28,6 +28,7 @@ typedef enum BufferAccessStrategyType
        BAS_NORMAL,                                     /* Normal random access */
        BAS_BULKREAD,                           /* Large read-only scan (hint bit updates are
                                                                 * ok) */
+       BAS_BULKWRITE,                          /* Large multi-block write (e.g. COPY IN) */
        BAS_VACUUM                                      /* VACUUM */
 } BufferAccessStrategyType;