]> granicus.if.org Git - postgresql/commitdiff
Reduce the memory footprint of large pending-trigger-event lists, as per my
authorTom Lane <tgl@sss.pgh.pa.us>
Fri, 24 Oct 2008 23:42:35 +0000 (23:42 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Fri, 24 Oct 2008 23:42:35 +0000 (23:42 +0000)
recent proposal.  In typical cases, we now need 12 bytes per insert or delete
event and 16 bytes per update event; previously we needed 40 bytes per
event on 32-bit hardware and 80 bytes per event on 64-bit hardware.  Even
in the worst case usage pattern with a large number of distinct triggers being
fired in one query, usage is at most 32 bytes per event.  It seems to be a
bit faster than the old code as well, due to reduction of palloc overhead.

This commit doesn't address the TODO item of allowing the event list to spill
to disk; rather it's trying to stave off the need for that.  However, it
probably makes that task a bit easier by reducing the data structure's
dependency on pointers.  It would now be practical to dump an event list to
disk by "chunks" instead of individual events.

src/backend/commands/trigger.c
src/include/commands/trigger.h

index 3461056c576ff3b2fc3035c02a72ab6fb2e1e95b..ffdb168236f20f56747f4d411e7a76666ba019da 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.237 2008/09/01 20:42:44 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/trigger.c,v 1.238 2008/10/24 23:42:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2234,12 +2234,14 @@ ltrmark:;
  * during the current transaction tree.  (BEFORE triggers are fired
  * immediately so we don't need any persistent state about them.)  The struct
  * and most of its subsidiary data are kept in TopTransactionContext; however
- * the individual event records are kept in separate contexts, to make them
- * easy to delete during subtransaction abort.
+ * the individual event records are kept in a separate sub-context.  This is
+ * done mainly so that it's easy to tell from a memory context dump how much
+ * space is being eaten by trigger events.
  *
- * Because the list of pending events can grow large, we go to some effort
- * to minimize memory consumption.     We do not use the generic List mechanism
- * but thread the events manually.
+ * Because the list of pending events can grow large, we go to some
+ * considerable effort to minimize per-event memory consumption.  The event
+ * records are grouped into chunks and common data for similar events in the
+ * same chunk is only stored once.
  *
  * XXX We need to be able to save the per-event data in a file if it grows too
  * large.
@@ -2280,30 +2282,101 @@ typedef SetConstraintStateData *SetConstraintState;
 /*
  * Per-trigger-event data
  *
- * Note: ate_firing_id is meaningful when either AFTER_TRIGGER_DONE
- * or AFTER_TRIGGER_IN_PROGRESS is set.  It indicates which trigger firing
- * cycle the trigger was or will be fired in.
+ * The actual per-event data, AfterTriggerEventData, includes DONE/IN_PROGRESS
+ * status bits and one or two tuple CTIDs.  Each event record also has an
+ * associated AfterTriggerSharedData that is shared across all instances
+ * of similar events within a "chunk".
+ *
+ * We arrange not to waste storage on ate_ctid2 for non-update events.
+ * We could go further and not store either ctid for statement-level triggers,
+ * but that seems unlikely to be worth the trouble.
+ *
+ * Note: ats_firing_id is initially zero and is set to something else when
+ * AFTER_TRIGGER_IN_PROGRESS is set.  It indicates which trigger firing
+ * cycle the trigger will be fired in (or was fired in, if DONE is set).
+ * Although this is mutable state, we can keep it in AfterTriggerSharedData
+ * because all instances of the same type of event in a given event list will
+ * be fired at the same time, if they were queued between the same firing
+ * cycles.  So we need only ensure that ats_firing_id is zero when attaching
+ * a new event to an existing AfterTriggerSharedData record.
  */
+typedef uint32 TriggerFlags;
+
+#define AFTER_TRIGGER_OFFSET                   0x0FFFFFFF /* must be low-order bits */
+#define AFTER_TRIGGER_2CTIDS                   0x10000000
+#define AFTER_TRIGGER_DONE                             0x20000000
+#define AFTER_TRIGGER_IN_PROGRESS              0x40000000
+
+typedef struct AfterTriggerSharedData *AfterTriggerShared;
+
+typedef struct AfterTriggerSharedData
+{
+       TriggerEvent ats_event;         /* event type indicator, see trigger.h */
+       Oid                     ats_tgoid;              /* the trigger's ID */
+       Oid                     ats_relid;              /* the relation it's on */
+       CommandId       ats_firing_id;  /* ID for firing cycle */
+} AfterTriggerSharedData;
+
 typedef struct AfterTriggerEventData *AfterTriggerEvent;
 
 typedef struct AfterTriggerEventData
 {
-       AfterTriggerEvent ate_next; /* list link */
-       TriggerEvent ate_event;         /* event type and status bits */
-       CommandId       ate_firing_id;  /* ID for firing cycle */
-       Oid                     ate_tgoid;              /* the trigger's ID */
-       Oid                     ate_relid;              /* the relation it's on */
-       ItemPointerData ate_oldctid;    /* specific tuple(s) involved */
-       ItemPointerData ate_newctid;
+       TriggerFlags ate_flags;         /* status bits and offset to shared data */
+       ItemPointerData ate_ctid1;      /* inserted, deleted, or old updated tuple */
+       ItemPointerData ate_ctid2;      /* new updated tuple */
 } AfterTriggerEventData;
 
+/* This struct must exactly match the one above except for not having ctid2 */
+typedef struct AfterTriggerEventDataOneCtid
+{
+       TriggerFlags ate_flags;         /* status bits and offset to shared data */
+       ItemPointerData ate_ctid1;      /* inserted, deleted, or old updated tuple */
+} AfterTriggerEventDataOneCtid;
+
+#define SizeofTriggerEvent(evt) \
+       (((evt)->ate_flags & AFTER_TRIGGER_2CTIDS) ? \
+        sizeof(AfterTriggerEventData) : sizeof(AfterTriggerEventDataOneCtid))
+
+#define GetTriggerSharedData(evt) \
+       ((AfterTriggerShared) ((char *) (evt) + ((evt)->ate_flags & AFTER_TRIGGER_OFFSET)))
+
+/*
+ * To avoid palloc overhead, we keep trigger events in arrays in successively-
+ * larger chunks (a slightly more sophisticated version of an expansible
+ * array).  The space between CHUNK_DATA_START and freeptr is occupied by
+ * AfterTriggerEventData records; the space between endfree and endptr is
+ * occupied by AfterTriggerSharedData records.
+ */
+typedef struct AfterTriggerEventChunk
+{
+       struct AfterTriggerEventChunk *next;    /* list link */
+       char       *freeptr;            /* start of free space in chunk */
+       char       *endfree;            /* end of free space in chunk */
+       char       *endptr;                     /* end of chunk */
+       /* event data follows here */
+} AfterTriggerEventChunk;
+
+#define CHUNK_DATA_START(cptr) ((char *) (cptr) + MAXALIGN(sizeof(AfterTriggerEventChunk)))
+
 /* A list of events */
 typedef struct AfterTriggerEventList
 {
-       AfterTriggerEvent head;
-       AfterTriggerEvent tail;
+       AfterTriggerEventChunk *head;
+       AfterTriggerEventChunk *tail;
+       char       *tailfree;           /* freeptr of tail chunk */
 } AfterTriggerEventList;
 
+/* Macros to help in iterating over a list of events */
+#define for_each_chunk(cptr, evtlist) \
+       for (cptr = (evtlist).head; cptr != NULL; cptr = cptr->next)
+#define for_each_event(eptr, cptr) \
+       for (eptr = (AfterTriggerEvent) CHUNK_DATA_START(cptr); \
+                (char *) eptr < (cptr)->freeptr; \
+                eptr = (AfterTriggerEvent) (((char *) eptr) + SizeofTriggerEvent(eptr)))
+/* Use this if no special per-chunk processing is needed */
+#define for_each_event_chunk(eptr, cptr, evtlist) \
+       for_each_chunk(cptr, evtlist) for_each_event(eptr, cptr)
+
 
 /*
  * All per-transaction data for the AFTER TRIGGERS module.
@@ -2323,9 +2396,7 @@ typedef struct AfterTriggerEventList
  * all subtransactions of the current transaction.     In a subtransaction
  * abort, we know that the events added by the subtransaction are at the
  * end of the list, so it is relatively easy to discard them.  The event
- * structs themselves are stored in event_cxt if generated by the top-level
- * transaction, else in per-subtransaction contexts identified by the
- * entries in cxt_stack.
+ * list chunks themselves are stored in event_cxt.
  *
  * query_depth is the current depth of nested AfterTriggerBeginQuery calls
  * (-1 when the stack is empty).
@@ -2367,8 +2438,7 @@ typedef struct AfterTriggersData
        int                     query_depth;    /* current query list index */
        AfterTriggerEventList *query_stack; /* events pending from each query */
        int                     maxquerydepth;  /* allocated len of above array */
-       MemoryContext event_cxt;        /* top transaction's event context, if any */
-       MemoryContext *cxt_stack;       /* per-subtransaction event contexts */
+       MemoryContext event_cxt;        /* memory context for events, if any */
 
        /* these fields are just for resetting at subtrans abort: */
 
@@ -2398,13 +2468,13 @@ static SetConstraintState SetConstraintStateAddItem(SetConstraintState state,
 /* ----------
  * afterTriggerCheckState()
  *
- *     Returns true if the trigger identified by tgoid is actually
- *     in state DEFERRED.
+ *     Returns true if the trigger event is actually in state DEFERRED.
  * ----------
  */
 static bool
-afterTriggerCheckState(Oid tgoid, TriggerEvent eventstate)
+afterTriggerCheckState(AfterTriggerShared evtshared)
 {
+       Oid                     tgoid = evtshared->ats_tgoid;
        SetConstraintState state = afterTriggers->state;
        int                     i;
 
@@ -2412,7 +2482,7 @@ afterTriggerCheckState(Oid tgoid, TriggerEvent eventstate)
         * For not-deferrable triggers (i.e. normal AFTER ROW triggers and
         * constraints declared NOT DEFERRABLE), the state is always false.
         */
-       if ((eventstate & AFTER_TRIGGER_DEFERRABLE) == 0)
+       if ((evtshared->ats_event & AFTER_TRIGGER_DEFERRABLE) == 0)
                return false;
 
        /*
@@ -2433,37 +2503,184 @@ afterTriggerCheckState(Oid tgoid, TriggerEvent eventstate)
        /*
         * Otherwise return the default state for the trigger.
         */
-       return ((eventstate & AFTER_TRIGGER_INITDEFERRED) != 0);
+       return ((evtshared->ats_event & AFTER_TRIGGER_INITDEFERRED) != 0);
 }
 
 
 /* ----------
  * afterTriggerAddEvent()
  *
- *     Add a new trigger event to the current query's queue.
+ *     Add a new trigger event to the specified queue.
+ *     The passed-in event data is copied.
  * ----------
  */
 static void
-afterTriggerAddEvent(AfterTriggerEvent event)
+afterTriggerAddEvent(AfterTriggerEventList *events,
+                                        AfterTriggerEvent event, AfterTriggerShared evtshared)
 {
-       AfterTriggerEventList *events;
+       Size            eventsize = SizeofTriggerEvent(event);
+       Size            needed = eventsize + sizeof(AfterTriggerSharedData);
+       AfterTriggerEventChunk *chunk;
+       AfterTriggerShared newshared;
+       AfterTriggerEvent newevent;
 
-       Assert(event->ate_next == NULL);
+       /*
+        * If empty list or not enough room in the tail chunk, make a new chunk.
+        * We assume here that a new shared record will always be needed.
+        */
+       chunk = events->tail;
+       if (chunk == NULL ||
+               chunk->endfree - chunk->freeptr < needed)
+       {
+               Size            chunksize;
 
-       /* Must be inside a query */
-       Assert(afterTriggers->query_depth >= 0);
+               /* Create event context if we didn't already */
+               if (afterTriggers->event_cxt == NULL)
+                       afterTriggers->event_cxt =
+                               AllocSetContextCreate(TopTransactionContext,
+                                                                         "AfterTriggerEvents",
+                                                                         ALLOCSET_DEFAULT_MINSIZE,
+                                                                         ALLOCSET_DEFAULT_INITSIZE,
+                                                                         ALLOCSET_DEFAULT_MAXSIZE);
 
-       events = &afterTriggers->query_stack[afterTriggers->query_depth];
-       if (events->tail == NULL)
+               /*
+                * Chunk size starts at 1KB and is allowed to increase up to 1MB.
+                * These numbers are fairly arbitrary, though there is a hard limit at
+                * AFTER_TRIGGER_OFFSET; else we couldn't link event records to their
+                * shared records using the available space in ate_flags.  Another
+                * constraint is that if the chunk size gets too huge, the search loop
+                * below would get slow given a (not too common) usage pattern with
+                * many distinct event types in a chunk.  Therefore, we double the
+                * preceding chunk size only if there weren't too many shared records
+                * in the preceding chunk; otherwise we halve it.  This gives us some
+                * ability to adapt to the actual usage pattern of the current query
+                * while still having large chunk sizes in typical usage.  All chunk
+                * sizes used should be MAXALIGN multiples, to ensure that the shared
+                * records will be aligned safely.
+                */
+#define MIN_CHUNK_SIZE 1024
+#define MAX_CHUNK_SIZE (1024*1024)
+
+#if MAX_CHUNK_SIZE > (AFTER_TRIGGER_OFFSET+1)
+#error MAX_CHUNK_SIZE must not exceed AFTER_TRIGGER_OFFSET
+#endif
+
+               if (chunk == NULL)
+                       chunksize = MIN_CHUNK_SIZE;
+               else
+               {
+                       /* preceding chunk size... */
+                       chunksize = chunk->endptr - (char *) chunk;
+                       /* check number of shared records in preceding chunk */
+                       if ((chunk->endptr - chunk->endfree) <=
+                               (100 * sizeof(AfterTriggerSharedData)))
+                               chunksize *= 2;                         /* okay, double it */
+                       else
+                               chunksize /= 2;                         /* too many shared records */
+                       chunksize = Min(chunksize, MAX_CHUNK_SIZE);
+               }
+               chunk = MemoryContextAlloc(afterTriggers->event_cxt, chunksize);
+               chunk->next = NULL;
+               chunk->freeptr = CHUNK_DATA_START(chunk);
+               chunk->endptr = chunk->endfree = (char *) chunk + chunksize;
+               Assert(chunk->endfree - chunk->freeptr >= needed);
+
+               if (events->head == NULL)
+                       events->head = chunk;
+               else
+                       events->tail->next = chunk;
+               events->tail = chunk;
+       }
+
+       /*
+        * Try to locate a matching shared-data record already in the chunk.
+        * If none, make a new one.
+        */
+       for (newshared = ((AfterTriggerShared) chunk->endptr) - 1;
+                (char *) newshared >= chunk->endfree;
+                newshared--)
+       {
+               if (newshared->ats_tgoid == evtshared->ats_tgoid &&
+                       newshared->ats_relid == evtshared->ats_relid &&
+                       newshared->ats_event == evtshared->ats_event &&
+                       newshared->ats_firing_id == 0)
+                       break;
+       }
+       if ((char *) newshared < chunk->endfree)
        {
-               /* first list entry */
-               events->head = event;
-               events->tail = event;
+               *newshared = *evtshared;
+               newshared->ats_firing_id = 0;                           /* just to be sure */
+               chunk->endfree = (char *) newshared;
+       }
+
+       /* Insert the data */
+       newevent = (AfterTriggerEvent) chunk->freeptr;
+       memcpy(newevent, event, eventsize);
+       /* ... and link the new event to its shared record */
+       newevent->ate_flags &= ~AFTER_TRIGGER_OFFSET;
+       newevent->ate_flags |= (char *) newshared - (char *) newevent;
+
+       chunk->freeptr += eventsize;
+       events->tailfree = chunk->freeptr;
+}
+
+/* ----------
+ * afterTriggerFreeEventList()
+ *
+ *     Free all the event storage in the given list.
+ * ----------
+ */
+static void
+afterTriggerFreeEventList(AfterTriggerEventList *events)
+{
+       AfterTriggerEventChunk *chunk;
+       AfterTriggerEventChunk *next_chunk;
+
+       for (chunk = events->head; chunk != NULL; chunk = next_chunk)
+       {
+               next_chunk = chunk->next;
+               pfree(chunk);
+       }
+       events->head = NULL;
+       events->tail = NULL;
+       events->tailfree = NULL;
+}
+
+/* ----------
+ * afterTriggerRestoreEventList()
+ *
+ *     Restore an event list to its prior length, removing all the events
+ *     added since it had the value old_events.
+ * ----------
+ */
+static void
+afterTriggerRestoreEventList(AfterTriggerEventList *events,
+                                                        const AfterTriggerEventList *old_events)
+{
+       AfterTriggerEventChunk *chunk;
+       AfterTriggerEventChunk *next_chunk;
+
+       if (old_events->tail == NULL)
+       {
+               /* restoring to a completely empty state, so free everything */
+               afterTriggerFreeEventList(events);
        }
        else
        {
-               events->tail->ate_next = event;
-               events->tail = event;
+               *events = *old_events;
+               /* free any chunks after the last one we want to keep */
+               for (chunk = events->tail->next; chunk != NULL; chunk = next_chunk)
+               {
+                       next_chunk = chunk->next;
+                       pfree(chunk);
+               }
+               /* and clean up the tail chunk to be the right length */
+               events->tail->next = NULL;
+               events->tail->freeptr = events->tailfree;
+               /*
+                * We don't make any effort to remove now-unused shared data records.
+                * They might still be useful, anyway.
+                */
        }
 }
 
@@ -2494,13 +2711,14 @@ AfterTriggerExecute(AfterTriggerEvent event,
                                        FmgrInfo *finfo, Instrumentation *instr,
                                        MemoryContext per_tuple_context)
 {
-       Oid                     tgoid = event->ate_tgoid;
+       AfterTriggerShared evtshared = GetTriggerSharedData(event);
+       Oid                     tgoid = evtshared->ats_tgoid;
        TriggerData LocTriggerData;
-       HeapTupleData oldtuple;
-       HeapTupleData newtuple;
+       HeapTupleData tuple1;
+       HeapTupleData tuple2;
        HeapTuple       rettuple;
-       Buffer          oldbuffer = InvalidBuffer;
-       Buffer          newbuffer = InvalidBuffer;
+       Buffer          buffer1 = InvalidBuffer;
+       Buffer          buffer2 = InvalidBuffer;
        int                     tgindx;
 
        /*
@@ -2526,37 +2744,36 @@ AfterTriggerExecute(AfterTriggerEvent event,
                InstrStartNode(instr + tgindx);
 
        /*
-        * Fetch the required OLD and NEW tuples.
+        * Fetch the required tuple(s).
         */
-       LocTriggerData.tg_trigtuple = NULL;
-       LocTriggerData.tg_newtuple = NULL;
-       LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
-       LocTriggerData.tg_newtuplebuf = InvalidBuffer;
-
-       if (ItemPointerIsValid(&(event->ate_oldctid)))
+       if (ItemPointerIsValid(&(event->ate_ctid1)))
        {
-               ItemPointerCopy(&(event->ate_oldctid), &(oldtuple.t_self));
-               if (!heap_fetch(rel, SnapshotAny, &oldtuple, &oldbuffer, false, NULL))
-                       elog(ERROR, "failed to fetch old tuple for AFTER trigger");
-               LocTriggerData.tg_trigtuple = &oldtuple;
-               LocTriggerData.tg_trigtuplebuf = oldbuffer;
+               ItemPointerCopy(&(event->ate_ctid1), &(tuple1.t_self));
+               if (!heap_fetch(rel, SnapshotAny, &tuple1, &buffer1, false, NULL))
+                       elog(ERROR, "failed to fetch tuple1 for AFTER trigger");
+               LocTriggerData.tg_trigtuple = &tuple1;
+               LocTriggerData.tg_trigtuplebuf = buffer1;
+       }
+       else
+       {
+               LocTriggerData.tg_trigtuple = NULL;
+               LocTriggerData.tg_trigtuplebuf = InvalidBuffer;
        }
 
-       if (ItemPointerIsValid(&(event->ate_newctid)))
+       /* don't touch ctid2 if not there */
+       if ((event->ate_flags & AFTER_TRIGGER_2CTIDS) &&
+               ItemPointerIsValid(&(event->ate_ctid2)))
        {
-               ItemPointerCopy(&(event->ate_newctid), &(newtuple.t_self));
-               if (!heap_fetch(rel, SnapshotAny, &newtuple, &newbuffer, false, NULL))
-                       elog(ERROR, "failed to fetch new tuple for AFTER trigger");
-               if (LocTriggerData.tg_trigtuple != NULL)
-               {
-                       LocTriggerData.tg_newtuple = &newtuple;
-                       LocTriggerData.tg_newtuplebuf = newbuffer;
-               }
-               else
-               {
-                       LocTriggerData.tg_trigtuple = &newtuple;
-                       LocTriggerData.tg_trigtuplebuf = newbuffer;
-               }
+               ItemPointerCopy(&(event->ate_ctid2), &(tuple2.t_self));
+               if (!heap_fetch(rel, SnapshotAny, &tuple2, &buffer2, false, NULL))
+                       elog(ERROR, "failed to fetch tuple2 for AFTER trigger");
+               LocTriggerData.tg_newtuple = &tuple2;
+               LocTriggerData.tg_newtuplebuf = buffer2;
+       }
+       else
+       {
+               LocTriggerData.tg_newtuple = NULL;
+               LocTriggerData.tg_newtuplebuf = InvalidBuffer;
        }
 
        /*
@@ -2564,7 +2781,7 @@ AfterTriggerExecute(AfterTriggerEvent event,
         */
        LocTriggerData.type = T_TriggerData;
        LocTriggerData.tg_event =
-               event->ate_event & (TRIGGER_EVENT_OPMASK | TRIGGER_EVENT_ROW);
+               evtshared->ats_event & (TRIGGER_EVENT_OPMASK | TRIGGER_EVENT_ROW);
        LocTriggerData.tg_relation = rel;
 
        MemoryContextReset(per_tuple_context);
@@ -2578,16 +2795,16 @@ AfterTriggerExecute(AfterTriggerEvent event,
                                                                   finfo,
                                                                   NULL,
                                                                   per_tuple_context);
-       if (rettuple != NULL && rettuple != &oldtuple && rettuple != &newtuple)
+       if (rettuple != NULL && rettuple != &tuple1 && rettuple != &tuple2)
                heap_freetuple(rettuple);
 
        /*
         * Release buffers
         */
-       if (oldbuffer != InvalidBuffer)
-               ReleaseBuffer(oldbuffer);
-       if (newbuffer != InvalidBuffer)
-               ReleaseBuffer(newbuffer);
+       if (buffer1 != InvalidBuffer)
+               ReleaseBuffer(buffer1);
+       if (buffer2 != InvalidBuffer)
+               ReleaseBuffer(buffer2);
 
        /*
         * If doing EXPLAIN ANALYZE, stop charging time to this trigger, and count
@@ -2605,7 +2822,7 @@ AfterTriggerExecute(AfterTriggerEvent event,
  *     that can be invoked now with the current firing ID.
  *
  *     If move_list isn't NULL, events that are not to be invoked now are
- *     removed from the given list and appended to move_list.
+ *     transferred to move_list.
  *
  *     When immediate_only is TRUE, do not invoke currently-deferred triggers.
  *     (This will be FALSE only at main transaction exit.)
@@ -2618,26 +2835,22 @@ afterTriggerMarkEvents(AfterTriggerEventList *events,
                                           bool immediate_only)
 {
        bool            found = false;
-       AfterTriggerEvent event,
-                               prev_event;
-
-       prev_event = NULL;
-       event = events->head;
+       AfterTriggerEvent event;
+       AfterTriggerEventChunk *chunk;
 
-       while (event != NULL)
+       for_each_event_chunk(event, chunk, *events)
        {
+               AfterTriggerShared evtshared = GetTriggerSharedData(event);
                bool            defer_it = false;
-               AfterTriggerEvent next_event;
 
-               if (!(event->ate_event &
+               if (!(event->ate_flags &
                          (AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS)))
                {
                        /*
                         * This trigger hasn't been called or scheduled yet. Check if we
                         * should call it now.
                         */
-                       if (immediate_only &&
-                               afterTriggerCheckState(event->ate_tgoid, event->ate_event))
+                       if (immediate_only && afterTriggerCheckState(evtshared))
                        {
                                defer_it = true;
                        }
@@ -2646,8 +2859,8 @@ afterTriggerMarkEvents(AfterTriggerEventList *events,
                                /*
                                 * Mark it as to be fired in this firing cycle.
                                 */
-                               event->ate_firing_id = afterTriggers->firing_counter;
-                               event->ate_event |= AFTER_TRIGGER_IN_PROGRESS;
+                               evtshared->ats_firing_id = afterTriggers->firing_counter;
+                               event->ate_flags |= AFTER_TRIGGER_IN_PROGRESS;
                                found = true;
                        }
                }
@@ -2655,45 +2868,19 @@ afterTriggerMarkEvents(AfterTriggerEventList *events,
                /*
                 * If it's deferred, move it to move_list, if requested.
                 */
-               next_event = event->ate_next;
-
                if (defer_it && move_list != NULL)
                {
-                       /* Delink it from input list */
-                       if (prev_event)
-                               prev_event->ate_next = next_event;
-                       else
-                               events->head = next_event;
-                       /* and add it to move_list */
-                       event->ate_next = NULL;
-                       if (move_list->tail == NULL)
-                       {
-                               /* first list entry */
-                               move_list->head = event;
-                               move_list->tail = event;
-                       }
-                       else
-                       {
-                               move_list->tail->ate_next = event;
-                               move_list->tail = event;
-                       }
-               }
-               else
-               {
-                       /* Keep it in input list */
-                       prev_event = event;
+                       /* add it to move_list */
+                       afterTriggerAddEvent(move_list, event, evtshared);
+                       /* mark original copy "done" so we don't do it again */
+                       event->ate_flags |= AFTER_TRIGGER_DONE;
                }
-
-               event = next_event;
        }
 
-       /* Update list tail pointer in case we moved tail event */
-       events->tail = prev_event;
-
        return found;
 }
 
-/* ----------
+/*
  * afterTriggerInvokeEvents()
  *
  *     Scan the given event list for events that are marked as to be fired
@@ -2704,18 +2891,24 @@ afterTriggerMarkEvents(AfterTriggerEventList *events,
  *     make one locally to cache the info in case there are multiple trigger
  *     events per rel.
  *
- *     When delete_ok is TRUE, it's okay to delete fully-processed events.
- *     The events list pointers are updated.
- * ----------
+ *     When delete_ok is TRUE, it's safe to delete fully-processed events.
+ *     (We are not very tense about that: we simply reset a chunk to be empty
+ *     if all its events got fired.  The objective here is just to avoid useless
+ *     rescanning of events when a trigger queues new events during transaction
+ *     end, so it's not necessary to worry much about the case where only
+ *     some events are fired.)
+ *
+ *     Returns TRUE if no unfired events remain in the list (this allows us
+ *     to avoid repeating afterTriggerMarkEvents).
  */
-static void
+static bool
 afterTriggerInvokeEvents(AfterTriggerEventList *events,
                                                 CommandId firing_id,
                                                 EState *estate,
                                                 bool delete_ok)
 {
-       AfterTriggerEvent event,
-                               prev_event;
+       bool            all_fired = true;
+       AfterTriggerEventChunk *chunk;
        MemoryContext per_tuple_context;
        bool            local_estate = false;
        Relation        rel = NULL;
@@ -2738,83 +2931,68 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events,
                                                          ALLOCSET_DEFAULT_INITSIZE,
                                                          ALLOCSET_DEFAULT_MAXSIZE);
 
-       prev_event = NULL;
-       event = events->head;
-
-       while (event != NULL)
+       for_each_chunk(chunk, *events)
        {
-               AfterTriggerEvent next_event;
+               AfterTriggerEvent event;
+               bool            all_fired_in_chunk = true;
 
-               /*
-                * Is it one for me to fire?
-                */
-               if ((event->ate_event & AFTER_TRIGGER_IN_PROGRESS) &&
-                       event->ate_firing_id == firing_id)
+               for_each_event(event, chunk)
                {
+                       AfterTriggerShared evtshared = GetTriggerSharedData(event);
+
                        /*
-                        * So let's fire it... but first, find the correct relation if
-                        * this is not the same relation as before.
+                        * Is it one for me to fire?
                         */
-                       if (rel == NULL || RelationGetRelid(rel) != event->ate_relid)
+                       if ((event->ate_flags & AFTER_TRIGGER_IN_PROGRESS) &&
+                               evtshared->ats_firing_id == firing_id)
                        {
-                               ResultRelInfo *rInfo;
-
-                               rInfo = ExecGetTriggerResultRel(estate, event->ate_relid);
-                               rel = rInfo->ri_RelationDesc;
-                               trigdesc = rInfo->ri_TrigDesc;
-                               finfo = rInfo->ri_TrigFunctions;
-                               instr = rInfo->ri_TrigInstrument;
-                               if (trigdesc == NULL)   /* should not happen */
-                                       elog(ERROR, "relation %u has no triggers",
-                                                event->ate_relid);
-                       }
+                               /*
+                                * So let's fire it... but first, find the correct relation if
+                                * this is not the same relation as before.
+                                */
+                               if (rel == NULL || RelationGetRelid(rel) != evtshared->ats_relid)
+                               {
+                                       ResultRelInfo *rInfo;
+
+                                       rInfo = ExecGetTriggerResultRel(estate, evtshared->ats_relid);
+                                       rel = rInfo->ri_RelationDesc;
+                                       trigdesc = rInfo->ri_TrigDesc;
+                                       finfo = rInfo->ri_TrigFunctions;
+                                       instr = rInfo->ri_TrigInstrument;
+                                       if (trigdesc == NULL)   /* should not happen */
+                                               elog(ERROR, "relation %u has no triggers",
+                                                        evtshared->ats_relid);
+                               }
 
-                       /*
-                        * Fire it.  Note that the AFTER_TRIGGER_IN_PROGRESS flag is still
-                        * set, so recursive examinations of the event list won't try to
-                        * re-fire it.
-                        */
-                       AfterTriggerExecute(event, rel, trigdesc, finfo, instr,
-                                                               per_tuple_context);
+                               /*
+                                * Fire it.  Note that the AFTER_TRIGGER_IN_PROGRESS flag is
+                                * still set, so recursive examinations of the event list
+                                * won't try to re-fire it.
+                                */
+                               AfterTriggerExecute(event, rel, trigdesc, finfo, instr,
+                                                                       per_tuple_context);
 
-                       /*
-                        * Mark the event as done.
-                        */
-                       event->ate_event &= ~AFTER_TRIGGER_IN_PROGRESS;
-                       event->ate_event |= AFTER_TRIGGER_DONE;
+                               /*
+                                * Mark the event as done.
+                                */
+                               event->ate_flags &= ~AFTER_TRIGGER_IN_PROGRESS;
+                               event->ate_flags |= AFTER_TRIGGER_DONE;
+                       }
+                       else if (!(event->ate_flags & AFTER_TRIGGER_DONE))
+                       {
+                               /* something remains to be done */
+                               all_fired = all_fired_in_chunk = false;
+                       }
                }
 
-               /*
-                * If it's now done, throw it away, if allowed.
-                *
-                * NB: it's possible the trigger call above added more events to the
-                * queue, or that calls we will do later will want to add more, so we
-                * have to be careful about maintaining list validity at all points
-                * here.
-                */
-               next_event = event->ate_next;
-
-               if ((event->ate_event & AFTER_TRIGGER_DONE) && delete_ok)
+               /* Clear the chunk if delete_ok and nothing left of interest */
+               if (delete_ok && all_fired_in_chunk)
                {
-                       /* Delink it from list and free it */
-                       if (prev_event)
-                               prev_event->ate_next = next_event;
-                       else
-                               events->head = next_event;
-                       pfree(event);
-               }
-               else
-               {
-                       /* Keep it in list */
-                       prev_event = event;
+                       chunk->freeptr = CHUNK_DATA_START(chunk);
+                       chunk->endfree = chunk->endptr;
                }
-
-               event = next_event;
        }
 
-       /* Update list tail pointer in case we just deleted tail event */
-       events->tail = prev_event;
-
        /* Release working resources */
        MemoryContextDelete(per_tuple_context);
 
@@ -2832,6 +3010,8 @@ afterTriggerInvokeEvents(AfterTriggerEventList *events,
                }
                FreeExecutorState(estate);
        }
+
+       return all_fired;
 }
 
 
@@ -2854,10 +3034,11 @@ AfterTriggerBeginXact(void)
                MemoryContextAlloc(TopTransactionContext,
                                                   sizeof(AfterTriggersData));
 
-       afterTriggers->firing_counter = FirstCommandId;
+       afterTriggers->firing_counter = (CommandId) 1; /* mustn't be 0 */
        afterTriggers->state = SetConstraintStateCreate(8);
        afterTriggers->events.head = NULL;
        afterTriggers->events.tail = NULL;
+       afterTriggers->events.tailfree = NULL;
        afterTriggers->query_depth = -1;
 
        /* We initialize the query stack to a reasonable size */
@@ -2870,7 +3051,6 @@ AfterTriggerBeginXact(void)
        afterTriggers->event_cxt = NULL;
 
        /* Subtransaction stack is empty until/unless needed */
-       afterTriggers->cxt_stack = NULL;
        afterTriggers->state_stack = NULL;
        afterTriggers->events_stack = NULL;
        afterTriggers->depth_stack = NULL;
@@ -2891,6 +3071,8 @@ AfterTriggerBeginXact(void)
 void
 AfterTriggerBeginQuery(void)
 {
+       AfterTriggerEventList *events;
+
        /* Must be inside a transaction */
        Assert(afterTriggers != NULL);
 
@@ -2912,8 +3094,10 @@ AfterTriggerBeginQuery(void)
        }
 
        /* Initialize this query's list to empty */
-       afterTriggers->query_stack[afterTriggers->query_depth].head = NULL;
-       afterTriggers->query_stack[afterTriggers->query_depth].tail = NULL;
+       events = &afterTriggers->query_stack[afterTriggers->query_depth];
+       events->head = NULL;
+       events->tail = NULL;
+       events->tailfree = NULL;
 }
 
 
@@ -2950,20 +3134,32 @@ AfterTriggerEndQuery(EState *estate)
         * IMMEDIATE: all events we have decided to defer will be available for it
         * to fire.
         *
-        * We loop in case a trigger queues more events.
+        * We loop in case a trigger queues more events at the same query level
+        * (is that even possible?).  Be careful here: firing a trigger could
+        * result in query_stack being repalloc'd, so we can't save its address
+        * across afterTriggerInvokeEvents calls.
         *
         * If we find no firable events, we don't have to increment
         * firing_counter.
         */
-       events = &afterTriggers->query_stack[afterTriggers->query_depth];
-       while (afterTriggerMarkEvents(events, &afterTriggers->events, true))
+       for (;;)
        {
-               CommandId       firing_id = afterTriggers->firing_counter++;
+               events = &afterTriggers->query_stack[afterTriggers->query_depth];
+               if (afterTriggerMarkEvents(events, &afterTriggers->events, true))
+               {
+                       CommandId       firing_id = afterTriggers->firing_counter++;
 
-               /* OK to delete the immediate events after processing them */
-               afterTriggerInvokeEvents(events, firing_id, estate, true);
+                       /* OK to delete the immediate events after processing them */
+                       if (afterTriggerInvokeEvents(events, firing_id, estate, true))
+                               break;                  /* all fired */
+               }
+               else
+                       break;
        }
 
+       /* Release query-local storage for events */
+       afterTriggerFreeEventList(&afterTriggers->query_stack[afterTriggers->query_depth]);
+
        afterTriggers->query_depth--;
 }
 
@@ -3011,13 +3207,17 @@ AfterTriggerFireDeferred(void)
        {
                CommandId       firing_id = afterTriggers->firing_counter++;
 
-               afterTriggerInvokeEvents(events, firing_id, NULL, true);
+               if (afterTriggerInvokeEvents(events, firing_id, NULL, true))
+                       break;                          /* all fired */
        }
 
+       /*
+        * We don't bother freeing the event list, since it will go away anyway
+        * (and more efficiently than via pfree) in AfterTriggerEndXact.
+        */
+
        if (snap_pushed)
                PopActiveSnapshot();
-
-       Assert(events->head == NULL);
 }
 
 
@@ -3045,10 +3245,6 @@ AfterTriggerEndXact(bool isCommit)
         * pending-events list could be large, and so it's useful to discard it as
         * soon as possible --- especially if we are aborting because we ran out
         * of memory for the list!
-        *
-        * (Note: any event_cxts of child subtransactions could also be deleted
-        * here, but we have no convenient way to find them, so we leave it to
-        * TopTransactionContext reset to clean them up.)
         */
        if (afterTriggers && afterTriggers->event_cxt)
                MemoryContextDelete(afterTriggers->event_cxt);
@@ -3087,8 +3283,6 @@ AfterTriggerBeginSubXact(void)
                        old_cxt = MemoryContextSwitchTo(TopTransactionContext);
 
 #define DEFTRIG_INITALLOC 8
-                       afterTriggers->cxt_stack = (MemoryContext *)
-                               palloc(DEFTRIG_INITALLOC * sizeof(MemoryContext));
                        afterTriggers->state_stack = (SetConstraintState *)
                                palloc(DEFTRIG_INITALLOC * sizeof(SetConstraintState));
                        afterTriggers->events_stack = (AfterTriggerEventList *)
@@ -3106,9 +3300,6 @@ AfterTriggerBeginSubXact(void)
                        /* repalloc will keep the stacks in the same context */
                        int                     new_alloc = afterTriggers->maxtransdepth * 2;
 
-                       afterTriggers->cxt_stack = (MemoryContext *)
-                               repalloc(afterTriggers->cxt_stack,
-                                                new_alloc * sizeof(MemoryContext));
                        afterTriggers->state_stack = (SetConstraintState *)
                                repalloc(afterTriggers->state_stack,
                                                 new_alloc * sizeof(SetConstraintState));
@@ -3130,7 +3321,6 @@ AfterTriggerBeginSubXact(void)
         * is not saved until/unless changed.  Likewise, we don't make a
         * per-subtransaction event context until needed.
         */
-       afterTriggers->cxt_stack[my_level] = NULL;
        afterTriggers->state_stack[my_level] = NULL;
        afterTriggers->events_stack[my_level] = afterTriggers->events;
        afterTriggers->depth_stack[my_level] = afterTriggers->query_depth;
@@ -3148,6 +3338,7 @@ AfterTriggerEndSubXact(bool isCommit)
        int                     my_level = GetCurrentTransactionNestLevel();
        SetConstraintState state;
        AfterTriggerEvent event;
+       AfterTriggerEventChunk *chunk;
        CommandId       subxact_firing_id;
 
        /*
@@ -3172,55 +3363,27 @@ AfterTriggerEndSubXact(bool isCommit)
                afterTriggers->state_stack[my_level] = NULL;
                Assert(afterTriggers->query_depth ==
                           afterTriggers->depth_stack[my_level]);
-
-               /*
-                * It's entirely possible that the subxact created an event_cxt but
-                * there is not anything left in it (because all the triggers were
-                * fired at end-of-statement).  If so, we should release the context
-                * to prevent memory leakage in a long sequence of subtransactions. We
-                * can detect whether there's anything of use in the context by seeing
-                * if anything was added to the global events list since subxact
-                * start.  (This test doesn't catch every case where the context is
-                * deletable; for instance maybe the only additions were from a
-                * sub-sub-xact.  But it handles the common case.)
-                */
-               if (afterTriggers->cxt_stack[my_level] &&
-                       afterTriggers->events.tail == afterTriggers->events_stack[my_level].tail)
-               {
-                       MemoryContextDelete(afterTriggers->cxt_stack[my_level]);
-                       /* avoid double delete if abort later */
-                       afterTriggers->cxt_stack[my_level] = NULL;
-               }
        }
        else
        {
                /*
-                * Aborting.  We don't really need to release the subxact's event_cxt,
-                * since it will go away anyway when CurTransactionContext gets reset,
-                * but doing so early in subxact abort helps free space we might need.
-                *
-                * (Note: any event_cxts of child subtransactions could also be
-                * deleted here, but we have no convenient way to find them, so we
-                * leave it to CurTransactionContext reset to clean them up.)
+                * Aborting.  Release any event lists from queries being aborted,
+                * and restore query_depth to its pre-subxact value.
                 */
-               if (afterTriggers->cxt_stack[my_level])
+               while (afterTriggers->query_depth > afterTriggers->depth_stack[my_level])
                {
-                       MemoryContextDelete(afterTriggers->cxt_stack[my_level]);
-                       /* avoid double delete if repeated aborts */
-                       afterTriggers->cxt_stack[my_level] = NULL;
+                       afterTriggerFreeEventList(&afterTriggers->query_stack[afterTriggers->query_depth]);
+                       afterTriggers->query_depth--;
                }
+               Assert(afterTriggers->query_depth ==
+                          afterTriggers->depth_stack[my_level]);
 
                /*
-                * Restore the pointers from the stacks.
-                */
-               afterTriggers->events = afterTriggers->events_stack[my_level];
-               afterTriggers->query_depth = afterTriggers->depth_stack[my_level];
-
-               /*
-                * Cleanup the tail of the list.
+                * Restore the global deferred-event list to its former length,
+                * discarding any events queued by the subxact.
                 */
-               if (afterTriggers->events.tail != NULL)
-                       afterTriggers->events.tail->ate_next = NULL;
+               afterTriggerRestoreEventList(&afterTriggers->events,
+                                                                        &afterTriggers->events_stack[my_level]);
 
                /*
                 * Restore the trigger state.  If the saved state is NULL, then this
@@ -3244,15 +3407,15 @@ AfterTriggerEndSubXact(bool isCommit)
                 * subxacts started after it.)
                 */
                subxact_firing_id = afterTriggers->firing_stack[my_level];
-               for (event = afterTriggers->events.head;
-                        event != NULL;
-                        event = event->ate_next)
+               for_each_event_chunk(event, chunk, afterTriggers->events)
                {
-                       if (event->ate_event &
+                       AfterTriggerShared evtshared = GetTriggerSharedData(event);
+
+                       if (event->ate_flags &
                                (AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS))
                        {
-                               if (event->ate_firing_id >= subxact_firing_id)
-                                       event->ate_event &=
+                               if (evtshared->ats_firing_id >= subxact_firing_id)
+                                       event->ate_flags &=
                                                ~(AFTER_TRIGGER_DONE | AFTER_TRIGGER_IN_PROGRESS);
                        }
                }
@@ -3579,8 +3742,9 @@ AfterTriggerSetState(ConstraintsSetStmt *stmt)
                         * but we'd better not if inside a subtransaction, since the
                         * subtransaction could later get rolled back.
                         */
-                       afterTriggerInvokeEvents(events, firing_id, NULL,
-                                                                        !IsSubTransaction());
+                       if (afterTriggerInvokeEvents(events, firing_id, NULL,
+                                                                                !IsSubTransaction()))
+                               break;                  /* all fired */
                }
        }
 }
@@ -3604,6 +3768,7 @@ bool
 AfterTriggerPendingOnRel(Oid relid)
 {
        AfterTriggerEvent event;
+       AfterTriggerEventChunk *chunk;
        int                     depth;
 
        /* No-op if we aren't in a transaction.  (Shouldn't happen?) */
@@ -3611,19 +3776,19 @@ AfterTriggerPendingOnRel(Oid relid)
                return false;
 
        /* Scan queued events */
-       for (event = afterTriggers->events.head;
-                event != NULL;
-                event = event->ate_next)
+       for_each_event_chunk(event, chunk, afterTriggers->events)
        {
+               AfterTriggerShared evtshared = GetTriggerSharedData(event);
+
                /*
                 * We can ignore completed events.      (Even if a DONE flag is rolled
                 * back by subxact abort, it's OK because the effects of the TRUNCATE
                 * or whatever must get rolled back too.)
                 */
-               if (event->ate_event & AFTER_TRIGGER_DONE)
+               if (event->ate_flags & AFTER_TRIGGER_DONE)
                        continue;
 
-               if (event->ate_relid == relid)
+               if (evtshared->ats_relid == relid)
                        return true;
        }
 
@@ -3634,14 +3799,14 @@ AfterTriggerPendingOnRel(Oid relid)
         */
        for (depth = 0; depth <= afterTriggers->query_depth; depth++)
        {
-               for (event = afterTriggers->query_stack[depth].head;
-                        event != NULL;
-                        event = event->ate_next)
+               for_each_event_chunk(event, chunk, afterTriggers->query_stack[depth])
                {
-                       if (event->ate_event & AFTER_TRIGGER_DONE)
+                       AfterTriggerShared evtshared = GetTriggerSharedData(event);
+
+                       if (event->ate_flags & AFTER_TRIGGER_DONE)
                                continue;
 
-                       if (event->ate_relid == relid)
+                       if (evtshared->ats_relid == relid)
                                return true;
                }
        }
@@ -3665,35 +3830,85 @@ AfterTriggerSaveEvent(ResultRelInfo *relinfo, int event, bool row_trigger,
 {
        Relation        rel = relinfo->ri_RelationDesc;
        TriggerDesc *trigdesc = relinfo->ri_TrigDesc;
-       int                     my_level = GetCurrentTransactionNestLevel();
-       MemoryContext *cxtptr;
-       AfterTriggerEvent new_event;
+       AfterTriggerEventData new_event;
+       AfterTriggerSharedData new_shared;
        int                     i;
        int                     ntriggers;
        int                *tgindx;
-       ItemPointerData oldctid;
-       ItemPointerData newctid;
 
        if (afterTriggers == NULL)
                elog(ERROR, "AfterTriggerSaveEvent() called outside of transaction");
+       Assert(afterTriggers->query_depth >= 0);
 
        /*
-        * event is used both as a bitmask and an array offset,
-        * so make sure we don't walk off the edge of our arrays
-        */
-       Assert(event >= 0 && event < TRIGGER_NUM_EVENT_CLASSES);
-
-       /*
-        * Get the CTID's of OLD and NEW
+        * Validate the event code and collect the associated tuple CTIDs.
+        *
+        * The event code will be used both as a bitmask and an array offset, so
+        * validation is important to make sure we don't walk off the edge of our
+        * arrays.
         */
-       if (oldtup != NULL)
-               ItemPointerCopy(&(oldtup->t_self), &(oldctid));
-       else
-               ItemPointerSetInvalid(&(oldctid));
-       if (newtup != NULL)
-               ItemPointerCopy(&(newtup->t_self), &(newctid));
-       else
-               ItemPointerSetInvalid(&(newctid));
+       new_event.ate_flags = 0;
+       switch (event)
+       {
+               case TRIGGER_EVENT_INSERT:
+                       if (row_trigger)
+                       {
+                               Assert(oldtup == NULL);
+                               Assert(newtup != NULL);
+                               ItemPointerCopy(&(newtup->t_self), &(new_event.ate_ctid1));
+                               ItemPointerSetInvalid(&(new_event.ate_ctid2));
+                       }
+                       else
+                       {
+                               Assert(oldtup == NULL);
+                               Assert(newtup == NULL);
+                               ItemPointerSetInvalid(&(new_event.ate_ctid1));
+                               ItemPointerSetInvalid(&(new_event.ate_ctid2));
+                       }
+                       break;
+               case TRIGGER_EVENT_DELETE:
+                       if (row_trigger)
+                       {
+                               Assert(oldtup != NULL);
+                               Assert(newtup == NULL);
+                               ItemPointerCopy(&(oldtup->t_self), &(new_event.ate_ctid1));
+                               ItemPointerSetInvalid(&(new_event.ate_ctid2));
+                       }
+                       else
+                       {
+                               Assert(oldtup == NULL);
+                               Assert(newtup == NULL);
+                               ItemPointerSetInvalid(&(new_event.ate_ctid1));
+                               ItemPointerSetInvalid(&(new_event.ate_ctid2));
+                       }
+                       break;
+               case TRIGGER_EVENT_UPDATE:
+                       if (row_trigger)
+                       {
+                               Assert(oldtup != NULL);
+                               Assert(newtup != NULL);
+                               ItemPointerCopy(&(oldtup->t_self), &(new_event.ate_ctid1));
+                               ItemPointerCopy(&(newtup->t_self), &(new_event.ate_ctid2));
+                               new_event.ate_flags |= AFTER_TRIGGER_2CTIDS;
+                       }
+                       else
+                       {
+                               Assert(oldtup == NULL);
+                               Assert(newtup == NULL);
+                               ItemPointerSetInvalid(&(new_event.ate_ctid1));
+                               ItemPointerSetInvalid(&(new_event.ate_ctid2));
+                       }
+                       break;
+               case TRIGGER_EVENT_TRUNCATE:
+                       Assert(oldtup == NULL);
+                       Assert(newtup == NULL);
+                       ItemPointerSetInvalid(&(new_event.ate_ctid1));
+                       ItemPointerSetInvalid(&(new_event.ate_ctid2));
+                       break;
+               default:
+                       elog(ERROR, "invalid after-trigger event code: %d", event);
+                       break;
+       }
 
        /*
         * Scan the appropriate set of triggers
@@ -3772,44 +3987,18 @@ AfterTriggerSaveEvent(ResultRelInfo *relinfo, int event, bool row_trigger,
                }
 
                /*
-                * If we don't yet have an event context for the current (sub)xact,
-                * create one.  Make it a child of CurTransactionContext to ensure it
-                * will go away if the subtransaction aborts.
-                */
-               if (my_level > 1)               /* subtransaction? */
-               {
-                       Assert(my_level < afterTriggers->maxtransdepth);
-                       cxtptr = &afterTriggers->cxt_stack[my_level];
-               }
-               else
-                       cxtptr = &afterTriggers->event_cxt;
-               if (*cxtptr == NULL)
-                       *cxtptr = AllocSetContextCreate(CurTransactionContext,
-                                                                                       "AfterTriggerEvents",
-                                                                                       ALLOCSET_DEFAULT_MINSIZE,
-                                                                                       ALLOCSET_DEFAULT_INITSIZE,
-                                                                                       ALLOCSET_DEFAULT_MAXSIZE);
-
-               /*
-                * Create a new event.
+                * Fill in event structure and add it to the current query's queue.
                 */
-               new_event = (AfterTriggerEvent)
-                       MemoryContextAlloc(*cxtptr, sizeof(AfterTriggerEventData));
-               new_event->ate_next = NULL;
-               new_event->ate_event =
+               new_shared.ats_event =
                        (event & TRIGGER_EVENT_OPMASK) |
                        (row_trigger ? TRIGGER_EVENT_ROW : 0) |
                        (trigger->tgdeferrable ? AFTER_TRIGGER_DEFERRABLE : 0) |
                        (trigger->tginitdeferred ? AFTER_TRIGGER_INITDEFERRED : 0);
-               new_event->ate_firing_id = 0;
-               new_event->ate_tgoid = trigger->tgoid;
-               new_event->ate_relid = rel->rd_id;
-               ItemPointerCopy(&oldctid, &(new_event->ate_oldctid));
-               ItemPointerCopy(&newctid, &(new_event->ate_newctid));
+               new_shared.ats_tgoid = trigger->tgoid;
+               new_shared.ats_relid = RelationGetRelid(rel);
+               new_shared.ats_firing_id = 0;
 
-               /*
-                * Add the new event to the queue.
-                */
-               afterTriggerAddEvent(new_event);
+               afterTriggerAddEvent(&afterTriggers->query_stack[afterTriggers->query_depth],
+                                                        &new_event, &new_shared);
        }
 }
index ae4f9997765f4d19e6025a944f152a03890a244c..bdc089f33afb6bf2e680d71fe661bb39e1bad4f7 100644 (file)
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/commands/trigger.h,v 1.68 2008/09/19 14:43:46 mha Exp $
+ * $PostgreSQL: pgsql/src/include/commands/trigger.h,v 1.69 2008/10/24 23:42:35 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -56,10 +56,8 @@ typedef struct TriggerData
 
 /* More TriggerEvent flags, used only within trigger.c */
 
-#define AFTER_TRIGGER_DONE                             0x00000010
-#define AFTER_TRIGGER_IN_PROGRESS              0x00000020
-#define AFTER_TRIGGER_DEFERRABLE               0x00000040
-#define AFTER_TRIGGER_INITDEFERRED             0x00000080
+#define AFTER_TRIGGER_DEFERRABLE               0x00000010
+#define AFTER_TRIGGER_INITDEFERRED             0x00000020
 
 #define TRIGGER_FIRED_BY_INSERT(event) \
                (((TriggerEvent) (event) & TRIGGER_EVENT_OPMASK) == \