]> granicus.if.org Git - postgresql/commitdiff
Install a search tree depth limit in GIN bulk-insert operations, to prevent
authorTom Lane <tgl@sss.pgh.pa.us>
Tue, 24 Mar 2009 22:06:03 +0000 (22:06 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Tue, 24 Mar 2009 22:06:03 +0000 (22:06 +0000)
them from degrading badly when the input is sorted or nearly so.  In this
scenario the tree is unbalanced to the point of becoming a mere linked list,
so insertions become O(N^2).  The easiest and most safely back-patchable
solution is to stop growing the tree sooner, i.e., limit the growth of N.  We
might later consider a rebalancing tree algorithm, but it's not clear that
the benefit would be worth the cost and complexity.  Per report from Sergey
Burladyan and an earlier complaint from Heikki.

Back-patch to 8.2; older versions didn't have GIN indexes.

src/backend/access/gin/ginfast.c
src/backend/access/gin/gininsert.c
src/include/access/gin.h

index d8624237ec17941d510445c7601f005518512738..f474ad6598e03f1b8c5f26233d3b97a1bbb60b2a 100644 (file)
@@ -11,7 +11,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/ginfast.c,v 1.1 2009/03/24 20:17:10 tgl Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/ginfast.c,v 1.2 2009/03/24 22:06:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -749,9 +749,10 @@ ginInsertCleanup(Relation index, GinState *ginstate,
                 * XXX using up maintenance_work_mem here is probably unreasonably
                 * much, since vacuum might already be using that much.
                 */
-               if ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber ||
-                        ( GinPageHasFullRow(page) &&
-                          accum.allocatedMemory > maintenance_work_mem * 1024L ) )
+               if (GinPageGetOpaque(page)->rightlink == InvalidBlockNumber ||
+                       (GinPageHasFullRow(page) &&
+                        (accum.allocatedMemory >= maintenance_work_mem * 1024L ||
+                         accum.maxdepth > GIN_MAX_TREE_DEPTH)))
                {
                        ItemPointerData    *list;
                        uint32                  nlist;
index d05882cdb944f53bde3c1d0c34bc52f442983267..f6a348eb85d88f98ee5aa4f2508aa9c1f03f2d57 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.19 2009/03/24 20:17:11 tgl Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.20 2009/03/24 22:06:03 tgl Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -245,7 +245,9 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
                                                                                                                &htup->t_self);
 
        /* If we've maxed out our available memory, dump everything to the index */
-       if (buildstate->accum.allocatedMemory >= maintenance_work_mem * 1024L)
+       /* Also dump if the tree seems to be getting too unbalanced */
+       if (buildstate->accum.allocatedMemory >= maintenance_work_mem * 1024L ||
+               buildstate->accum.maxdepth > GIN_MAX_TREE_DEPTH)
        {
                ItemPointerData *list;
                Datum           entry;
index f0f45bc5e8ad118d9811e7359d5d8abb1b98384d..c591c53638c7215dbdc9ee8838057a0208fc24c9 100644 (file)
@@ -4,7 +4,7 @@
  *
  *     Copyright (c) 2006-2009, PostgreSQL Global Development Group
  *
- *     $PostgreSQL: pgsql/src/include/access/gin.h,v 1.29 2009/03/24 20:17:14 tgl Exp $
+ *     $PostgreSQL: pgsql/src/include/access/gin.h,v 1.30 2009/03/24 22:06:03 tgl Exp $
  *--------------------------------------------------------------------------
  */
 #ifndef GIN_H
 #define GIN_COMPARE_PARTIAL_PROC          5
 #define GINNProcs                                         5
 
+/*
+ * Max depth allowed in search tree during bulk inserts.  This is to keep from
+ * degenerating to O(N^2) behavior when the tree is unbalanced due to sorted
+ * or nearly-sorted input.  (Perhaps it would be better to use a balanced-tree
+ * algorithm, but in common cases that would only add useless overhead.)
+ */
+#define GIN_MAX_TREE_DEPTH 100
+
 /*
  * Page opaque data in an inverted index page.
  *
@@ -434,12 +442,9 @@ extern IndexTuple ginPageGetLinkItup(Buffer buf);
 
 /* gindatapage.c */
 extern int     compareItemPointers(ItemPointer a, ItemPointer b);
-extern void
-MergeItemPointers(
-                                 ItemPointerData *dst,
+extern void MergeItemPointers(ItemPointerData *dst,
                                  ItemPointerData *a, uint32 na,
-                                 ItemPointerData *b, uint32 nb
-);
+                                 ItemPointerData *b, uint32 nb);
 
 extern void GinDataPageAddItem(Page page, void *data, OffsetNumber offset);
 extern void PageDeletePostingItem(Page page, OffsetNumber offset);