]> granicus.if.org Git - postgresql/commitdiff
Generic implementation of red-black binary tree. It's planned to use in
authorTeodor Sigaev <teodor@sigaev.ru>
Thu, 11 Feb 2010 14:29:50 +0000 (14:29 +0000)
committerTeodor Sigaev <teodor@sigaev.ru>
Thu, 11 Feb 2010 14:29:50 +0000 (14:29 +0000)
several places, but for now only GIN uses it during index creation.
Using self-balanced tree greatly speeds up index creation in corner cases
with preordered data.

src/backend/access/gin/ginbulk.c
src/backend/access/gin/ginfast.c
src/backend/access/gin/gininsert.c
src/backend/utils/misc/Makefile
src/backend/utils/misc/rbtree.c [new file with mode: 0644]
src/include/access/gin.h
src/include/utils/rbtree.h [new file with mode: 0644]

index 8f00e1305b37069aadc9a357e3d4bfb6dd1ec340..accd6640375f1cc7065ee33be5563baba756bf57 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.17 2010/01/02 16:57:33 momjian Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.18 2010/02/11 14:29:50 teodor Exp $
  *-------------------------------------------------------------------------
  */
 
 #define DEF_NENTRY     2048
 #define DEF_NPTR       4
 
-void
-ginInitBA(BuildAccumulator *accum)
+static void*
+ginAppendData(void *old, void *new, void *arg)
 {
-       accum->maxdepth = 1;
-       accum->stackpos = 0;
-       accum->entries = NULL;
-       accum->stack = NULL;
-       accum->allocatedMemory = 0;
-       accum->entryallocator = NULL;
-}
+       EntryAccumulator        *eo = (EntryAccumulator*)old,
+                                               *en = (EntryAccumulator*)new;
 
-static EntryAccumulator *
-EAAllocate(BuildAccumulator *accum)
-{
-       if (accum->entryallocator == NULL || accum->length >= DEF_NENTRY)
-       {
-               accum->entryallocator = palloc(sizeof(EntryAccumulator) * DEF_NENTRY);
-               accum->allocatedMemory += GetMemoryChunkSpace(accum->entryallocator);
-               accum->length = 0;
-       }
-
-       accum->length++;
-       return accum->entryallocator + accum->length - 1;
-}
+       BuildAccumulator        *accum = (BuildAccumulator*)arg;
 
-/*
- * Stores heap item pointer. For robust, it checks that
- * item pointer are ordered
- */
-static void
-ginInsertData(BuildAccumulator *accum, EntryAccumulator *entry, ItemPointer heapptr)
-{
-       if (entry->number >= entry->length)
+       if (eo->number >= eo->length)
        {
-               accum->allocatedMemory -= GetMemoryChunkSpace(entry->list);
-               entry->length *= 2;
-               entry->list = (ItemPointerData *) repalloc(entry->list,
-                                                                       sizeof(ItemPointerData) * entry->length);
-               accum->allocatedMemory += GetMemoryChunkSpace(entry->list);
+               accum->allocatedMemory -= GetMemoryChunkSpace(eo->list);
+               eo->length *= 2;
+               eo->list = (ItemPointerData *) repalloc(eo->list,
+                                                                       sizeof(ItemPointerData) * eo->length);
+               accum->allocatedMemory += GetMemoryChunkSpace(eo->list);
        }
 
-       if (entry->shouldSort == FALSE)
+       /* If item pointers are not ordered, they will need to be sorted. */
+       if (eo->shouldSort == FALSE)
        {
-               int                     res = compareItemPointers(entry->list + entry->number - 1, heapptr);
+               int                     res;
 
+               res = compareItemPointers(eo->list + eo->number - 1, en->list);
                Assert(res != 0);
 
                if (res > 0)
-                       entry->shouldSort = TRUE;
+                       eo->shouldSort = TRUE;
        }
 
-       entry->list[entry->number] = *heapptr;
-       entry->number++;
+       eo->list[eo->number] = en->list[0];
+       eo->number++;
+
+       return old;
+}
+
+static int
+cmpEntryAccumulator(const void *a, const void *b, void *arg)
+{
+       EntryAccumulator        *ea = (EntryAccumulator*)a;
+       EntryAccumulator        *eb = (EntryAccumulator*)b;
+       BuildAccumulator        *accum = (BuildAccumulator*)arg;
+
+       return compareAttEntries(accum->ginstate, ea->attnum, ea->value,
+                                                        eb->attnum, eb->value);
+}
+
+void
+ginInitBA(BuildAccumulator *accum)
+{
+       accum->allocatedMemory = 0;
+       accum->entryallocator = NULL;
+       accum->tree = rb_create(cmpEntryAccumulator, ginAppendData, NULL, accum);
+       accum->iterator = NULL;
+       accum->tmpList = NULL;
 }
 
 /*
@@ -103,111 +104,104 @@ getDatumCopy(BuildAccumulator *accum, OffsetNumber attnum, Datum value)
 static void
 ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, Datum entry)
 {
-       EntryAccumulator *ea = accum->entries,
-                          *pea = NULL;
-       int                     res = 0;
-       uint32          depth = 1;
-
-       while (ea)
+       EntryAccumulator        *key,
+                                               *ea;
+
+       /* 
+        * Allocate memory by rather big chunk to decrease overhead, we don't
+        * keep pointer to previously allocated chunks because they will free
+        * by MemoryContextReset() call.
+        */
+       if (accum->entryallocator == NULL || accum->length >= DEF_NENTRY)
        {
-               res = compareAttEntries(accum->ginstate, attnum, entry, ea->attnum, ea->value);
-               if (res == 0)
-                       break;                          /* found */
-               else
-               {
-                       pea = ea;
-                       if (res < 0)
-                               ea = ea->left;
-                       else
-                               ea = ea->right;
-               }
-               depth++;
+               accum->entryallocator = palloc(sizeof(EntryAccumulator) * DEF_NENTRY);
+               accum->allocatedMemory += GetMemoryChunkSpace(accum->entryallocator);
+               accum->length = 0;
        }
 
-       if (depth > accum->maxdepth)
-               accum->maxdepth = depth;
+       /* "Allocate" new key in chunk */
+       key = accum->entryallocator + accum->length;
+       accum->length++;
+
+       key->attnum = attnum;
+       key->value = entry;
+       /* To prevent multiple palloc/pfree cycles, we reuse array */ 
+       if (accum->tmpList == NULL)
+               accum->tmpList =
+                       (ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR);
+       key->list = accum->tmpList;
+       key->list[0] = *heapptr;
+
+       ea = rb_insert(accum->tree, key);
 
        if (ea == NULL)
        {
-               ea = EAAllocate(accum);
-
-               ea->left = ea->right = NULL;
-               ea->attnum = attnum;
-               ea->value = getDatumCopy(accum, attnum, entry);
-               ea->length = DEF_NPTR;
-               ea->number = 1;
-               ea->shouldSort = FALSE;
-               ea->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR);
-               accum->allocatedMemory += GetMemoryChunkSpace(ea->list);
-               ea->list[0] = *heapptr;
-
-               if (pea == NULL)
-                       accum->entries = ea;
-               else
-               {
-                       Assert(res != 0);
-                       if (res < 0)
-                               pea->left = ea;
-                       else
-                               pea->right = ea;
-               }
+               /*
+                * The key has been inserted, so continue initialization.
+                */
+               key->value = getDatumCopy(accum, attnum, entry);
+               key->length = DEF_NPTR;
+               key->number = 1;
+               key->shouldSort = FALSE;
+               accum->allocatedMemory += GetMemoryChunkSpace(key->list);
+               accum->tmpList = NULL;
        }
        else
-               ginInsertData(accum, ea, heapptr);
-}
-
-/*
- * insert middle of left part the middle of right one,
- * then calls itself for each parts
- */
-static void
-ginChooseElem(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum,
-                         Datum *entries, uint32 nentry,
-                         uint32 low, uint32 high, uint32 offset)
-{
-       uint32          pos;
-       uint32          middle = (low + high) >> 1;
-
-       pos = (low + middle) >> 1;
-       if (low != middle && pos >= offset && pos - offset < nentry)
-               ginInsertEntry(accum, heapptr, attnum, entries[pos - offset]);
-       pos = (high + middle + 1) >> 1;
-       if (middle + 1 != high && pos >= offset && pos - offset < nentry)
-               ginInsertEntry(accum, heapptr, attnum, entries[pos - offset]);
-
-       if (low != middle)
-               ginChooseElem(accum, heapptr, attnum, entries, nentry, low, middle, offset);
-       if (high != middle + 1)
-               ginChooseElem(accum, heapptr, attnum, entries, nentry, middle + 1, high, offset);
+       {
+               /*
+                * The key has been appended, so "free" allocated
+                * key by decrementing chunk's counter.
+                */
+               accum->length--;
+       }
 }
 
 /*
- * Insert one heap pointer. Suppose entries is sorted.
- * Insertion order tries to get binary tree balanced: first insert middle value,
- * next middle on left part and middle of right part.
+ * Insert one heap pointer.
+ *
+ * Since the entries are being inserted into a balanced binary tree, you
+ * might think that the order of insertion wouldn't be critical, but it turns
+ * out that inserting the entries in sorted order results in a lot of
+ * rebalancing operations and is slow.  To prevent this, we attempt to insert
+ * the nodes in an order that will produce a nearly-balanced tree if the input
+ * is in fact sorted.
+ *
+ * We do this as follows.  First, we imagine that we have an array whose size
+ * is the smallest power of two greater than or equal to the actual array
+ * size.  Second, we insert the middle entry of our virtual array into the
+ * tree; then, we insert the middles of each half of out virtual array, then
+ * middles of quarters, etc.
  */
-void
+        void
 ginInsertRecordBA(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum,
                                  Datum *entries, int32 nentry)
 {
-       uint32          i,
-                               nbit = 0,
-                               offset;
+       uint32  step = nentry;
 
        if (nentry <= 0)
                return;
 
        Assert(ItemPointerIsValid(heapptr) && attnum >= FirstOffsetNumber);
 
-       i = nentry - 1;
-       for (; i > 0; i >>= 1)
-               nbit++;
+       /*
+        * step will contain largest power of 2 and <= nentry
+        */
+       step |= (step >>  1);
+       step |= (step >>  2);
+       step |= (step >>  4);
+       step |= (step >>  8);
+       step |= (step >> 16);
+       step >>= 1;
+       step ++;
 
-       nbit = 1 << nbit;
-       offset = (nbit - nentry) / 2;
+       while(step > 0) {
+               int i;
 
-       ginInsertEntry(accum, heapptr, attnum, entries[(nbit >> 1) - offset]);
-       ginChooseElem(accum, heapptr, attnum, entries, nentry, 0, nbit, offset);
+               for (i = step - 1; i < nentry && i >= 0; i += step << 1 /* *2 */)
+                       ginInsertEntry(accum, heapptr, attnum, entries[i]);
+
+               step >>= 1; /* /2 */
+       }
 }
 
 static int
@@ -219,86 +213,16 @@ qsortCompareItemPointers(const void *a, const void *b)
        return res;
 }
 
-/*
- * walk on binary tree and returns ordered nodes
- */
-static EntryAccumulator *
-walkTree(BuildAccumulator *accum)
-{
-       EntryAccumulator *entry = accum->stack[accum->stackpos];
-
-       if (entry->list != NULL)
-       {
-               /* return entry itself: we already was at left sublink */
-               return entry;
-       }
-       else if (entry->right && entry->right != accum->stack[accum->stackpos + 1])
-       {
-               /* go on right sublink */
-               accum->stackpos++;
-               entry = entry->right;
-
-               /* find most-left value */
-               for (;;)
-               {
-                       accum->stack[accum->stackpos] = entry;
-                       if (entry->left)
-                       {
-                               accum->stackpos++;
-                               entry = entry->left;
-                       }
-                       else
-                               break;
-               }
-       }
-       else
-       {
-               /* we already return all left subtree, itself and  right subtree */
-               if (accum->stackpos == 0)
-                       return 0;
-               accum->stackpos--;
-               return walkTree(accum);
-       }
-
-       return entry;
-}
-
 ItemPointerData *
 ginGetEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *value, uint32 *n)
 {
        EntryAccumulator *entry;
        ItemPointerData *list;
 
-       if (accum->stack == NULL)
-       {
-               /* first call */
-               accum->stack = palloc0(sizeof(EntryAccumulator *) * (accum->maxdepth + 1));
-               accum->allocatedMemory += GetMemoryChunkSpace(accum->stack);
-               entry = accum->entries;
-
-               if (entry == NULL)
-                       return NULL;
-
-               /* find most-left value */
-               for (;;)
-               {
-                       accum->stack[accum->stackpos] = entry;
-                       if (entry->left)
-                       {
-                               accum->stackpos++;
-                               entry = entry->left;
-                       }
-                       else
-                               break;
-               }
-       }
-       else
-       {
-               accum->allocatedMemory -= GetMemoryChunkSpace(accum->stack[accum->stackpos]->list);
-               pfree(accum->stack[accum->stackpos]->list);
-               accum->stack[accum->stackpos]->list = NULL;
-               entry = walkTree(accum);
-       }
+       if (accum->iterator == NULL)
+               accum->iterator = rb_begin_iterate(accum->tree, LeftRightWalk);
+
+       entry = rb_iterate(accum->iterator);
 
        if (entry == NULL)
                return NULL;
index fb22bd035e93e1f2989530fc40874128608b6337..f8e0b5ad40f0464ebd8785b053b1e3b9484daaf7 100644 (file)
@@ -11,7 +11,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/ginfast.c,v 1.6 2010/01/02 16:57:33 momjian Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/ginfast.c,v 1.7 2010/02/11 14:29:50 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -765,8 +765,7 @@ ginInsertCleanup(Relation index, GinState *ginstate,
                 */
                if (GinPageGetOpaque(page)->rightlink == InvalidBlockNumber ||
                        (GinPageHasFullRow(page) &&
-                        (accum.allocatedMemory >= maintenance_work_mem * 1024L ||
-                         accum.maxdepth > GIN_MAX_TREE_DEPTH)))
+                        (accum.allocatedMemory >= maintenance_work_mem * 1024L)))
                {
                        ItemPointerData *list;
                        uint32          nlist;
index 54ec6ead0d4c1e38dc0a8b01c9a52c507aacb5a4..e2a5e8b0135d7a5fd5ba8aa00414f79807d2974d 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.25 2010/01/02 16:57:33 momjian Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.26 2010/02/11 14:29:50 teodor Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -247,9 +247,7 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
                                                                                                                        &htup->t_self);
 
        /* If we've maxed out our available memory, dump everything to the index */
-       /* Also dump if the tree seems to be getting too unbalanced */
-       if (buildstate->accum.allocatedMemory >= maintenance_work_mem * 1024L ||
-               buildstate->accum.maxdepth > GIN_MAX_TREE_DEPTH)
+       if (buildstate->accum.allocatedMemory >= maintenance_work_mem * 1024L)
        {
                ItemPointerData *list;
                Datum           entry;
index d94530dca1ff04a26678c1fd65e6b428bb8927c4..596d02d61fcc48e027ba5ec54f90e878bea1a3ff 100644 (file)
@@ -4,7 +4,7 @@
 #    Makefile for utils/misc
 #
 # IDENTIFICATION
-#    $PostgreSQL: pgsql/src/backend/utils/misc/Makefile,v 1.29 2009/08/28 20:26:19 petere Exp $
+#    $PostgreSQL: pgsql/src/backend/utils/misc/Makefile,v 1.30 2010/02/11 14:29:50 teodor Exp $
 #
 #-------------------------------------------------------------------------
 
@@ -14,7 +14,8 @@ include $(top_builddir)/src/Makefile.global
 
 override CPPFLAGS := -I. -I$(srcdir) $(CPPFLAGS)
 
-OBJS = guc.o help_config.o pg_rusage.o ps_status.o superuser.o tzparser.o
+OBJS = guc.o help_config.o pg_rusage.o ps_status.o superuser.o tzparser.o \
+       rbtree.o
 
 # This location might depend on the installation directories. Therefore
 # we can't subsitute it into pg_config.h.
diff --git a/src/backend/utils/misc/rbtree.c b/src/backend/utils/misc/rbtree.c
new file mode 100644 (file)
index 0000000..d7b129f
--- /dev/null
@@ -0,0 +1,790 @@
+/*-------------------------------------------------------------------------
+ *
+ * rbtree.c
+ *       implementation for PostgreSQL generic Red-Black binary tree package
+ *       Adopted from http://algolist.manual.ru/ds/rbtree.php
+ *
+ * This code comes from Thomas Niemann's "Sorting and Searching Algorithms:
+ * a Cookbook".
+ *
+ * See http://www.cs.auckland.ac.nz/software/AlgAnim/niemann/s_man.htm for
+ * license terms: "Source code, when part of a software project, may be used
+ * freely without reference to the author."
+ *
+ * Red-black trees are a type of balanced binary tree wherein (1) any child of
+ * a red node is always black, and (2) every path from root to leaf traverses
+ * an equal number of black nodes.  From these properties, it follows that the
+ * longest path from root to leaf is only about twice as long as the shortest,
+ * so lookups are guaranteed to run in O(lg n) time.
+ *
+ * Copyright (c) 1996-2009, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *       $PostgreSQL: pgsql/src/backend/utils/misc/rbtree.c,v 1.1 2010/02/11 14:29:50 teodor Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "utils/rbtree.h"
+
+/**********************************************************************
+ *                                              Declarations                                                             *
+ **********************************************************************/
+
+/*
+ * Values for RBNode->iteratorState
+ */
+#define InitialState   (0)
+#define FirstStepDone  (1)
+#define SecondStepDone (2)
+#define ThirdStepDone  (3)
+
+/*
+ * Colors of node
+ */
+#define RBBLACK                (0)
+#define RBRED          (1)
+
+typedef struct RBNode
+{
+       uint32          iteratorState:2,
+                               color:  1 ,
+                               unused: 29;
+       struct RBNode *left;
+       struct RBNode *right;
+       struct RBNode *parent;
+       void       *data;
+}      RBNode;
+
+struct RBTree
+{
+       RBNode     *root;
+       rb_comparator comparator;
+       rb_appendator appendator;
+       rb_freefunc freefunc;
+       void       *arg;
+};
+
+struct RBTreeIterator
+{
+       RBNode     *node;
+       void       *(*iterate) (RBTreeIterator *iterator);
+};
+
+/*
+ * all leafs are sentinels, use customized NIL name to prevent
+ * collision with sytem-wide NIL which is actually NULL
+ */
+#define RBNIL &sentinel
+
+RBNode         sentinel = {InitialState, RBBLACK, 0, RBNIL, RBNIL, NULL, NULL};
+
+/**********************************************************************
+ *                                               Create                                                                          *
+ **********************************************************************/
+
+RBTree *
+rb_create(rb_comparator comparator, rb_appendator appendator,
+                                 rb_freefunc freefunc, void *arg)
+{
+       RBTree     *tree = palloc(sizeof(RBTree));
+
+       tree->root = RBNIL;
+       tree->comparator = comparator;
+       tree->appendator = appendator;
+       tree->freefunc = freefunc;
+       tree->arg = arg;
+
+       return tree;
+}
+
+/**********************************************************************
+ *                                               Search                                                                          *
+ **********************************************************************/
+
+void *
+rb_find(RBTree *rb, void *data)
+{
+       RBNode     *node = rb->root;
+       int                     cmp;
+
+       while (node != RBNIL)
+       {
+               cmp = rb->comparator(data, node->data, rb->arg);
+
+               if (cmp == 0)
+                       return node->data;
+               else if (cmp < 0)
+                       node = node->left;
+               else
+                       node = node->right;
+       }
+
+       return NULL;
+}
+
+/**********************************************************************
+ *                                                       Insertion                                                               *
+ **********************************************************************/
+
+/*
+ * Rotate node x to left.
+ *
+ * x's right child takes its place in the tree, and x becomes the left
+ * child of that node.
+ */
+static void
+rb_rotate_left(RBTree *rb, RBNode *x)
+{
+       RBNode     *y = x->right;
+
+       /* establish x->right link */
+       x->right = y->left;
+       if (y->left != RBNIL)
+               y->left->parent = x;
+
+       /* establish y->parent link */
+       if (y != RBNIL)
+               y->parent = x->parent;
+       if (x->parent)
+       {
+               if (x == x->parent->left)
+                       x->parent->left = y;
+               else
+                       x->parent->right = y;
+       }
+       else
+       {
+               rb->root = y;
+       }
+
+       /* link x and y */
+       y->left = x;
+       if (x != RBNIL)
+               x->parent = y;
+}
+
+/*
+ * Rotate node x to right.
+ *
+ * x's left right child takes its place in the tree, and x becomes the right
+ * child of that node.
+ */
+static void
+rb_rotate_right(RBTree *rb, RBNode *x)
+{
+       RBNode     *y = x->left;
+
+       /* establish x->left link */
+       x->left = y->right;
+       if (y->right != RBNIL)
+               y->right->parent = x;
+
+       /* establish y->parent link */
+       if (y != RBNIL)
+               y->parent = x->parent;
+       if (x->parent)
+       {
+               if (x == x->parent->right)
+                       x->parent->right = y;
+               else
+                       x->parent->left = y;
+       }
+       else
+       {
+               rb->root = y;
+       }
+
+       /* link x and y */
+       y->right = x;
+       if (x != RBNIL)
+               x->parent = y;
+}
+
+/*
+ * Maintain Red-Black tree balance after inserting node x.
+ *
+ * The newly inserted node is always initially marked red.  That may lead to
+ * a situation where a red node has a red child, which is prohibited.  We can
+ * always fix the problem by a series of color changes and/or "rotations",
+ * which move the problem progressively higher up in the tree.  If one of the
+ * two red nodes is the root, we can always fix the problem by changing the
+ * root from red to black.
+ *
+ * (This does not work lower down in the tree because we must also maintain
+ * the invariant that every leaf has equal black-height.)
+ */
+static void
+rb_insert_fixup(RBTree *rb, RBNode *x)
+{
+       /*
+        * x is always a red node.  Initially, it is the newly inserted node.
+        * Each iteration of this loop moves it higher up in the tree.
+        */
+       while (x != rb->root && x->parent->color == RBRED)
+       {
+               /*
+                * x and x->parent are both red.  Fix depends on whether x->parent is
+                * a left or right child.  In either case, we define y to be the
+                * "uncle" of x, that is, the other child of x's grandparent.
+                *
+                * If the uncle is red, we flip the grandparent to red and its two
+                * children to black.  Then we loop around again to check whether the
+                * grandparent still has a problem.
+                *
+                * If the uncle is black, we will perform one or two "rotations" to
+                * balance the tree.  Either x or x->parent will take the grandparent's
+                * position in the tree and recolored black, and the original
+                * grandparent will be recolored red and become a child of that node.
+                * This always leaves us with a valid red-black tree, so the loop
+                * will terminate.
+                */
+               if (x->parent == x->parent->parent->left)
+               {
+                       RBNode     *y = x->parent->parent->right;
+
+                       if (y->color == RBRED)
+                       {
+                               /* uncle is RBRED */
+                               x->parent->color = RBBLACK;
+                               y->color = RBBLACK;
+                               x->parent->parent->color = RBRED;
+                               x = x->parent->parent;
+                       }
+                       else
+                       {
+                               /* uncle is RBBLACK */
+                               if (x == x->parent->right)
+                               {
+                                       /* make x a left child */
+                                       x = x->parent;
+                                       rb_rotate_left(rb, x);
+                               }
+
+                               /* recolor and rotate */
+                               x->parent->color = RBBLACK;
+                               x->parent->parent->color = RBRED;
+                               rb_rotate_right(rb, x->parent->parent);
+                       }
+               }
+               else
+               {
+                       /* mirror image of above code */
+                       RBNode     *y = x->parent->parent->left;
+
+                       if (y->color == RBRED)
+                       {
+                               /* uncle is RBRED */
+                               x->parent->color = RBBLACK;
+                               y->color = RBBLACK;
+                               x->parent->parent->color = RBRED;
+                               x = x->parent->parent;
+                       }
+                       else
+                       {
+                               /* uncle is RBBLACK */
+                               if (x == x->parent->left)
+                               {
+                                       x = x->parent;
+                                       rb_rotate_right(rb, x);
+                               }
+                               x->parent->color = RBBLACK;
+                               x->parent->parent->color = RBRED;
+                               rb_rotate_left(rb, x->parent->parent);
+                       }
+               }
+       }
+
+       /*
+        * The root may already have been black; if not, the black-height of every
+        * node in the tree increases by one.
+        */
+       rb->root->color = RBBLACK;
+}
+
+/*
+ * Allocate node for data and insert in tree.
+ *
+ * Return old data (or result of appendator method) if it exists and NULL
+ * otherwise.
+ */
+void *
+rb_insert(RBTree *rb, void *data)
+{
+       RBNode     *current,
+                          *parent,
+                          *x;
+       int                     cmp;
+
+       /* find where node belongs */
+       current = rb->root;
+       parent = NULL;
+       while (current != RBNIL)
+       {
+               cmp = rb->comparator(data, current->data, rb->arg);
+               if (cmp == 0)
+               {
+                       /*
+                        * Found node with given key.  If appendator method is provided,
+                        * call it to join old and new data; else, new data replaces old
+                        * data.
+                        */
+                       if (rb->appendator)
+                       {
+                               current->data = rb->appendator(current->data, data, rb->arg);
+                               return current->data;
+                       }
+                       else
+                       {
+                               void       *old = current->data;
+
+                               current->data = data;
+                               return old;
+                       }
+               }
+               parent = current;
+               current = (cmp < 0) ? current->left : current->right;
+       }
+
+       /* setup new node in tree */
+       x = palloc(sizeof(RBNode));
+       x->data = data;
+       x->parent = parent;
+       x->left = RBNIL;
+       x->right = RBNIL;
+       x->color = RBRED;
+       x->iteratorState = InitialState;
+
+       /* insert node in tree */
+       if (parent)
+       {
+               if (cmp < 0)
+                       parent->left = x;
+               else
+                       parent->right = x;
+       }
+       else
+       {
+               rb->root = x;
+       }
+
+       rb_insert_fixup(rb, x);
+       return NULL;
+}
+
+/**********************************************************************
+ *                                                     Deletion                                                                  *
+ **********************************************************************/
+
+/*
+ * Maintain Red-Black tree balance after deleting a black node.
+ */
+static void
+rb_delete_fixup(RBTree *rb, RBNode *x)
+{
+       /*
+        * x is always a black node.  Initially, it is the former child of the
+        * deleted node.  Each iteration of this loop moves it higher up in the
+        * tree.
+        */
+       while (x != rb->root && x->color == RBBLACK)
+       {
+               /*
+                * Left and right cases are symmetric.  Any nodes that are children
+                * of x have a black-height one less than the remainder of the nodes
+                * in the tree.  We rotate and recolor nodes to move the problem up
+                * the tree: at some stage we'll either fix the problem, or reach the
+                * root (where the black-height is allowed to decrease).
+                */
+               if (x == x->parent->left)
+               {
+                       RBNode     *w = x->parent->right;
+
+                       if (w->color == RBRED)
+                       {
+                               w->color = RBBLACK;
+                               x->parent->color = RBRED;
+                               rb_rotate_left(rb, x->parent);
+                               w = x->parent->right;
+                       }
+
+                       if (w->left->color == RBBLACK && w->right->color == RBBLACK)
+                       {
+                               w->color = RBRED;
+                               x = x->parent;
+                       }
+                       else
+                       {
+                               if (w->right->color == RBBLACK)
+                               {
+                                       w->left->color = RBBLACK;
+                                       w->color = RBRED;
+                                       rb_rotate_right(rb, w);
+                                       w = x->parent->right;
+                               }
+                               w->color = x->parent->color;
+                               x->parent->color = RBBLACK;
+                               w->right->color = RBBLACK;
+                               rb_rotate_left(rb, x->parent);
+                               x = rb->root;           /* Arrange for loop to terminate. */
+                       }
+               }
+               else
+               {
+                       RBNode     *w = x->parent->left;
+
+                       if (w->color == RBRED)
+                       {
+                               w->color = RBBLACK;
+                               x->parent->color = RBRED;
+                               rb_rotate_right(rb, x->parent);
+                               w = x->parent->left;
+                       }
+
+                       if (w->right->color == RBBLACK && w->left->color == RBBLACK)
+                       {
+                               w->color = RBRED;
+                               x = x->parent;
+                       }
+                       else
+                       {
+                               if (w->left->color == RBBLACK)
+                               {
+                                       w->right->color = RBBLACK;
+                                       w->color = RBRED;
+                                       rb_rotate_left(rb, w);
+                                       w = x->parent->left;
+                               }
+                               w->color = x->parent->color;
+                               x->parent->color = RBBLACK;
+                               w->left->color = RBBLACK;
+                               rb_rotate_right(rb, x->parent);
+                               x = rb->root;           /* Arrange for loop to terminate. */
+                       }
+               }
+       }
+       x->color = RBBLACK;
+}
+
+/*
+ * Delete node z from tree.
+ */
+static void
+rb_delete_node(RBTree *rb, RBNode *z)
+{
+       RBNode     *x,
+                          *y;
+
+       if (!z || z == RBNIL)
+               return;
+
+       /*
+        * y is the node that will actually be removed from the tree.  This will
+        * be z if z has fewer than two children, or the tree successor of z
+        * otherwise.
+        */
+       if (z->left == RBNIL || z->right == RBNIL)
+       {
+               /* y has a RBNIL node as a child */
+               y = z;
+       }
+       else
+       {
+               /* find tree successor */
+               y = z->right;
+               while (y->left != RBNIL)
+                       y = y->left;
+       }
+
+       /* x is y's only child */
+       if (y->left != RBNIL)
+               x = y->left;
+       else
+               x = y->right;
+
+       /* Remove y from the tree. */
+       x->parent = y->parent;
+       if (y->parent)
+       {
+               if (y == y->parent->left)
+                       y->parent->left = x;
+               else
+                       y->parent->right = x;
+       }
+       else
+       {
+               rb->root = x;
+       }
+
+       /*
+        * If we removed the tree successor of z rather than z itself, then
+        * attach the data for the removed node to the one we were supposed to
+        * remove.
+        */
+       if (y != z)
+               z->data = y->data;
+
+       /*
+        * Removing a black node might make some paths from root to leaf contain
+        * fewer black nodes than others, or it might make two red nodes adjacent.
+        */
+       if (y->color == RBBLACK)
+               rb_delete_fixup(rb, x);
+
+       pfree(y);
+}
+
+extern void
+rb_delete(RBTree *rb, void *data)
+{
+       RBNode     *node = rb->root;
+       int                     cmp;
+
+       while (node != RBNIL)
+       {
+               cmp = rb->comparator(data, node->data, rb->arg);
+
+               if (cmp == 0)
+               {
+                       /* found node to delete */
+                       if (rb->freefunc)
+                               rb->freefunc(node->data);
+                       node->data = NULL;
+                       rb_delete_node(rb, node);
+                       return;
+               }
+               else if (cmp < 0)
+                       node = node->left;
+               else
+                       node = node->right;
+       }
+}
+
+/*
+ * Return data on left most node and delete
+ * that node
+ */
+extern void *
+rb_leftmost(RBTree *rb)
+{
+       RBNode     *node = rb->root;
+       RBNode     *leftmost = rb->root;
+       void       *res = NULL;
+
+       while (node != RBNIL)
+       {
+               leftmost = node;
+               node = node->left;
+       }
+
+       if (leftmost != RBNIL)
+       {
+               res = leftmost->data;
+               leftmost->data = NULL;
+               rb_delete_node(rb, leftmost);
+       }
+
+       return res;
+}
+
+/**********************************************************************
+ *                                               Traverse                                                                        *
+ **********************************************************************/
+
+static void *
+rb_next_node(RBTreeIterator *iterator, RBNode *node)
+{
+       node->iteratorState = InitialState;
+       iterator->node = node;
+       return iterator->iterate(iterator);
+}
+
+static void *
+rb_left_right_iterator(RBTreeIterator *iterator)
+{
+       RBNode     *node = iterator->node;
+
+       switch (node->iteratorState)
+       {
+               case InitialState:
+                       if (node->left != RBNIL)
+                       {
+                               node->iteratorState = FirstStepDone;
+                               return rb_next_node(iterator, node->left);
+                       }
+               case FirstStepDone:
+                       node->iteratorState = SecondStepDone;
+                       return node->data;
+               case SecondStepDone:
+                       if (node->right != RBNIL)
+                       {
+                               node->iteratorState = ThirdStepDone;
+                               return rb_next_node(iterator, node->right);
+                       }
+               case ThirdStepDone:
+                       if (node->parent)
+                       {
+                               iterator->node = node->parent;
+                               return iterator->iterate(iterator);
+                       }
+                       break;
+               default:
+                       elog(ERROR, "Unknow node state: %d", node->iteratorState);
+       }
+
+       return NULL;
+}
+
+static void *
+rb_right_left_iterator(RBTreeIterator *iterator)
+{
+       RBNode     *node = iterator->node;
+
+       switch (node->iteratorState)
+       {
+               case InitialState:
+                       if (node->right != RBNIL)
+                       {
+                               node->iteratorState = FirstStepDone;
+                               return rb_next_node(iterator, node->right);
+                       }
+               case FirstStepDone:
+                       node->iteratorState = SecondStepDone;
+                       return node->data;
+               case SecondStepDone:
+                       if (node->left != RBNIL)
+                       {
+                               node->iteratorState = ThirdStepDone;
+                               return rb_next_node(iterator, node->left);
+                       }
+               case ThirdStepDone:
+                       if (node->parent)
+                       {
+                               iterator->node = node->parent;
+                               return iterator->iterate(iterator);
+                       }
+                       break;
+               default:
+                       elog(ERROR, "Unknow node state: %d", node->iteratorState);
+       }
+
+       return NULL;
+}
+
+static void *
+rb_direct_iterator(RBTreeIterator *iterator)
+{
+       RBNode     *node = iterator->node;
+
+       switch (node->iteratorState)
+       {
+               case InitialState:
+                       node->iteratorState = FirstStepDone;
+                       return node->data;
+               case FirstStepDone:
+                       if (node->left != RBNIL)
+                       {
+                               node->iteratorState = SecondStepDone;
+                               return rb_next_node(iterator, node->left);
+                       }
+               case SecondStepDone:
+                       if (node->right != RBNIL)
+                       {
+                               node->iteratorState = ThirdStepDone;
+                               return rb_next_node(iterator, node->right);
+                       }
+               case ThirdStepDone:
+                       if (node->parent)
+                       {
+                               iterator->node = node->parent;
+                               return iterator->iterate(iterator);
+                       }
+                       break;
+               default:
+                       elog(ERROR, "Unknow node state: %d", node->iteratorState);
+       }
+
+       return NULL;
+}
+
+static void *
+rb_inverted_iterator(RBTreeIterator *iterator)
+{
+       RBNode     *node = iterator->node;
+
+       switch (node->iteratorState)
+       {
+               case InitialState:
+                       if (node->left != RBNIL)
+                       {
+                               node->iteratorState = FirstStepDone;
+                               return rb_next_node(iterator, node->left);
+                       }
+               case FirstStepDone:
+                       if (node->right != RBNIL)
+                       {
+                               node->iteratorState = SecondStepDone;
+                               return rb_next_node(iterator, node->right);
+                       }
+               case SecondStepDone:
+                       node->iteratorState = ThirdStepDone;
+                       return node->data;
+               case ThirdStepDone:
+                       if (node->parent)
+                       {
+                               iterator->node = node->parent;
+                               return iterator->iterate(iterator);
+                       }
+                       break;
+               default:
+                       elog(ERROR, "Unknow node state: %d", node->iteratorState);
+       }
+
+       return NULL;
+}
+
+RBTreeIterator *
+rb_begin_iterate(RBTree *rb, RBOrderControl ctrl)
+{
+       RBTreeIterator *iterator = palloc(sizeof(RBTreeIterator));
+
+       iterator->node = rb->root;
+       if (iterator->node != RBNIL)
+               iterator->node->iteratorState = InitialState;
+
+       switch (ctrl)
+       {
+               case LeftRightWalk:                     /* visit left, then self, then right */
+                       iterator->iterate = rb_left_right_iterator;
+                       break;
+               case RightLeftWalk:                     /* visit right, then self, then left */
+                       iterator->iterate = rb_right_left_iterator;
+                       break;
+               case DirectWalk:                        /* visit self, then left, then right */
+                       iterator->iterate = rb_direct_iterator;
+                       break;
+               case InvertedWalk:                      /* visit left, then right, then self */
+                       iterator->iterate = rb_inverted_iterator;
+                       break;
+               default:
+                       elog(ERROR, "Unknown iterator order: %d", ctrl);
+       }
+
+       return iterator;
+}
+
+void *
+rb_iterate(RBTreeIterator *iterator)
+{
+       if (iterator->node == RBNIL)
+               return NULL;
+
+       return iterator->iterate(iterator);
+}
+
+void
+rb_free_iterator(RBTreeIterator *iterator)
+{
+       pfree(iterator);
+}
index 0ea25dc4039600a9d917df024e8f0279ab106994..48965613608b6af67cf1f5ea2d9382eb96a2e714 100644 (file)
@@ -4,7 +4,7 @@
  *
  *     Copyright (c) 2006-2010, PostgreSQL Global Development Group
  *
- *     $PostgreSQL: pgsql/src/include/access/gin.h,v 1.36 2010/01/02 16:58:00 momjian Exp $
+ *     $PostgreSQL: pgsql/src/include/access/gin.h,v 1.37 2010/02/11 14:29:50 teodor Exp $
  *--------------------------------------------------------------------------
  */
 #ifndef GIN_H
@@ -13,6 +13,7 @@
 #include "access/genam.h"
 #include "access/itup.h"
 #include "access/xlog.h"
+#include "utils/rbtree.h"
 #include "fmgr.h"
 
 
 #define GIN_COMPARE_PARTIAL_PROC          5
 #define GINNProcs                                         5
 
-/*
- * Max depth allowed in search tree during bulk inserts.  This is to keep from
- * degenerating to O(N^2) behavior when the tree is unbalanced due to sorted
- * or nearly-sorted input.     (Perhaps it would be better to use a balanced-tree
- * algorithm, but in common cases that would only add useless overhead.)
- */
-#define GIN_MAX_TREE_DEPTH 100
-
 /*
  * Page opaque data in a inverted index page.
  *
@@ -570,27 +563,23 @@ extern Datum ginarrayconsistent(PG_FUNCTION_ARGS);
 /* ginbulk.c */
 typedef struct EntryAccumulator
 {
-       OffsetNumber attnum;
        Datum           value;
        uint32          length;
        uint32          number;
-       ItemPointerData *list;
+       OffsetNumber attnum;
        bool            shouldSort;
-       struct EntryAccumulator *left;
-       struct EntryAccumulator *right;
+       ItemPointerData *list;
 } EntryAccumulator;
 
 typedef struct
 {
        GinState   *ginstate;
-       EntryAccumulator *entries;
-       uint32          maxdepth;
-       EntryAccumulator **stack;
-       uint32          stackpos;
        long            allocatedMemory;
-
        uint32          length;
-       EntryAccumulator *entryallocator;
+       EntryAccumulator   *entryallocator;
+       ItemPointerData    *tmpList;
+       RBTree     *tree;
+       RBTreeIterator *iterator;
 } BuildAccumulator;
 
 extern void ginInitBA(BuildAccumulator *accum);
diff --git a/src/include/utils/rbtree.h b/src/include/utils/rbtree.h
new file mode 100644 (file)
index 0000000..535a237
--- /dev/null
@@ -0,0 +1,46 @@
+/*-------------------------------------------------------------------------
+ *
+ * rbtree.h
+ *    interface for PostgreSQL generic Red-Black binary tree package
+ *
+ * Copyright (c) 1996-2009, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *             $PostgreSQL: pgsql/src/include/utils/rbtree.h,v 1.1 2010/02/11 14:29:50 teodor Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef RBTREE_H
+#define RBTREE_H
+
+typedef struct RBTree RBTree;
+typedef struct RBTreeIterator RBTreeIterator;
+
+typedef int (*rb_comparator) (const void *a, const void *b, void *arg);
+typedef void* (*rb_appendator) (void *current, void *new, void *arg);
+typedef void (*rb_freefunc) (void *a);
+
+extern RBTree *rb_create(rb_comparator comparator,
+                                                       rb_appendator appendator,
+                                                       rb_freefunc freefunc,
+                                                       void *arg);
+
+extern void *rb_find(RBTree *rb, void *data);
+extern void *rb_insert(RBTree *rb, void *data);
+extern void rb_delete(RBTree *rb, void *data);
+extern void *rb_leftmost(RBTree *rb);
+
+typedef enum RBOrderControl
+{
+       LeftRightWalk,
+       RightLeftWalk,
+       DirectWalk,
+       InvertedWalk
+} RBOrderControl;
+
+extern RBTreeIterator* rb_begin_iterate(RBTree *rb, RBOrderControl ctrl);
+extern void *rb_iterate(RBTreeIterator *iterator);
+extern void rb_free_iterator(RBTreeIterator *iterator);
+
+#endif