From 5209c084a646018bf429e4a1800e76e7b8b548a7 Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Thu, 11 Feb 2010 14:29:50 +0000 Subject: [PATCH] Generic implementation of red-black binary tree. It's planned to use in several places, but for now only GIN uses it during index creation. Using self-balanced tree greatly speeds up index creation in corner cases with preordered data. --- src/backend/access/gin/ginbulk.c | 318 +++++------- src/backend/access/gin/ginfast.c | 5 +- src/backend/access/gin/gininsert.c | 6 +- src/backend/utils/misc/Makefile | 5 +- src/backend/utils/misc/rbtree.c | 790 +++++++++++++++++++++++++++++ src/include/access/gin.h | 27 +- src/include/utils/rbtree.h | 46 ++ 7 files changed, 972 insertions(+), 225 deletions(-) create mode 100644 src/backend/utils/misc/rbtree.c create mode 100644 src/include/utils/rbtree.h diff --git a/src/backend/access/gin/ginbulk.c b/src/backend/access/gin/ginbulk.c index 8f00e1305b..accd664037 100644 --- a/src/backend/access/gin/ginbulk.c +++ b/src/backend/access/gin/ginbulk.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.17 2010/01/02 16:57:33 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.18 2010/02/11 14:29:50 teodor Exp $ *------------------------------------------------------------------------- */ @@ -22,59 +22,60 @@ #define DEF_NENTRY 2048 #define DEF_NPTR 4 -void -ginInitBA(BuildAccumulator *accum) +static void* +ginAppendData(void *old, void *new, void *arg) { - accum->maxdepth = 1; - accum->stackpos = 0; - accum->entries = NULL; - accum->stack = NULL; - accum->allocatedMemory = 0; - accum->entryallocator = NULL; -} + EntryAccumulator *eo = (EntryAccumulator*)old, + *en = (EntryAccumulator*)new; -static EntryAccumulator * -EAAllocate(BuildAccumulator *accum) -{ - if (accum->entryallocator == NULL || accum->length >= DEF_NENTRY) - { - accum->entryallocator = palloc(sizeof(EntryAccumulator) * DEF_NENTRY); - accum->allocatedMemory += GetMemoryChunkSpace(accum->entryallocator); - accum->length = 0; - } - - accum->length++; - return accum->entryallocator + accum->length - 1; -} + BuildAccumulator *accum = (BuildAccumulator*)arg; -/* - * Stores heap item pointer. For robust, it checks that - * item pointer are ordered - */ -static void -ginInsertData(BuildAccumulator *accum, EntryAccumulator *entry, ItemPointer heapptr) -{ - if (entry->number >= entry->length) + if (eo->number >= eo->length) { - accum->allocatedMemory -= GetMemoryChunkSpace(entry->list); - entry->length *= 2; - entry->list = (ItemPointerData *) repalloc(entry->list, - sizeof(ItemPointerData) * entry->length); - accum->allocatedMemory += GetMemoryChunkSpace(entry->list); + accum->allocatedMemory -= GetMemoryChunkSpace(eo->list); + eo->length *= 2; + eo->list = (ItemPointerData *) repalloc(eo->list, + sizeof(ItemPointerData) * eo->length); + accum->allocatedMemory += GetMemoryChunkSpace(eo->list); } - if (entry->shouldSort == FALSE) + /* If item pointers are not ordered, they will need to be sorted. */ + if (eo->shouldSort == FALSE) { - int res = compareItemPointers(entry->list + entry->number - 1, heapptr); + int res; + res = compareItemPointers(eo->list + eo->number - 1, en->list); Assert(res != 0); if (res > 0) - entry->shouldSort = TRUE; + eo->shouldSort = TRUE; } - entry->list[entry->number] = *heapptr; - entry->number++; + eo->list[eo->number] = en->list[0]; + eo->number++; + + return old; +} + +static int +cmpEntryAccumulator(const void *a, const void *b, void *arg) +{ + EntryAccumulator *ea = (EntryAccumulator*)a; + EntryAccumulator *eb = (EntryAccumulator*)b; + BuildAccumulator *accum = (BuildAccumulator*)arg; + + return compareAttEntries(accum->ginstate, ea->attnum, ea->value, + eb->attnum, eb->value); +} + +void +ginInitBA(BuildAccumulator *accum) +{ + accum->allocatedMemory = 0; + accum->entryallocator = NULL; + accum->tree = rb_create(cmpEntryAccumulator, ginAppendData, NULL, accum); + accum->iterator = NULL; + accum->tmpList = NULL; } /* @@ -103,111 +104,104 @@ getDatumCopy(BuildAccumulator *accum, OffsetNumber attnum, Datum value) static void ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, Datum entry) { - EntryAccumulator *ea = accum->entries, - *pea = NULL; - int res = 0; - uint32 depth = 1; - - while (ea) + EntryAccumulator *key, + *ea; + + /* + * Allocate memory by rather big chunk to decrease overhead, we don't + * keep pointer to previously allocated chunks because they will free + * by MemoryContextReset() call. + */ + if (accum->entryallocator == NULL || accum->length >= DEF_NENTRY) { - res = compareAttEntries(accum->ginstate, attnum, entry, ea->attnum, ea->value); - if (res == 0) - break; /* found */ - else - { - pea = ea; - if (res < 0) - ea = ea->left; - else - ea = ea->right; - } - depth++; + accum->entryallocator = palloc(sizeof(EntryAccumulator) * DEF_NENTRY); + accum->allocatedMemory += GetMemoryChunkSpace(accum->entryallocator); + accum->length = 0; } - if (depth > accum->maxdepth) - accum->maxdepth = depth; + /* "Allocate" new key in chunk */ + key = accum->entryallocator + accum->length; + accum->length++; + + key->attnum = attnum; + key->value = entry; + /* To prevent multiple palloc/pfree cycles, we reuse array */ + if (accum->tmpList == NULL) + accum->tmpList = + (ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR); + key->list = accum->tmpList; + key->list[0] = *heapptr; + + ea = rb_insert(accum->tree, key); if (ea == NULL) { - ea = EAAllocate(accum); - - ea->left = ea->right = NULL; - ea->attnum = attnum; - ea->value = getDatumCopy(accum, attnum, entry); - ea->length = DEF_NPTR; - ea->number = 1; - ea->shouldSort = FALSE; - ea->list = (ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR); - accum->allocatedMemory += GetMemoryChunkSpace(ea->list); - ea->list[0] = *heapptr; - - if (pea == NULL) - accum->entries = ea; - else - { - Assert(res != 0); - if (res < 0) - pea->left = ea; - else - pea->right = ea; - } + /* + * The key has been inserted, so continue initialization. + */ + key->value = getDatumCopy(accum, attnum, entry); + key->length = DEF_NPTR; + key->number = 1; + key->shouldSort = FALSE; + accum->allocatedMemory += GetMemoryChunkSpace(key->list); + accum->tmpList = NULL; } else - ginInsertData(accum, ea, heapptr); -} - -/* - * insert middle of left part the middle of right one, - * then calls itself for each parts - */ -static void -ginChooseElem(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, - Datum *entries, uint32 nentry, - uint32 low, uint32 high, uint32 offset) -{ - uint32 pos; - uint32 middle = (low + high) >> 1; - - pos = (low + middle) >> 1; - if (low != middle && pos >= offset && pos - offset < nentry) - ginInsertEntry(accum, heapptr, attnum, entries[pos - offset]); - pos = (high + middle + 1) >> 1; - if (middle + 1 != high && pos >= offset && pos - offset < nentry) - ginInsertEntry(accum, heapptr, attnum, entries[pos - offset]); - - if (low != middle) - ginChooseElem(accum, heapptr, attnum, entries, nentry, low, middle, offset); - if (high != middle + 1) - ginChooseElem(accum, heapptr, attnum, entries, nentry, middle + 1, high, offset); + { + /* + * The key has been appended, so "free" allocated + * key by decrementing chunk's counter. + */ + accum->length--; + } } /* - * Insert one heap pointer. Suppose entries is sorted. - * Insertion order tries to get binary tree balanced: first insert middle value, - * next middle on left part and middle of right part. + * Insert one heap pointer. + * + * Since the entries are being inserted into a balanced binary tree, you + * might think that the order of insertion wouldn't be critical, but it turns + * out that inserting the entries in sorted order results in a lot of + * rebalancing operations and is slow. To prevent this, we attempt to insert + * the nodes in an order that will produce a nearly-balanced tree if the input + * is in fact sorted. + * + * We do this as follows. First, we imagine that we have an array whose size + * is the smallest power of two greater than or equal to the actual array + * size. Second, we insert the middle entry of our virtual array into the + * tree; then, we insert the middles of each half of out virtual array, then + * middles of quarters, etc. */ -void + void ginInsertRecordBA(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, Datum *entries, int32 nentry) { - uint32 i, - nbit = 0, - offset; + uint32 step = nentry; if (nentry <= 0) return; Assert(ItemPointerIsValid(heapptr) && attnum >= FirstOffsetNumber); - i = nentry - 1; - for (; i > 0; i >>= 1) - nbit++; + /* + * step will contain largest power of 2 and <= nentry + */ + step |= (step >> 1); + step |= (step >> 2); + step |= (step >> 4); + step |= (step >> 8); + step |= (step >> 16); + step >>= 1; + step ++; - nbit = 1 << nbit; - offset = (nbit - nentry) / 2; + while(step > 0) { + int i; - ginInsertEntry(accum, heapptr, attnum, entries[(nbit >> 1) - offset]); - ginChooseElem(accum, heapptr, attnum, entries, nentry, 0, nbit, offset); + for (i = step - 1; i < nentry && i >= 0; i += step << 1 /* *2 */) + ginInsertEntry(accum, heapptr, attnum, entries[i]); + + step >>= 1; /* /2 */ + } } static int @@ -219,86 +213,16 @@ qsortCompareItemPointers(const void *a, const void *b) return res; } -/* - * walk on binary tree and returns ordered nodes - */ -static EntryAccumulator * -walkTree(BuildAccumulator *accum) -{ - EntryAccumulator *entry = accum->stack[accum->stackpos]; - - if (entry->list != NULL) - { - /* return entry itself: we already was at left sublink */ - return entry; - } - else if (entry->right && entry->right != accum->stack[accum->stackpos + 1]) - { - /* go on right sublink */ - accum->stackpos++; - entry = entry->right; - - /* find most-left value */ - for (;;) - { - accum->stack[accum->stackpos] = entry; - if (entry->left) - { - accum->stackpos++; - entry = entry->left; - } - else - break; - } - } - else - { - /* we already return all left subtree, itself and right subtree */ - if (accum->stackpos == 0) - return 0; - accum->stackpos--; - return walkTree(accum); - } - - return entry; -} - ItemPointerData * ginGetEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *value, uint32 *n) { EntryAccumulator *entry; ItemPointerData *list; - if (accum->stack == NULL) - { - /* first call */ - accum->stack = palloc0(sizeof(EntryAccumulator *) * (accum->maxdepth + 1)); - accum->allocatedMemory += GetMemoryChunkSpace(accum->stack); - entry = accum->entries; - - if (entry == NULL) - return NULL; - - /* find most-left value */ - for (;;) - { - accum->stack[accum->stackpos] = entry; - if (entry->left) - { - accum->stackpos++; - entry = entry->left; - } - else - break; - } - } - else - { - accum->allocatedMemory -= GetMemoryChunkSpace(accum->stack[accum->stackpos]->list); - pfree(accum->stack[accum->stackpos]->list); - accum->stack[accum->stackpos]->list = NULL; - entry = walkTree(accum); - } + if (accum->iterator == NULL) + accum->iterator = rb_begin_iterate(accum->tree, LeftRightWalk); + + entry = rb_iterate(accum->iterator); if (entry == NULL) return NULL; diff --git a/src/backend/access/gin/ginfast.c b/src/backend/access/gin/ginfast.c index fb22bd035e..f8e0b5ad40 100644 --- a/src/backend/access/gin/ginfast.c +++ b/src/backend/access/gin/ginfast.c @@ -11,7 +11,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginfast.c,v 1.6 2010/01/02 16:57:33 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginfast.c,v 1.7 2010/02/11 14:29:50 teodor Exp $ * *------------------------------------------------------------------------- */ @@ -765,8 +765,7 @@ ginInsertCleanup(Relation index, GinState *ginstate, */ if (GinPageGetOpaque(page)->rightlink == InvalidBlockNumber || (GinPageHasFullRow(page) && - (accum.allocatedMemory >= maintenance_work_mem * 1024L || - accum.maxdepth > GIN_MAX_TREE_DEPTH))) + (accum.allocatedMemory >= maintenance_work_mem * 1024L))) { ItemPointerData *list; uint32 nlist; diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index 54ec6ead0d..e2a5e8b013 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.25 2010/01/02 16:57:33 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.26 2010/02/11 14:29:50 teodor Exp $ *------------------------------------------------------------------------- */ @@ -247,9 +247,7 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values, &htup->t_self); /* If we've maxed out our available memory, dump everything to the index */ - /* Also dump if the tree seems to be getting too unbalanced */ - if (buildstate->accum.allocatedMemory >= maintenance_work_mem * 1024L || - buildstate->accum.maxdepth > GIN_MAX_TREE_DEPTH) + if (buildstate->accum.allocatedMemory >= maintenance_work_mem * 1024L) { ItemPointerData *list; Datum entry; diff --git a/src/backend/utils/misc/Makefile b/src/backend/utils/misc/Makefile index d94530dca1..596d02d61f 100644 --- a/src/backend/utils/misc/Makefile +++ b/src/backend/utils/misc/Makefile @@ -4,7 +4,7 @@ # Makefile for utils/misc # # IDENTIFICATION -# $PostgreSQL: pgsql/src/backend/utils/misc/Makefile,v 1.29 2009/08/28 20:26:19 petere Exp $ +# $PostgreSQL: pgsql/src/backend/utils/misc/Makefile,v 1.30 2010/02/11 14:29:50 teodor Exp $ # #------------------------------------------------------------------------- @@ -14,7 +14,8 @@ include $(top_builddir)/src/Makefile.global override CPPFLAGS := -I. -I$(srcdir) $(CPPFLAGS) -OBJS = guc.o help_config.o pg_rusage.o ps_status.o superuser.o tzparser.o +OBJS = guc.o help_config.o pg_rusage.o ps_status.o superuser.o tzparser.o \ + rbtree.o # This location might depend on the installation directories. Therefore # we can't subsitute it into pg_config.h. diff --git a/src/backend/utils/misc/rbtree.c b/src/backend/utils/misc/rbtree.c new file mode 100644 index 0000000000..d7b129fcb2 --- /dev/null +++ b/src/backend/utils/misc/rbtree.c @@ -0,0 +1,790 @@ +/*------------------------------------------------------------------------- + * + * rbtree.c + * implementation for PostgreSQL generic Red-Black binary tree package + * Adopted from http://algolist.manual.ru/ds/rbtree.php + * + * This code comes from Thomas Niemann's "Sorting and Searching Algorithms: + * a Cookbook". + * + * See http://www.cs.auckland.ac.nz/software/AlgAnim/niemann/s_man.htm for + * license terms: "Source code, when part of a software project, may be used + * freely without reference to the author." + * + * Red-black trees are a type of balanced binary tree wherein (1) any child of + * a red node is always black, and (2) every path from root to leaf traverses + * an equal number of black nodes. From these properties, it follows that the + * longest path from root to leaf is only about twice as long as the shortest, + * so lookups are guaranteed to run in O(lg n) time. + * + * Copyright (c) 1996-2009, PostgreSQL Global Development Group + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/backend/utils/misc/rbtree.c,v 1.1 2010/02/11 14:29:50 teodor Exp $ + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "utils/rbtree.h" + +/********************************************************************** + * Declarations * + **********************************************************************/ + +/* + * Values for RBNode->iteratorState + */ +#define InitialState (0) +#define FirstStepDone (1) +#define SecondStepDone (2) +#define ThirdStepDone (3) + +/* + * Colors of node + */ +#define RBBLACK (0) +#define RBRED (1) + +typedef struct RBNode +{ + uint32 iteratorState:2, + color: 1 , + unused: 29; + struct RBNode *left; + struct RBNode *right; + struct RBNode *parent; + void *data; +} RBNode; + +struct RBTree +{ + RBNode *root; + rb_comparator comparator; + rb_appendator appendator; + rb_freefunc freefunc; + void *arg; +}; + +struct RBTreeIterator +{ + RBNode *node; + void *(*iterate) (RBTreeIterator *iterator); +}; + +/* + * all leafs are sentinels, use customized NIL name to prevent + * collision with sytem-wide NIL which is actually NULL + */ +#define RBNIL &sentinel + +RBNode sentinel = {InitialState, RBBLACK, 0, RBNIL, RBNIL, NULL, NULL}; + +/********************************************************************** + * Create * + **********************************************************************/ + +RBTree * +rb_create(rb_comparator comparator, rb_appendator appendator, + rb_freefunc freefunc, void *arg) +{ + RBTree *tree = palloc(sizeof(RBTree)); + + tree->root = RBNIL; + tree->comparator = comparator; + tree->appendator = appendator; + tree->freefunc = freefunc; + tree->arg = arg; + + return tree; +} + +/********************************************************************** + * Search * + **********************************************************************/ + +void * +rb_find(RBTree *rb, void *data) +{ + RBNode *node = rb->root; + int cmp; + + while (node != RBNIL) + { + cmp = rb->comparator(data, node->data, rb->arg); + + if (cmp == 0) + return node->data; + else if (cmp < 0) + node = node->left; + else + node = node->right; + } + + return NULL; +} + +/********************************************************************** + * Insertion * + **********************************************************************/ + +/* + * Rotate node x to left. + * + * x's right child takes its place in the tree, and x becomes the left + * child of that node. + */ +static void +rb_rotate_left(RBTree *rb, RBNode *x) +{ + RBNode *y = x->right; + + /* establish x->right link */ + x->right = y->left; + if (y->left != RBNIL) + y->left->parent = x; + + /* establish y->parent link */ + if (y != RBNIL) + y->parent = x->parent; + if (x->parent) + { + if (x == x->parent->left) + x->parent->left = y; + else + x->parent->right = y; + } + else + { + rb->root = y; + } + + /* link x and y */ + y->left = x; + if (x != RBNIL) + x->parent = y; +} + +/* + * Rotate node x to right. + * + * x's left right child takes its place in the tree, and x becomes the right + * child of that node. + */ +static void +rb_rotate_right(RBTree *rb, RBNode *x) +{ + RBNode *y = x->left; + + /* establish x->left link */ + x->left = y->right; + if (y->right != RBNIL) + y->right->parent = x; + + /* establish y->parent link */ + if (y != RBNIL) + y->parent = x->parent; + if (x->parent) + { + if (x == x->parent->right) + x->parent->right = y; + else + x->parent->left = y; + } + else + { + rb->root = y; + } + + /* link x and y */ + y->right = x; + if (x != RBNIL) + x->parent = y; +} + +/* + * Maintain Red-Black tree balance after inserting node x. + * + * The newly inserted node is always initially marked red. That may lead to + * a situation where a red node has a red child, which is prohibited. We can + * always fix the problem by a series of color changes and/or "rotations", + * which move the problem progressively higher up in the tree. If one of the + * two red nodes is the root, we can always fix the problem by changing the + * root from red to black. + * + * (This does not work lower down in the tree because we must also maintain + * the invariant that every leaf has equal black-height.) + */ +static void +rb_insert_fixup(RBTree *rb, RBNode *x) +{ + /* + * x is always a red node. Initially, it is the newly inserted node. + * Each iteration of this loop moves it higher up in the tree. + */ + while (x != rb->root && x->parent->color == RBRED) + { + /* + * x and x->parent are both red. Fix depends on whether x->parent is + * a left or right child. In either case, we define y to be the + * "uncle" of x, that is, the other child of x's grandparent. + * + * If the uncle is red, we flip the grandparent to red and its two + * children to black. Then we loop around again to check whether the + * grandparent still has a problem. + * + * If the uncle is black, we will perform one or two "rotations" to + * balance the tree. Either x or x->parent will take the grandparent's + * position in the tree and recolored black, and the original + * grandparent will be recolored red and become a child of that node. + * This always leaves us with a valid red-black tree, so the loop + * will terminate. + */ + if (x->parent == x->parent->parent->left) + { + RBNode *y = x->parent->parent->right; + + if (y->color == RBRED) + { + /* uncle is RBRED */ + x->parent->color = RBBLACK; + y->color = RBBLACK; + x->parent->parent->color = RBRED; + x = x->parent->parent; + } + else + { + /* uncle is RBBLACK */ + if (x == x->parent->right) + { + /* make x a left child */ + x = x->parent; + rb_rotate_left(rb, x); + } + + /* recolor and rotate */ + x->parent->color = RBBLACK; + x->parent->parent->color = RBRED; + rb_rotate_right(rb, x->parent->parent); + } + } + else + { + /* mirror image of above code */ + RBNode *y = x->parent->parent->left; + + if (y->color == RBRED) + { + /* uncle is RBRED */ + x->parent->color = RBBLACK; + y->color = RBBLACK; + x->parent->parent->color = RBRED; + x = x->parent->parent; + } + else + { + /* uncle is RBBLACK */ + if (x == x->parent->left) + { + x = x->parent; + rb_rotate_right(rb, x); + } + x->parent->color = RBBLACK; + x->parent->parent->color = RBRED; + rb_rotate_left(rb, x->parent->parent); + } + } + } + + /* + * The root may already have been black; if not, the black-height of every + * node in the tree increases by one. + */ + rb->root->color = RBBLACK; +} + +/* + * Allocate node for data and insert in tree. + * + * Return old data (or result of appendator method) if it exists and NULL + * otherwise. + */ +void * +rb_insert(RBTree *rb, void *data) +{ + RBNode *current, + *parent, + *x; + int cmp; + + /* find where node belongs */ + current = rb->root; + parent = NULL; + while (current != RBNIL) + { + cmp = rb->comparator(data, current->data, rb->arg); + if (cmp == 0) + { + /* + * Found node with given key. If appendator method is provided, + * call it to join old and new data; else, new data replaces old + * data. + */ + if (rb->appendator) + { + current->data = rb->appendator(current->data, data, rb->arg); + return current->data; + } + else + { + void *old = current->data; + + current->data = data; + return old; + } + } + parent = current; + current = (cmp < 0) ? current->left : current->right; + } + + /* setup new node in tree */ + x = palloc(sizeof(RBNode)); + x->data = data; + x->parent = parent; + x->left = RBNIL; + x->right = RBNIL; + x->color = RBRED; + x->iteratorState = InitialState; + + /* insert node in tree */ + if (parent) + { + if (cmp < 0) + parent->left = x; + else + parent->right = x; + } + else + { + rb->root = x; + } + + rb_insert_fixup(rb, x); + return NULL; +} + +/********************************************************************** + * Deletion * + **********************************************************************/ + +/* + * Maintain Red-Black tree balance after deleting a black node. + */ +static void +rb_delete_fixup(RBTree *rb, RBNode *x) +{ + /* + * x is always a black node. Initially, it is the former child of the + * deleted node. Each iteration of this loop moves it higher up in the + * tree. + */ + while (x != rb->root && x->color == RBBLACK) + { + /* + * Left and right cases are symmetric. Any nodes that are children + * of x have a black-height one less than the remainder of the nodes + * in the tree. We rotate and recolor nodes to move the problem up + * the tree: at some stage we'll either fix the problem, or reach the + * root (where the black-height is allowed to decrease). + */ + if (x == x->parent->left) + { + RBNode *w = x->parent->right; + + if (w->color == RBRED) + { + w->color = RBBLACK; + x->parent->color = RBRED; + rb_rotate_left(rb, x->parent); + w = x->parent->right; + } + + if (w->left->color == RBBLACK && w->right->color == RBBLACK) + { + w->color = RBRED; + x = x->parent; + } + else + { + if (w->right->color == RBBLACK) + { + w->left->color = RBBLACK; + w->color = RBRED; + rb_rotate_right(rb, w); + w = x->parent->right; + } + w->color = x->parent->color; + x->parent->color = RBBLACK; + w->right->color = RBBLACK; + rb_rotate_left(rb, x->parent); + x = rb->root; /* Arrange for loop to terminate. */ + } + } + else + { + RBNode *w = x->parent->left; + + if (w->color == RBRED) + { + w->color = RBBLACK; + x->parent->color = RBRED; + rb_rotate_right(rb, x->parent); + w = x->parent->left; + } + + if (w->right->color == RBBLACK && w->left->color == RBBLACK) + { + w->color = RBRED; + x = x->parent; + } + else + { + if (w->left->color == RBBLACK) + { + w->right->color = RBBLACK; + w->color = RBRED; + rb_rotate_left(rb, w); + w = x->parent->left; + } + w->color = x->parent->color; + x->parent->color = RBBLACK; + w->left->color = RBBLACK; + rb_rotate_right(rb, x->parent); + x = rb->root; /* Arrange for loop to terminate. */ + } + } + } + x->color = RBBLACK; +} + +/* + * Delete node z from tree. + */ +static void +rb_delete_node(RBTree *rb, RBNode *z) +{ + RBNode *x, + *y; + + if (!z || z == RBNIL) + return; + + /* + * y is the node that will actually be removed from the tree. This will + * be z if z has fewer than two children, or the tree successor of z + * otherwise. + */ + if (z->left == RBNIL || z->right == RBNIL) + { + /* y has a RBNIL node as a child */ + y = z; + } + else + { + /* find tree successor */ + y = z->right; + while (y->left != RBNIL) + y = y->left; + } + + /* x is y's only child */ + if (y->left != RBNIL) + x = y->left; + else + x = y->right; + + /* Remove y from the tree. */ + x->parent = y->parent; + if (y->parent) + { + if (y == y->parent->left) + y->parent->left = x; + else + y->parent->right = x; + } + else + { + rb->root = x; + } + + /* + * If we removed the tree successor of z rather than z itself, then + * attach the data for the removed node to the one we were supposed to + * remove. + */ + if (y != z) + z->data = y->data; + + /* + * Removing a black node might make some paths from root to leaf contain + * fewer black nodes than others, or it might make two red nodes adjacent. + */ + if (y->color == RBBLACK) + rb_delete_fixup(rb, x); + + pfree(y); +} + +extern void +rb_delete(RBTree *rb, void *data) +{ + RBNode *node = rb->root; + int cmp; + + while (node != RBNIL) + { + cmp = rb->comparator(data, node->data, rb->arg); + + if (cmp == 0) + { + /* found node to delete */ + if (rb->freefunc) + rb->freefunc(node->data); + node->data = NULL; + rb_delete_node(rb, node); + return; + } + else if (cmp < 0) + node = node->left; + else + node = node->right; + } +} + +/* + * Return data on left most node and delete + * that node + */ +extern void * +rb_leftmost(RBTree *rb) +{ + RBNode *node = rb->root; + RBNode *leftmost = rb->root; + void *res = NULL; + + while (node != RBNIL) + { + leftmost = node; + node = node->left; + } + + if (leftmost != RBNIL) + { + res = leftmost->data; + leftmost->data = NULL; + rb_delete_node(rb, leftmost); + } + + return res; +} + +/********************************************************************** + * Traverse * + **********************************************************************/ + +static void * +rb_next_node(RBTreeIterator *iterator, RBNode *node) +{ + node->iteratorState = InitialState; + iterator->node = node; + return iterator->iterate(iterator); +} + +static void * +rb_left_right_iterator(RBTreeIterator *iterator) +{ + RBNode *node = iterator->node; + + switch (node->iteratorState) + { + case InitialState: + if (node->left != RBNIL) + { + node->iteratorState = FirstStepDone; + return rb_next_node(iterator, node->left); + } + case FirstStepDone: + node->iteratorState = SecondStepDone; + return node->data; + case SecondStepDone: + if (node->right != RBNIL) + { + node->iteratorState = ThirdStepDone; + return rb_next_node(iterator, node->right); + } + case ThirdStepDone: + if (node->parent) + { + iterator->node = node->parent; + return iterator->iterate(iterator); + } + break; + default: + elog(ERROR, "Unknow node state: %d", node->iteratorState); + } + + return NULL; +} + +static void * +rb_right_left_iterator(RBTreeIterator *iterator) +{ + RBNode *node = iterator->node; + + switch (node->iteratorState) + { + case InitialState: + if (node->right != RBNIL) + { + node->iteratorState = FirstStepDone; + return rb_next_node(iterator, node->right); + } + case FirstStepDone: + node->iteratorState = SecondStepDone; + return node->data; + case SecondStepDone: + if (node->left != RBNIL) + { + node->iteratorState = ThirdStepDone; + return rb_next_node(iterator, node->left); + } + case ThirdStepDone: + if (node->parent) + { + iterator->node = node->parent; + return iterator->iterate(iterator); + } + break; + default: + elog(ERROR, "Unknow node state: %d", node->iteratorState); + } + + return NULL; +} + +static void * +rb_direct_iterator(RBTreeIterator *iterator) +{ + RBNode *node = iterator->node; + + switch (node->iteratorState) + { + case InitialState: + node->iteratorState = FirstStepDone; + return node->data; + case FirstStepDone: + if (node->left != RBNIL) + { + node->iteratorState = SecondStepDone; + return rb_next_node(iterator, node->left); + } + case SecondStepDone: + if (node->right != RBNIL) + { + node->iteratorState = ThirdStepDone; + return rb_next_node(iterator, node->right); + } + case ThirdStepDone: + if (node->parent) + { + iterator->node = node->parent; + return iterator->iterate(iterator); + } + break; + default: + elog(ERROR, "Unknow node state: %d", node->iteratorState); + } + + return NULL; +} + +static void * +rb_inverted_iterator(RBTreeIterator *iterator) +{ + RBNode *node = iterator->node; + + switch (node->iteratorState) + { + case InitialState: + if (node->left != RBNIL) + { + node->iteratorState = FirstStepDone; + return rb_next_node(iterator, node->left); + } + case FirstStepDone: + if (node->right != RBNIL) + { + node->iteratorState = SecondStepDone; + return rb_next_node(iterator, node->right); + } + case SecondStepDone: + node->iteratorState = ThirdStepDone; + return node->data; + case ThirdStepDone: + if (node->parent) + { + iterator->node = node->parent; + return iterator->iterate(iterator); + } + break; + default: + elog(ERROR, "Unknow node state: %d", node->iteratorState); + } + + return NULL; +} + +RBTreeIterator * +rb_begin_iterate(RBTree *rb, RBOrderControl ctrl) +{ + RBTreeIterator *iterator = palloc(sizeof(RBTreeIterator)); + + iterator->node = rb->root; + if (iterator->node != RBNIL) + iterator->node->iteratorState = InitialState; + + switch (ctrl) + { + case LeftRightWalk: /* visit left, then self, then right */ + iterator->iterate = rb_left_right_iterator; + break; + case RightLeftWalk: /* visit right, then self, then left */ + iterator->iterate = rb_right_left_iterator; + break; + case DirectWalk: /* visit self, then left, then right */ + iterator->iterate = rb_direct_iterator; + break; + case InvertedWalk: /* visit left, then right, then self */ + iterator->iterate = rb_inverted_iterator; + break; + default: + elog(ERROR, "Unknown iterator order: %d", ctrl); + } + + return iterator; +} + +void * +rb_iterate(RBTreeIterator *iterator) +{ + if (iterator->node == RBNIL) + return NULL; + + return iterator->iterate(iterator); +} + +void +rb_free_iterator(RBTreeIterator *iterator) +{ + pfree(iterator); +} diff --git a/src/include/access/gin.h b/src/include/access/gin.h index 0ea25dc403..4896561360 100644 --- a/src/include/access/gin.h +++ b/src/include/access/gin.h @@ -4,7 +4,7 @@ * * Copyright (c) 2006-2010, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.36 2010/01/02 16:58:00 momjian Exp $ + * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.37 2010/02/11 14:29:50 teodor Exp $ *-------------------------------------------------------------------------- */ #ifndef GIN_H @@ -13,6 +13,7 @@ #include "access/genam.h" #include "access/itup.h" #include "access/xlog.h" +#include "utils/rbtree.h" #include "fmgr.h" @@ -26,14 +27,6 @@ #define GIN_COMPARE_PARTIAL_PROC 5 #define GINNProcs 5 -/* - * Max depth allowed in search tree during bulk inserts. This is to keep from - * degenerating to O(N^2) behavior when the tree is unbalanced due to sorted - * or nearly-sorted input. (Perhaps it would be better to use a balanced-tree - * algorithm, but in common cases that would only add useless overhead.) - */ -#define GIN_MAX_TREE_DEPTH 100 - /* * Page opaque data in a inverted index page. * @@ -570,27 +563,23 @@ extern Datum ginarrayconsistent(PG_FUNCTION_ARGS); /* ginbulk.c */ typedef struct EntryAccumulator { - OffsetNumber attnum; Datum value; uint32 length; uint32 number; - ItemPointerData *list; + OffsetNumber attnum; bool shouldSort; - struct EntryAccumulator *left; - struct EntryAccumulator *right; + ItemPointerData *list; } EntryAccumulator; typedef struct { GinState *ginstate; - EntryAccumulator *entries; - uint32 maxdepth; - EntryAccumulator **stack; - uint32 stackpos; long allocatedMemory; - uint32 length; - EntryAccumulator *entryallocator; + EntryAccumulator *entryallocator; + ItemPointerData *tmpList; + RBTree *tree; + RBTreeIterator *iterator; } BuildAccumulator; extern void ginInitBA(BuildAccumulator *accum); diff --git a/src/include/utils/rbtree.h b/src/include/utils/rbtree.h new file mode 100644 index 0000000000..535a23780b --- /dev/null +++ b/src/include/utils/rbtree.h @@ -0,0 +1,46 @@ +/*------------------------------------------------------------------------- + * + * rbtree.h + * interface for PostgreSQL generic Red-Black binary tree package + * + * Copyright (c) 1996-2009, PostgreSQL Global Development Group + * + * IDENTIFICATION + * $PostgreSQL: pgsql/src/include/utils/rbtree.h,v 1.1 2010/02/11 14:29:50 teodor Exp $ + * + *------------------------------------------------------------------------- + */ + +#ifndef RBTREE_H +#define RBTREE_H + +typedef struct RBTree RBTree; +typedef struct RBTreeIterator RBTreeIterator; + +typedef int (*rb_comparator) (const void *a, const void *b, void *arg); +typedef void* (*rb_appendator) (void *current, void *new, void *arg); +typedef void (*rb_freefunc) (void *a); + +extern RBTree *rb_create(rb_comparator comparator, + rb_appendator appendator, + rb_freefunc freefunc, + void *arg); + +extern void *rb_find(RBTree *rb, void *data); +extern void *rb_insert(RBTree *rb, void *data); +extern void rb_delete(RBTree *rb, void *data); +extern void *rb_leftmost(RBTree *rb); + +typedef enum RBOrderControl +{ + LeftRightWalk, + RightLeftWalk, + DirectWalk, + InvertedWalk +} RBOrderControl; + +extern RBTreeIterator* rb_begin_iterate(RBTree *rb, RBOrderControl ctrl); +extern void *rb_iterate(RBTreeIterator *iterator); +extern void rb_free_iterator(RBTreeIterator *iterator); + +#endif -- 2.40.0