]> granicus.if.org Git - postgresql/commitdiff
GIN improvements
authorTeodor Sigaev <teodor@sigaev.ru>
Tue, 11 Jul 2006 16:55:34 +0000 (16:55 +0000)
committerTeodor Sigaev <teodor@sigaev.ru>
Tue, 11 Jul 2006 16:55:34 +0000 (16:55 +0000)
- Replace sorted array of entries in maintenance_work_mem to binary tree,
  this should improve create performance.
- More precisely calculate allocated memory, eliminate leaks
  with user-defined extractValue()
- Improve wordings in tsearch2

contrib/tsearch2/ts_cfg.c
src/backend/access/gin/ginbulk.c
src/backend/access/gin/gininsert.c
src/include/access/gin.h

index 5a662b7dbb508b0fa097d8c800a1849b4014d7ac..d0e18916682629182a3184106c6474f76c67afa8 100644 (file)
@@ -313,12 +313,12 @@ parsetext_v2(TSCfgInfo * cfg, PRSTEXT * prs, char *buf, int4 buflen)
 #ifdef IGNORE_LONGLEXEME
                        ereport(NOTICE,
                                        (errcode(ERRCODE_SYNTAX_ERROR),
-                                        errmsg("word is too long")));
+                                        errmsg("A word you are indexing is too long. It will be ignored.")));
                        continue;
 #else
                        ereport(ERROR,
                                        (errcode(ERRCODE_SYNTAX_ERROR),
-                                        errmsg("word is too long")));
+                                        errmsg("A word you are indexing is too long")));
 #endif
                }
 
@@ -470,12 +470,12 @@ hlparsetext(TSCfgInfo * cfg, HLPRSTEXT * prs, QUERYTYPE * query, char *buf, int4
 #ifdef IGNORE_LONGLEXEME
                        ereport(NOTICE,
                                        (errcode(ERRCODE_SYNTAX_ERROR),
-                                        errmsg("word is too long")));
+                                        errmsg("A word you are indexing is too long. It will be ignored.")));
                        continue;
 #else
                        ereport(ERROR,
                                        (errcode(ERRCODE_SYNTAX_ERROR),
-                                        errmsg("word is too long")));
+                                        errmsg("A word you are indexing is too long")));
 #endif
                }
 
index 6d73f070ba26c5eec42e4738ad9cd355814feb24..58fa1b34d782df4a1e30b683506a9c7ae2bca2b1 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *          $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.1 2006/05/02 11:28:54 teodor Exp $
+ *          $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.2 2006/07/11 16:55:34 teodor Exp $
  *-------------------------------------------------------------------------
  */
 
 #include "utils/memutils.h"
 #include "access/tuptoaster.h"
 
-#define DEF_NENTRY     128
+#define DEF_NENTRY     2048
 #define DEF_NPTR       4
 
 void
 ginInitBA(BuildAccumulator *accum) {
-       
-               accum->number = 0;
-               accum->curget = 0;
-               accum->length = DEF_NENTRY;
-               accum->entries = (EntryAccumulator*)palloc0( sizeof(EntryAccumulator) * DEF_NENTRY );
-               accum->allocatedMemory = sizeof(EntryAccumulator) * DEF_NENTRY;
+       accum->maxdepth = 1;
+       accum->stackpos = 0;
+       accum->entries = NULL;
+       accum->stack = NULL;
+       accum->allocatedMemory = 0;
+       accum->entryallocator = NULL;
+}
+
+static EntryAccumulator*
+EAAllocate( BuildAccumulator *accum ) {
+       if ( accum->entryallocator == NULL || accum->length>=DEF_NENTRY ) {
+               accum->entryallocator = palloc(sizeof(EntryAccumulator)*DEF_NENTRY);
+               accum->allocatedMemory += sizeof(EntryAccumulator)*DEF_NENTRY;
+               accum->length = 0;
+       }
+
+       accum->length++;
+       return accum->entryallocator + accum->length - 1;
 }
 
 /*
@@ -61,64 +73,133 @@ ginInsertData(BuildAccumulator *accum, EntryAccumulator *entry, ItemPointer heap
        entry->number++;
 }
 
+static Datum
+getDatumCopy(BuildAccumulator *accum, Datum value) {
+       Form_pg_attribute *att = accum->ginstate->tupdesc->attrs;
+       Datum   newvalue;
+       int data_length = 0;
+       void *ptr;
+
+       if ( att[0]->attbyval ) {
+               store_att_byval(&newvalue, value, att[0]->attlen);
+       } else {
+               /* pass-by-reference */
+               if (att[0]->attlen == -1) {
+                       /* varlena */
+                       data_length = VARATT_SIZE(DatumGetPointer(value));
+               } else if (att[0]->attlen == -2) {
+                       /* c-string */
+                       data_length = strlen(DatumGetCString(value)) + 1;
+               } else {
+                       /* fixed-length pass-by-reference */
+                       Assert(att[0]->attlen > 0);
+                       data_length = att[0]->attlen;
+               }
+
+               ptr = palloc( data_length );
+               memcpy(ptr, DatumGetPointer(value), data_length);
+               newvalue = PointerGetDatum(ptr);
+       }
+
+       accum->allocatedMemory+=data_length;
+
+       return newvalue;
+}
+
 /*
  * Find/store one entry from indexed value.
- * It supposes, that entry should be located between low and end of array of
- * entries. Returns position of found/inserted entry
  */
-static uint32
-ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr,  Datum entry, uint32 low) {
-       uint32 high = accum->number, mid;
-       int res;
-
-       while(high>low) { 
-               mid = low + ((high - low) / 2);
-
-               res = compareEntries(accum->ginstate, entry, accum->entries[mid].value);
-
-               if ( res == 0 ) {
-                       ginInsertData( accum, accum->entries+mid, heapptr );
-                       return mid;
-               } else if ( res > 0 )
-                       low = mid + 1;
-               else
-                       high = mid;
+static void
+ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr,  Datum entry) {
+       EntryAccumulator        *ea = accum->entries, *pea = NULL;
+       int res = 0;
+       uint32  depth = 1;
+
+       while( ea ) {
+               res = compareEntries(accum->ginstate, entry, ea->value);
+               if ( res == 0 ) 
+                       break;  /* found */
+               else {
+                       pea = ea;
+                       if ( res < 0 ) 
+                               ea = ea->left;
+                       else
+                               ea = ea->right;
+               }
+               depth++;
        }
        
-       /* did not find an entry, insert */
-       if ( accum->number >= accum->length ) {
-               accum->allocatedMemory += sizeof(EntryAccumulator) * accum->length;
-               accum->length *= 2;
-               accum->entries = (EntryAccumulator*)repalloc( accum->entries, 
-                               sizeof(EntryAccumulator) * accum->length );
-       }
-
-       if ( high != accum->number ) 
-               memmove( accum->entries+high+1, accum->entries+high, sizeof(EntryAccumulator) * (accum->number-high) );
+       if ( depth > accum->maxdepth )
+               accum->maxdepth = depth;
 
-       accum->entries[high].value  = entry;
-       accum->entries[high].length = DEF_NPTR;
-       accum->entries[high].number = 1;
-       accum->entries[high].shouldSort = FALSE;
-       accum->entries[high].list   = (ItemPointerData*)palloc(sizeof(ItemPointerData)*DEF_NPTR);
-       accum->entries[high].list[0] = *heapptr;
+       if ( ea == NULL ) {
+               ea = EAAllocate(accum);
 
-       accum->allocatedMemory += sizeof(ItemPointerData)*DEF_NPTR;
-       accum->number++;
+               ea->left = ea->right = NULL;
+               ea->value  = getDatumCopy(accum, entry);
+               ea->length = DEF_NPTR;
+               ea->number = 1;
+               ea->shouldSort = FALSE;
+               ea->list   = (ItemPointerData*)palloc(sizeof(ItemPointerData)*DEF_NPTR);
+               ea->list[0] = *heapptr;
+               accum->allocatedMemory += sizeof(ItemPointerData)*DEF_NPTR;
 
-       return high;
+               if ( pea == NULL )
+                       accum->entries = ea;
+               else {
+                       Assert( res != 0 );
+                       if ( res < 0 ) 
+                               pea->left = ea;
+                       else
+                               pea->right = ea;
+               }
+       } else
+               ginInsertData( accum, ea, heapptr );
 }
 
+/*
+ * insert middle of left part the middle of right one,
+ * then calls itself for each parts
+ */
+static void
+ginChooseElem(BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry, 
+       uint32 low, uint32 high, uint32 offset) {
+       uint32 pos;
+       uint32 middle = (low+high)>>1;
+
+       pos = (low+middle)>>1; 
+       if ( low!=middle && pos>=offset && pos-offset < nentry ) 
+               ginInsertEntry( accum, heapptr, entries[ pos-offset ]);
+       pos = (high+middle+1)>>1; 
+       if ( middle+1 != high && pos>=offset && pos-offset < nentry ) 
+               ginInsertEntry( accum, heapptr, entries[ pos-offset ]);
+
+       if ( low!=middle )
+               ginChooseElem(accum, heapptr, entries, nentry, low, middle, offset );
+       if ( high!=middle+1 )
+               ginChooseElem(accum, heapptr, entries, nentry, middle+1, high, offset );
+}
 
 /*
- * Insert one heap pointer. Requires entries to be sorted!
+ * Insert one heap pointer. Suppose entries is sorted.
+ * Insertion order trys to get binary tree balanced: first insert middle value,
+ * next middle on left part and middle of right part.
  */
 void
 ginInsertRecordBA( BuildAccumulator *accum, ItemPointer heapptr, Datum *entries, uint32 nentry ) {
-       uint32 start=0,i;
+       uint32 i, nbit=0, offset;
+
+       if (nentry==0)
+               return;
+
+       i=nentry-1;
+       for(;i>0;i>>=1) nbit++;
 
-       for(i=0;i<nentry;i++)
-               start = ginInsertEntry( accum, heapptr, entries[i], start);
+       nbit  = 1<<nbit;
+       offset = (nbit-nentry)/2;
+
+       ginInsertEntry( accum, heapptr, entries[ (nbit>>1)-offset ]);
+       ginChooseElem(accum, heapptr, entries, nentry, 0, nbit, offset);
 }
 
 static int 
@@ -128,28 +209,79 @@ qsortCompareItemPointers( const void *a, const void *b ) {
        return res;
 }
 
+/*
+ * walk on binary tree and returns ordered nodes 
+ */ 
+static EntryAccumulator*
+walkTree( BuildAccumulator *accum ) {
+       EntryAccumulator    *entry = accum->stack[ accum->stackpos ];
+
+       if ( entry->list != NULL ) {
+               /* return entry itself: we already was at left sublink */
+               return entry;
+       } else if ( entry->right && entry->right != accum->stack[ accum->stackpos+1 ] ) {
+               /* go on right sublink */
+               accum->stackpos++;
+               entry = entry->right;
+
+               /* find most-left value */
+               for(;;) {
+                       accum->stack[ accum->stackpos ] = entry;
+                       if ( entry->left ) {
+                               accum->stackpos++;
+                               entry = entry->left;
+                       } else 
+                               break;
+               }
+       } else {
+               /* we already return all left subtree, itself and  right subtree */
+               if ( accum->stackpos == 0 )
+                       return 0;
+               accum->stackpos--;
+               return walkTree(accum);
+       }
+
+       return entry;
+}
+
 ItemPointerData*
 ginGetEntry(BuildAccumulator *accum, Datum *value, uint32 *n) {
        EntryAccumulator        *entry;
-
        ItemPointerData *list;
-       if ( accum->curget >= accum->number )
+
+
+       if ( accum->stack == NULL ) {
+               /* first call */
+               accum->stack = palloc0(sizeof(EntryAccumulator*)*(accum->maxdepth+1));
+               entry = accum->entries;
+
+               /* find most-left value */
+               for(;;) {
+                       accum->stack[ accum->stackpos ] = entry;
+                       if ( entry->left ) { 
+                               accum->stackpos++;
+                               entry = entry->left;
+                       } else
+                               break;
+               }
+       } else {
+               pfree( accum->stack[ accum->stackpos ]->list );
+               accum->stack[ accum->stackpos ]->list = NULL;
+               entry = walkTree( accum );
+       }
+
+       if ( entry == NULL )
                return NULL;
-       else if ( accum->curget > 0 )
-               pfree( accum->entries[ accum->curget-1 ].list );
 
-       entry = accum->entries + accum->curget;
        *n              = entry->number;
        *value  = entry->value;
        list    = entry->list;
-       accum->curget++;
+
+       Assert(list != NULL);
 
        if ( entry->shouldSort && entry->number > 1 )
                qsort(list, *n, sizeof(ItemPointerData), qsortCompareItemPointers);
 
-
        return list;
 }
 
-
-
index a4416a94cb3348d970ef10ed90f4c1a28b86d481..b67bf6e8218155570e31d2fa120329ed1659fdce 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *          $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.2 2006/05/10 23:18:38 tgl Exp $
+ *          $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.3 2006/07/11 16:55:34 teodor Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -26,6 +26,7 @@ typedef struct {
        GinState                        ginstate;
        double                          indtuples;
        MemoryContext           tmpCtx;
+       MemoryContext           funcCtx;
        BuildAccumulator        accum;
 } GinBuildState;
 
@@ -189,19 +190,22 @@ ginEntryInsert( Relation index, GinState *ginstate, Datum value, ItemPointerData
  * Function isnt use during normal insert
  */
 static uint32
-ginHeapTupleBulkInsert(BuildAccumulator *accum, Datum value, ItemPointer heapptr) {
+ginHeapTupleBulkInsert(GinBuildState *buildstate, Datum value, ItemPointer heapptr) {
        Datum   *entries;
        uint32 nentries;
+       MemoryContext oldCtx;
 
-       entries = extractEntriesSU( accum->ginstate, value, &nentries);
+       oldCtx = MemoryContextSwitchTo(buildstate->funcCtx);
+       entries = extractEntriesSU( buildstate->accum.ginstate, value, &nentries);
+       MemoryContextSwitchTo(oldCtx);
 
        if ( nentries==0 )
                /* nothing to insert */
                return 0;
 
-       ginInsertRecordBA( accum, heapptr, entries, nentries);
+       ginInsertRecordBA( &buildstate->accum, heapptr, entries, nentries);
 
-       pfree( entries );
+       MemoryContextReset(buildstate->funcCtx);
 
        return nentries;
 }
@@ -218,7 +222,7 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
 
        oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
 
-       buildstate->indtuples += ginHeapTupleBulkInsert(&buildstate->accum, *values, &htup->t_self);
+       buildstate->indtuples += ginHeapTupleBulkInsert(buildstate, *values, &htup->t_self);
 
        /* we use only half maintenance_work_mem, because there is some leaks 
                during insertion and extract values */ 
@@ -297,6 +301,12 @@ ginbuild(PG_FUNCTION_ARGS) {
                                                                                                ALLOCSET_DEFAULT_INITSIZE,
                                                                                                ALLOCSET_DEFAULT_MAXSIZE);
 
+       buildstate.funcCtx = AllocSetContextCreate(buildstate.tmpCtx,
+                                                                                               "Gin build temporary context for user-defined function",
+                                                                                               ALLOCSET_DEFAULT_MINSIZE,
+                                                                                               ALLOCSET_DEFAULT_INITSIZE,
+                                                                                               ALLOCSET_DEFAULT_MAXSIZE);
+
        buildstate.accum.ginstate = &buildstate.ginstate;
        ginInitBA( &buildstate.accum );
 
index c13d726aa917979007ab66e9a9dc900202f84407..ba7d15cf1fc729db4596f17bc7a23705a8ecc824 100644 (file)
@@ -3,7 +3,7 @@
  *    header file for postgres inverted index access method implementation.
  *
  *  Copyright (c) 2006, PostgreSQL Global Development Group
- *  $PostgreSQL: pgsql/src/include/access/gin.h,v 1.4 2006/07/11 13:54:24 momjian Exp $
+ *  $PostgreSQL: pgsql/src/include/access/gin.h,v 1.5 2006/07/11 16:55:34 teodor Exp $
  *--------------------------------------------------------------------------
  */
 
@@ -414,21 +414,26 @@ extern Datum arraycontains(PG_FUNCTION_ARGS);
 extern Datum arraycontained(PG_FUNCTION_ARGS);
 
 /* ginbulk.c */
-typedef struct {
+typedef struct EntryAccumulator {
     Datum       value;
        uint32      length;
        uint32      number;
        ItemPointerData *list;
        bool            shouldSort;
+       struct EntryAccumulator *left;
+       struct EntryAccumulator *right;
 } EntryAccumulator;
 
 typedef struct {
        GinState        *ginstate;
     EntryAccumulator    *entries;
-       uint32      length;
-       uint32      number;
-       uint32      curget;
+       uint32          maxdepth;
+       EntryAccumulator        **stack;
+       uint32                          stackpos;
        uint32      allocatedMemory;
+
+       uint32                          length;
+       EntryAccumulator        *entryallocator;
 } BuildAccumulator;
 
 extern void ginInitBA(BuildAccumulator *accum);