]> granicus.if.org Git - postgresql/blobdiff - contrib/tsearch2/rank.c
Reduce WAL activity for page splits:
[postgresql] / contrib / tsearch2 / rank.c
index 0840eb83470dd639cc303a842f1197999b0566e4..36fc2594009795b798b5aef671c8afa75fd7cdbd 100644 (file)
@@ -3,20 +3,20 @@
  * Teodor Sigaev <teodor@sigaev.ru>
  */
 #include "postgres.h"
+
 #include <math.h>
 
 #include "access/gist.h"
 #include "access/itup.h"
-#include "utils/builtins.h"
+#include "catalog/namespace.h"
+#include "commands/trigger.h"
+#include "executor/spi.h"
 #include "fmgr.h"
 #include "funcapi.h"
-#include "storage/bufpage.h"
-#include "executor/spi.h"
-#include "commands/trigger.h"
 #include "nodes/pg_list.h"
-#include "catalog/namespace.h"
-
+#include "storage/bufpage.h"
 #include "utils/array.h"
+#include "utils/builtins.h"
 
 #include "tsvector.h"
 #include "query.h"
@@ -37,29 +37,44 @@ Datum               rank_cd_def(PG_FUNCTION_ARGS);
 PG_FUNCTION_INFO_V1(get_covers);
 Datum          get_covers(PG_FUNCTION_ARGS);
 
-static float weights[]={0.1, 0.2, 0.4, 1.0};
+static float weights[] = {0.1f, 0.2f, 0.4f, 1.0f};
+
+#define wpos(wep)      ( w[ WEP_GETWEIGHT(wep) ] )
 
-#define wpos(wep)      ( w[ ((WordEntryPos*)(wep))->weight ] )
+#define RANK_NO_NORM           0x00
+#define RANK_NORM_LOGLENGTH            0x01
+#define RANK_NORM_LENGTH       0x02
+#define RANK_NORM_EXTDIST      0x04
+#define RANK_NORM_UNIQ         0x08
+#define RANK_NORM_LOGUNIQ      0x10
+#define DEF_NORM_METHOD                RANK_NO_NORM
 
-#define DEF_NORM_METHOD        0
+static float calc_rank_or(float *w, tsvector * t, QUERYTYPE * q);
+static float calc_rank_and(float *w, tsvector * t, QUERYTYPE * q);
 
 /*
  * Returns a weight of a word collocation
  */
-static float4 word_distance ( int4 w ) {
-       if ( w>100 )
-       return 1e-30;
+static float4
+word_distance(int4 w)
+{
+       if (w > 100)
+               return (float4)1e-30;
 
-       return 1.0/(1.005+0.05*exp( ((float4)w)/1.5-2) );
+       return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
 }
 
 static int
-cnt_length( tsvector *t ) {
-       WordEntry       *ptr=ARRPTR(t), *end=(WordEntry*)STRPTR(t);
-       int len = 0, clen;
-
-       while(ptr < end) {
-               if ( (clen=POSDATALEN(t, ptr)) == 0 )
+cnt_length(tsvector * t)
+{
+       WordEntry  *ptr = ARRPTR(t),
+                          *end = (WordEntry *) STRPTR(t);
+       int                     len = 0,
+                               clen;
+
+       while (ptr < end)
+       {
+               if ((clen = POSDATALEN(t, ptr)) == 0)
                        len += 1;
                else
                        len += clen;
@@ -70,191 +85,315 @@ cnt_length( tsvector *t ) {
 }
 
 static int4
-WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) {
-        if (ptr->len == item->length)
-                return strncmp(
-                                           eval + ptr->pos,
-                                           qval + item->distance,
-                                           item->length);
-
-        return (ptr->len > item->length) ? 1 : -1;
+WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item)
+{
+       if (ptr->len == item->length)
+               return strncmp(
+                                          eval + ptr->pos,
+                                          qval + item->distance,
+                                          item->length);
+
+       return (ptr->len > item->length) ? 1 : -1;
 }
 
-static WordEntry*
-find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) {
-        WordEntry  *StopLow = ARRPTR(t);
-        WordEntry  *StopHigh = (WordEntry*)STRPTR(t);
-        WordEntry  *StopMiddle;
-        int                     difference;
-
-        /* Loop invariant: StopLow <= item < StopHigh */
-
-        while (StopLow < StopHigh)
-        {
-                StopMiddle = StopLow + (StopHigh - StopLow) / 2;
-                difference = WordECompareITEM(STRPTR(t), GETOPERAND(q), StopMiddle, item);
-                if (difference == 0)
-                        return StopMiddle;
-                else if (difference < 0)
-                        StopLow = StopMiddle + 1;
-                else
-                        StopHigh = StopMiddle;
-        }
-
-        return NULL;
+static WordEntry *
+find_wordentry(tsvector * t, QUERYTYPE * q, ITEM * item)
+{
+       WordEntry  *StopLow = ARRPTR(t);
+       WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
+       WordEntry  *StopMiddle;
+       int                     difference;
+
+       /* Loop invariant: StopLow <= item < StopHigh */
+
+       while (StopLow < StopHigh)
+       {
+               StopMiddle = StopLow + (StopHigh - StopLow) / 2;
+               difference = WordECompareITEM(STRPTR(t), GETOPERAND(q), StopMiddle, item);
+               if (difference == 0)
+                       return StopMiddle;
+               else if (difference < 0)
+                       StopLow = StopMiddle + 1;
+               else
+                       StopHigh = StopMiddle;
+       }
+
+       return NULL;
+}
+
+
+static int
+compareITEM(const void *a, const void *b, void *arg)
+{
+       char *operand = (char *) arg;
+
+       if ((*(ITEM **) a)->length == (*(ITEM **) b)->length)
+               return strncmp(operand + (*(ITEM **) a)->distance,
+                                          operand + (*(ITEM **) b)->distance,
+                                          (*(ITEM **) b)->length);
+
+       return ((*(ITEM **) a)->length > (*(ITEM **) b)->length) ? 1 : -1;
+}
+
+static ITEM **
+SortAndUniqItems(char *operand, ITEM * item, int *size)
+{
+       ITEM      **res,
+                         **ptr,
+                         **prevptr;
+
+       ptr = res = (ITEM **) palloc(sizeof(ITEM *) * *size);
+
+       while ((*size)--)
+       {
+               if (item->type == VAL)
+               {
+                       *ptr = item;
+                       ptr++;
+               }
+               item++;
+       }
+
+       *size = ptr - res;
+       if (*size < 2)
+               return res;
+
+       qsort_arg(res, *size, sizeof(ITEM **), compareITEM, (void *) operand);
+
+       ptr = res + 1;
+       prevptr = res;
+
+       while (ptr - res < *size)
+       {
+               if (compareITEM((void *) ptr, (void *) prevptr, (void *) operand) != 0)
+               {
+                       prevptr++;
+                       *prevptr = *ptr;
+               }
+               ptr++;
+       }
+
+       *size = prevptr + 1 - res;
+       return res;
 }
 
-static WordEntryPos    POSNULL[]={
-       {0,0},
-       {0,MAXENTRYPOS-1}
+static WordEntryPos POSNULL[] = {
+       0,
+       0
 };
 
 static float
-calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) {
-       uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size);
-       int i,k,l,p;
-       WordEntry *entry;
-       WordEntryPos    *post,*ct;
-       int4    dimt,lenct,dist;
-       float res=-1.0;
-       ITEM    *item=GETQUERY(q);
-
-       memset(pos,0,sizeof(uint16**) * q->size);
-       *(uint16*)POSNULL = lengthof(POSNULL)-1;
-
-       for(i=0; i<q->size; i++) {
-               
-               if ( item[i].type != VAL )
-                       continue;
-
-               entry=find_wordentry(t,q,&(item[i]));
-               if ( !entry )
+calc_rank_and(float *w, tsvector * t, QUERYTYPE * q)
+{
+       uint16    **pos;
+       int                     i,
+                               k,
+                               l,
+                               p;
+       WordEntry  *entry;
+       WordEntryPos *post,
+                          *ct;
+       int4            dimt,
+                               lenct,
+                               dist;
+       float           res = -1.0;
+       ITEM      **item;
+       int                     size = q->size;
+
+       item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
+       if (size < 2)
+       {
+               pfree(item);
+               return calc_rank_or(w, t, q);
+       }
+       pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
+       memset(pos, 0, sizeof(uint16 *) * q->size);
+       *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
+       WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1);
+
+       for (i = 0; i < size; i++)
+       {
+               entry = find_wordentry(t, q, item[i]);
+               if (!entry)
                        continue;
 
-               if ( entry->haspos )
-                       pos[i] = (uint16*)_POSDATAPTR(t,entry);
+               if (entry->haspos)
+                       pos[i] = (uint16 *) _POSDATAPTR(t, entry);
                else
-                       pos[i] = (uint16*)POSNULL;
-
-
-               dimt = *(uint16*)(pos[i]);
-               post = (WordEntryPos*)(pos[i]+1);
-               for( k=0; k<i; k++ ) {
-                       if ( !pos[k] ) continue;
-                       lenct = *(uint16*)(pos[k]);
-                       ct = (WordEntryPos*)(pos[k]+1);
-                       for(l=0; l<dimt; l++) {
-                               for(p=0; p<lenct; p++) {
-                                       dist = abs( post[l].pos - ct[p].pos );
-                                       if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
-                                               float curw; 
-                                               if ( !dist ) dist=MAXENTRYPOS;  
-                                               curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
-                                               res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
+                       pos[i] = (uint16 *) POSNULL;
+
+
+               dimt = *(uint16 *) (pos[i]);
+               post = (WordEntryPos *) (pos[i] + 1);
+               for (k = 0; k < i; k++)
+               {
+                       if (!pos[k])
+                               continue;
+                       lenct = *(uint16 *) (pos[k]);
+                       ct = (WordEntryPos *) (pos[k] + 1);
+                       for (l = 0; l < dimt; l++)
+                       {
+                               for (p = 0; p < lenct; p++)
+                               {
+                                       dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
+                                       if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL)))
+                                       {
+                                               float           curw;
+
+                                               if (!dist)
+                                                       dist = MAXENTRYPOS;
+                                               curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
+                                               res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
                                        }
                                }
                        }
                }
        }
        pfree(pos);
-       return res;     
+       pfree(item);
+       return res;
 }
 
 static float
-calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
-       WordEntry *entry;
-       WordEntryPos    *post;
-       int4    dimt,j,i;
-       float res=-1.0;
-       ITEM    *item=GETQUERY(q);
-
-       *(uint16*)POSNULL = lengthof(POSNULL)-1;
-
-       for(i=0; i<q->size; i++) {
-               if ( item[i].type != VAL )
+calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
+{
+       WordEntry  *entry;
+       WordEntryPos *post;
+       int4            dimt,
+                               j,
+                               i;
+       float           res = 0.0;
+       ITEM      **item;
+       int                     size = q->size;
+
+       *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
+       item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
+
+       for (i = 0; i < size; i++)
+       {
+               float           resj,
+                                       wjm;
+               int4            jm;
+
+               entry = find_wordentry(t, q, item[i]);
+               if (!entry)
                        continue;
 
-               entry=find_wordentry(t,q,&(item[i]));
-               if ( !entry )
-                       continue;
-
-               if ( entry->haspos ) {
-                       dimt = POSDATALEN(t,entry);
-                       post = POSDATAPTR(t,entry);
-               } else {
-                       dimt = *(uint16*)POSNULL;
-                       post = POSNULL+1;
+               if (entry->haspos)
+               {
+                       dimt = POSDATALEN(t, entry);
+                       post = POSDATAPTR(t, entry);
+               }
+               else
+               {
+                       dimt = *(uint16 *) POSNULL;
+                       post = POSNULL + 1;
                }
 
-               for(j=0;j<dimt;j++) {
-                       if ( res < 0 )
-                               res = wpos( &(post[j]) );
-                       else
-                               res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
+               resj = 0.0;
+               wjm = -1.0;
+               jm = 0;
+               for (j = 0; j < dimt; j++)
+               {
+                       resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
+                       if (wpos(post[j]) > wjm)
+                       {
+                               wjm = wpos(post[j]);
+                               jm = j;
+                       }
                }
+/*
+               limit (sum(i/i^2),i->inf) = pi^2/6
+               resj = sum(wi/i^2),i=1,noccurence,
+               wi - should be sorted desc,
+               don't sort for now, just choose maximum weight. This should be corrected
+               Oleg Bartunov
+*/
+               res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
        }
+       if (size > 0)
+               res = res / size;
+       pfree(item);
        return res;
 }
 
 static float
-calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
-       ITEM *item = GETQUERY(q);
-       float res=0.0;
+calc_rank(float *w, tsvector * t, QUERYTYPE * q, int4 method)
+{
+       ITEM       *item = GETQUERY(q);
+       float           res = 0.0;
+       int                     len;
 
        if (!t->size || !q->size)
                return 0.0;
 
-       res = ( item->type != VAL && item->val == (int4) '&' ) ?
-               calc_rank_and(w,t,q) : calc_rank_or(w,t,q);
+       res = (item->type != VAL && item->val == (int4) '&') ?
+               calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
 
-       if ( res < 0 )
-               res = 1e-20;
+       if (res < 0)
+               res = (float)1e-20;
 
-        switch(method) {
-               case 0: break;
-               case 1: res /= log((float)cnt_length(t)); break;
-               case 2: res /= (float)cnt_length(t); break;
-               default:
-               /* internal error */
-               elog(ERROR,"unrecognized normalization method: %d", method);
-        }
+       if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
+               res /= log((double) (cnt_length(t) + 1)) / log(2.0);
+
+       if (method & RANK_NORM_LENGTH)
+       {
+               len = cnt_length(t);
+               if (len > 0)
+                       res /= (float) len;
+       }
+
+       if ((method & RANK_NORM_UNIQ) && t->size > 0)
+               res /= (float) (t->size);
+
+       if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
+               res /= log((double) (t->size + 1)) / log(2.0);
 
        return res;
 }
 
 Datum
-rank(PG_FUNCTION_ARGS) {
+rank(PG_FUNCTION_ARGS)
+{
        ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
-       tsvector           *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+       tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
        QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
-       int method=DEF_NORM_METHOD;
-       float res=0.0;
-       float ws[ lengthof(weights) ];
-       int i;
+       int                     method = DEF_NORM_METHOD;
+       float           res = 0.0;
+       float           ws[lengthof(weights)];
+       float4     *arrdata;
+       int                     i;
 
-       if ( ARR_NDIM(win) != 1 ) 
+       if (ARR_NDIM(win) != 1)
                ereport(ERROR,
                                (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
                                 errmsg("array of weight must be one-dimensional")));
 
-       if ( ARRNELEMS(win) < lengthof(weights) )
+       if (ARRNELEMS(win) < lengthof(weights))
                ereport(ERROR,
                                (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
                                 errmsg("array of weight is too short")));
 
-       for(i=0;i<lengthof(weights);i++) {
-               ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
-               if ( ws[ i ] > 1.0 ) 
+       if (ARR_HASNULL(win))
+               ereport(ERROR,
+                               (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+                                errmsg("array of weight must not contain nulls")));
+
+       arrdata = (float4 *) ARR_DATA_PTR(win);
+       for (i = 0; i < lengthof(weights); i++)
+       {
+               ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i];
+               if (ws[i] > 1.0)
                        ereport(ERROR,
                                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                                         errmsg("weight out of range")));
-       } 
+       }
+
+       if (PG_NARGS() == 4)
+               method = PG_GETARG_INT32(3);
 
-       if ( PG_NARGS() == 4 )
-               method=PG_GETARG_INT32(3);
+       res = calc_rank(ws, txt, query, method);
 
-       res=calc_rank(ws, txt, query, method); 
-               
        PG_FREE_IF_COPY(win, 0);
        PG_FREE_IF_COPY(txt, 1);
        PG_FREE_IF_COPY(query, 2);
@@ -262,217 +401,376 @@ rank(PG_FUNCTION_ARGS) {
 }
 
 Datum
-rank_def(PG_FUNCTION_ARGS) {
-       tsvector           *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+rank_def(PG_FUNCTION_ARGS)
+{
+       tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
        QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
-       float res=0.0;
-       int method=DEF_NORM_METHOD;
+       float           res = 0.0;
+       int                     method = DEF_NORM_METHOD;
 
-       if ( PG_NARGS() == 3 )
-               method=PG_GETARG_INT32(2);
+       if (PG_NARGS() == 3)
+               method = PG_GETARG_INT32(2);
+
+       res = calc_rank(weights, txt, query, method);
 
-       res=calc_rank(weights, txt, query, method); 
-               
        PG_FREE_IF_COPY(txt, 0);
        PG_FREE_IF_COPY(query, 1);
        PG_RETURN_FLOAT4(res);
 }
 
 
-typedef struct {
-       ITEM    *item;
-       int32   pos;
-} DocRepresentation;
+typedef struct
+{
+       ITEM      **item;
+       int16           nitem;
+       bool            needfree;
+       uint8           wclass;
+       int32           pos;
+}      DocRepresentation;
 
 static int
-compareDocR(const void *a, const void *b) {
-       if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
-               return 1;
-       return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
+compareDocR(const void *a, const void *b)
+{
+       if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos)
+               return 0;
+       return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
 }
 
-
-typedef struct {
-       DocRepresentation *doc;
-       int len;
-}      ChkDocR;
-
 static bool
-checkcondition_DR(void *checkval, ITEM *val) {
-       DocRepresentation *ptr = ((ChkDocR*)checkval)->doc;
-
-       while( ptr - ((ChkDocR*)checkval)->doc < ((ChkDocR*)checkval)->len ) {
-               if ( val == ptr->item )
-                       return true;
-               ptr++;
-       }       
+checkcondition_ITEM(void *checkval, ITEM * val)
+{
+       return (bool) (val->istrue);
+}
 
-       return false;
+static void
+reset_istrue_flag(QUERYTYPE * query)
+{
+       ITEM       *item = GETQUERY(query);
+       int                     i;
+
+       /* reset istrue flag */
+       for (i = 0; i < query->size; i++)
+       {
+               if (item->type == VAL)
+                       item->istrue = 0;
+               item++;
+       }
 }
 
+typedef struct
+{
+       int                     pos;
+       int                     p;
+       int                     q;
+       DocRepresentation *begin;
+       DocRepresentation *end;
+}      Extention;
+
 
 static bool
-Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
-       int i;
-       DocRepresentation       *ptr,*f=(DocRepresentation*)0xffffffff;
-       ITEM    *item=GETQUERY(query);
-       int lastpos=*pos;
-       int oldq=*q;
-
-       *p=0x7fffffff;
-       *q=0;
-
-       for(i=0; i<query->size; i++) {
-               if ( item->type != VAL ) {
-                       item++;
-                       continue;
-               }
-               ptr = doc + *pos;
-
-               while(ptr-doc<len) {
-                       if ( ptr->item == item ) {
-                               if ( ptr->pos > *q ) {
-                                       *q = ptr->pos;
-                                       lastpos= ptr - doc;
-                               } 
-                               break;
-                       } 
-                       ptr++;
+Cover(DocRepresentation * doc, int len, QUERYTYPE * query, Extention * ext)
+{
+       DocRepresentation *ptr;
+       int                     lastpos = ext->pos;
+       int                     i;
+       bool            found = false;
+
+       reset_istrue_flag(query);
+
+       ext->p = 0x7fffffff;
+       ext->q = 0;
+       ptr = doc + ext->pos;
+
+       /* find upper bound of cover from current position, move up */
+       while (ptr - doc < len)
+       {
+               for (i = 0; i < ptr->nitem; i++)
+                       ptr->item[i]->istrue = 1;
+               if (TS_execute(GETQUERY(query), NULL, false, checkcondition_ITEM))
+               {
+                       if (ptr->pos > ext->q)
+                       {
+                               ext->q = ptr->pos;
+                               ext->end = ptr;
+                               lastpos = ptr - doc;
+                               found = true;
+                       }
+                       break;
                }
-
-               item++;
+               ptr++;
        }
 
-       if (*q==0 )
+       if (!found)
                return false;
 
-       if (*q==oldq) { /* already check this pos */
-               (*pos)++;
-               return Cover(doc, len, query, pos,p,q);
-       } 
-
-       item=GETQUERY(query);
-       for(i=0; i<query->size; i++) {
-               if ( item->type != VAL ) {
-                       item++;
-                       continue;
-               }
-               ptr = doc + lastpos;
-
-               while(ptr>=doc+*pos) {
-                       if ( ptr->item == item ) {
-                               if ( ptr->pos < *p ) {
-                                       *p = ptr->pos;
-                                       f=ptr;
-                               }
-                               break;
+       reset_istrue_flag(query);
+
+       ptr = doc + lastpos;
+
+       /* find lower bound of cover from founded upper bound, move down */
+       while (ptr >= doc)
+       {
+               for (i = 0; i < ptr->nitem; i++)
+                       ptr->item[i]->istrue = 1;
+               if (TS_execute(GETQUERY(query), NULL, true, checkcondition_ITEM))
+               {
+                       if (ptr->pos < ext->p)
+                       {
+                               ext->begin = ptr;
+                               ext->p = ptr->pos;
                        }
-                       ptr--;
+                       break;
                }
-               item++;
+               ptr--;
        }
-       if ( *p<=*q ) {
-               ChkDocR ch = { f, (doc + lastpos)-f+1 };
-               *pos = f-doc+1;
-               if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) { 
- /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/ 
-                       return true;
-               } else
-                       return Cover(doc, len, query, pos,p,q); 
+
+       if (ext->p <= ext->q)
+       {
+               /*
+                * set position for next try to next lexeme after begining of founded
+                * cover
+                */
+               ext->pos = (ptr - doc) + 1;
+               return true;
        }
-       return false;
+
+       ext->pos++;
+       return Cover(doc, len, query, ext);
 }
 
-static DocRepresentation*
-get_docrep(tsvector     *txt, QUERYTYPE  *query, int *doclen) {
-       ITEM    *item=GETQUERY(query);
-       WordEntry *entry;
-       WordEntryPos    *post;
-       int4    dimt,j,i;
-       int len=query->size*4,cur=0;
+static DocRepresentation *
+get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen)
+{
+       ITEM       *item = GETQUERY(query);
+       WordEntry  *entry;
+       WordEntryPos *post;
+       int4            dimt,
+                               j,
+                               i;
+       int                     len = query->size * 4,
+                               cur = 0;
        DocRepresentation *doc;
+       char       *operand;
+
+       *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
+       doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
+       operand = GETOPERAND(query);
+       reset_istrue_flag(query);
 
-       *(uint16*)POSNULL = lengthof(POSNULL)-1;
-       doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
-       for(i=0; i<query->size; i++) {
-               if ( item[i].type != VAL )
+       for (i = 0; i < query->size; i++)
+       {
+               if (item[i].type != VAL || item[i].istrue)
                        continue;
 
-               entry=find_wordentry(txt,query,&(item[i]));
-               if ( !entry )
+               entry = find_wordentry(txt, query, &(item[i]));
+               if (!entry)
                        continue;
 
-               if ( entry->haspos ) {
-                       dimt = POSDATALEN(txt,entry);
-                       post = POSDATAPTR(txt,entry);
-               } else {
-                       dimt = *(uint16*)POSNULL;
-                       post = POSNULL+1;
+               if (entry->haspos)
+               {
+                       dimt = POSDATALEN(txt, entry);
+                       post = POSDATAPTR(txt, entry);
+               }
+               else
+               {
+                       dimt = *(uint16 *) POSNULL;
+                       post = POSNULL + 1;
                }
 
-               while( cur+dimt >= len ) {
-                       len*=2;
-                       doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
+               while (cur + dimt >= len)
+               {
+                       len *= 2;
+                       doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
                }
 
-               for(j=0;j<dimt;j++) {
-                       doc[cur].item=&(item[i]);
-                       doc[cur].pos=post[j].pos;
+               for (j = 0; j < dimt; j++)
+               {
+                       if (j == 0)
+                       {
+                               ITEM       *kptr,
+                                                  *iptr = item + i;
+                               int                     k;
+
+                               doc[cur].needfree = false;
+                               doc[cur].nitem = 0;
+                               doc[cur].item = (ITEM **) palloc(sizeof(ITEM *) * query->size);
+
+                               for (k = 0; k < query->size; k++)
+                               {
+                                       kptr = item + k;
+                                       if (k == i ||
+                                               (item[k].type == VAL &&
+                                                compareITEM(&kptr, &iptr, operand) == 0))
+                                       {
+                                               doc[cur].item[doc[cur].nitem] = item + k;
+                                               doc[cur].nitem++;
+                                               kptr->istrue = 1;
+                                       }
+                               }
+                       }
+                       else
+                       {
+                               doc[cur].needfree = false;
+                               doc[cur].nitem = doc[cur - 1].nitem;
+                               doc[cur].item = doc[cur - 1].item;
+                       }
+                       doc[cur].pos = WEP_GETPOS(post[j]);
+                       doc[cur].wclass = WEP_GETWEIGHT(post[j]);
                        cur++;
                }
        }
 
-       *doclen=cur;
-       
-       if ( cur>0 ) {
-               if ( cur>1 ) 
+       *doclen = cur;
+
+       if (cur > 0)
+       {
+               if (cur > 1)
                        qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
                return doc;
        }
-       
+
        pfree(doc);
        return NULL;
 }
 
+static float4
+calc_rank_cd(float4 *arrdata, tsvector * txt, QUERYTYPE * query, int method)
+{
+       DocRepresentation *doc;
+       int                     len,
+                               i,
+                               doclen = 0;
+       Extention       ext;
+       double          Wdoc = 0.0;
+       double          invws[lengthof(weights)];
+       double          SumDist = 0.0,
+                               PrevExtPos = 0.0,
+                               CurExtPos = 0.0;
+       int                     NExtent = 0;
+
+       for (i = 0; i < lengthof(weights); i++)
+       {
+               invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
+               if (invws[i] > 1.0)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("weight out of range")));
+               invws[i] = 1.0 / invws[i];
+       }
 
-Datum
-rank_cd(PG_FUNCTION_ARGS) {
-       int K = PG_GETARG_INT32(0);
-       tsvector           *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
-       QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
-       int method=DEF_NORM_METHOD;
-       DocRepresentation       *doc;
-       float   res=0.0;
-       int p=0,q=0,len,cur;
-
-       doc = get_docrep(txt, query, &len);
-       if ( !doc ) {
-               PG_FREE_IF_COPY(txt, 1);
-               PG_FREE_IF_COPY(query, 2);
-               PG_RETURN_FLOAT4(0.0);
+       doc = get_docrep(txt, query, &doclen);
+       if (!doc)
+               return 0.0;
+
+       MemSet(&ext, 0, sizeof(Extention));
+       while (Cover(doc, doclen, query, &ext))
+       {
+               double          Cpos = 0.0;
+               double          InvSum = 0.0;
+               int                     nNoise;
+               DocRepresentation *ptr = ext.begin;
+
+               while (ptr <= ext.end)
+               {
+                       InvSum += invws[ptr->wclass];
+                       ptr++;
+               }
+
+               Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
+               /*
+                * if doc are big enough then ext.q may be equal to ext.p
+                * due to limit of posional information. In this case we 
+                * approximate number of noise word as half cover's
+                * length
+                */
+               nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
+               if ( nNoise < 0 )
+                       nNoise = (ext.end - ext.begin) / 2;
+               Wdoc += Cpos / ((double) (1 + nNoise));
+
+               CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
+               if (NExtent > 0 && CurExtPos > PrevExtPos               /* prevent devision by
+                                                                                                                * zero in a case of
+                               multiple lexize */ )
+                       SumDist += 1.0 / (CurExtPos - PrevExtPos);
+
+               PrevExtPos = CurExtPos;
+               NExtent++;
+       }
+
+       if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
+               Wdoc /= log((double) (cnt_length(txt) + 1));
+
+       if (method & RANK_NORM_LENGTH)
+       {
+               len = cnt_length(txt);
+               if (len > 0)
+                       Wdoc /= (double) len;
        }
 
-       cur=0;
-       if (K<=0)
-               K=4;    
-       while( Cover(doc, len, query, &cur, &p, &q) ) 
-               res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
+       if ((method & RANK_NORM_EXTDIST) && SumDist > 0)
+               Wdoc /= ((double) NExtent) / SumDist;
 
-       if ( PG_NARGS() == 4 )
-               method=PG_GETARG_INT32(3);
+       if ((method & RANK_NORM_UNIQ) && txt->size > 0)
+               Wdoc /= (double) (txt->size);
 
-        switch(method) {
-               case 0: break;
-               case 1: res /= log((float)cnt_length(txt)); break;
-               case 2: res /= (float)cnt_length(txt); break;
-               default:
-               /* internal error */
-               elog(ERROR,"unrecognized normalization method: %d", method);
-        }
+       if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
+               Wdoc /= log((double) (txt->size + 1)) / log(2.0);
 
+       for (i = 0; i < doclen; i++)
+               if (doc[i].needfree)
+                       pfree(doc[i].item);
        pfree(doc);
+
+       return (float4) Wdoc;
+}
+
+Datum
+rank_cd(PG_FUNCTION_ARGS)
+{
+       ArrayType  *win;
+       tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
+       QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(2));
+       int                     method = DEF_NORM_METHOD;
+       float4          res;
+
+       /*
+        * Pre-8.2, rank_cd took just a plain int as its first argument.
+        * It was a mistake to keep the same C function name while changing the
+        * signature, but it's too late to fix that.  Instead, do a runtime test
+        * to make sure the expected datatype has been passed.  This is needed
+        * to prevent core dumps if tsearch2 function definitions from an old
+        * database are loaded into an 8.2 server.
+        */
+       if (get_fn_expr_argtype(fcinfo->flinfo, 0) != FLOAT4ARRAYOID)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
+                                errmsg("rank_cd() now takes real[] as its first argument, not integer")));
+
+       /* now safe to dereference the first arg */
+       win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+
+       if (ARR_NDIM(win) != 1)
+               ereport(ERROR,
+                               (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+                                errmsg("array of weight must be one-dimensional")));
+
+       if (ARRNELEMS(win) < lengthof(weights))
+               ereport(ERROR,
+                               (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+                                errmsg("array of weight is too short")));
+
+       if (ARR_HASNULL(win))
+               ereport(ERROR,
+                               (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+                                errmsg("array of weight must not contain nulls")));
+
+       if (PG_NARGS() == 4)
+               method = PG_GETARG_INT32(3);
+
+       res = calc_rank_cd((float4 *) ARR_DATA_PTR(win), txt, query, method);
+
+       PG_FREE_IF_COPY(win, 0);
        PG_FREE_IF_COPY(txt, 1);
        PG_FREE_IF_COPY(query, 2);
 
@@ -481,120 +779,146 @@ rank_cd(PG_FUNCTION_ARGS) {
 
 
 Datum
-rank_cd_def(PG_FUNCTION_ARGS) {
-       PG_RETURN_DATUM( DirectFunctionCall4(   
-               rank_cd,
-               Int32GetDatum(-1),
-               PG_GETARG_DATUM(0),
-               PG_GETARG_DATUM(1),
-               ( PG_NARGS() == 3 ) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD)
-       )); 
+rank_cd_def(PG_FUNCTION_ARGS)
+{
+       tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+       QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1));
+       float4          res;
+
+       res = calc_rank_cd(weights, txt, query, (PG_NARGS() == 3) ? PG_GETARG_DATUM(2) : DEF_NORM_METHOD);
+
+       PG_FREE_IF_COPY(txt, 0);
+       PG_FREE_IF_COPY(query, 1);
+
+       PG_RETURN_FLOAT4(res);
 }
 
 /**************debug*************/
 
-typedef struct {
-       char    *w;
-       int2    len;
-       int2    pos;
-       int2    start;
-       int2    finish;
-} DocWord;
+typedef struct
+{
+       char       *w;
+       int2            len;
+       int2            pos;
+       int2            start;
+       int2            finish;
+}      DocWord;
 
 static int
-compareDocWord(const void *a, const void *b) {
-       if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
-               return 1;
-       return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
+compareDocWord(const void *a, const void *b)
+{
+       if (((DocWord *) a)->pos == ((DocWord *) b)->pos)
+               return 0;
+       return (((DocWord *) a)->pos > ((DocWord *) b)->pos) ? 1 : -1;
 }
 
 
-Datum 
-get_covers(PG_FUNCTION_ARGS) {
-       tsvector     *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
-       QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
-       WordEntry       *pptr=ARRPTR(txt);
-       int i,dlen=0,j,cur=0,len=0,rlen;
-       DocWord *dw,*dwptr;
-       text    *out;
-       char *cptr;
+Datum
+get_covers(PG_FUNCTION_ARGS)
+{
+       tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+       QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1));
+       WordEntry  *pptr = ARRPTR(txt);
+       int                     i,
+                               dlen = 0,
+                               j,
+                               cur = 0,
+                               len = 0,
+                               rlen;
+       DocWord    *dw,
+                          *dwptr;
+       text       *out;
+       char       *cptr;
        DocRepresentation *doc;
-       int pos=0,p,q,olddwpos=0;
-       int ncover=1;
+       int                     olddwpos = 0;
+       int                     ncover = 1;
+       Extention       ext;
 
        doc = get_docrep(txt, query, &rlen);
 
-       if ( !doc ) {
-               out=palloc(VARHDRSZ);
+       if (!doc)
+       {
+               out = palloc(VARHDRSZ);
                VARATT_SIZEP(out) = VARHDRSZ;
-               PG_FREE_IF_COPY(txt,0);
-               PG_FREE_IF_COPY(query,1);
+               PG_FREE_IF_COPY(txt, 0);
+               PG_FREE_IF_COPY(query, 1);
                PG_RETURN_POINTER(out);
        }
 
-       for(i=0;i<txt->size;i++) {
+       for (i = 0; i < txt->size; i++)
+       {
                if (!pptr[i].haspos)
                        ereport(ERROR,
                                        (errcode(ERRCODE_SYNTAX_ERROR),
                                         errmsg("no pos info")));
-                dlen += POSDATALEN(txt,&(pptr[i]));
+               dlen += POSDATALEN(txt, &(pptr[i]));
        }
 
-       dwptr=dw=palloc(sizeof(DocWord)*dlen);
-       memset(dw,0,sizeof(DocWord)*dlen);
+       dwptr = dw = palloc(sizeof(DocWord) * dlen);
+       memset(dw, 0, sizeof(DocWord) * dlen);
 
-       for(i=0;i<txt->size;i++) {
-               WordEntryPos    *posdata = POSDATAPTR(txt,&(pptr[i]));
-               for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
-                       dw[cur].w=STRPTR(txt)+pptr[i].pos;      
-                       dw[cur].len=pptr[i].len;        
-                       dw[cur].pos=posdata[j].pos;
+       for (i = 0; i < txt->size; i++)
+       {
+               WordEntryPos *posdata = POSDATAPTR(txt, &(pptr[i]));
+
+               for (j = 0; j < POSDATALEN(txt, &(pptr[i])); j++)
+               {
+                       dw[cur].w = STRPTR(txt) + pptr[i].pos;
+                       dw[cur].len = pptr[i].len;
+                       dw[cur].pos = WEP_GETPOS(posdata[j]);
                        cur++;
                }
-               len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
+               len += (pptr[i].len + 1) * (int) POSDATALEN(txt, &(pptr[i]));
        }
        qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
 
-       while( Cover(doc, rlen, query, &pos, &p, &q) ) {
-               dwptr=dw+olddwpos;
-               while(dwptr->pos < p && dwptr-dw<dlen)
+       MemSet(&ext, 0, sizeof(Extention));
+       while (Cover(doc, rlen, query, &ext))
+       {
+               dwptr = dw + olddwpos;
+               while (dwptr->pos < ext.p && dwptr - dw < dlen)
                        dwptr++;
-               olddwpos=dwptr-dw;
-               dwptr->start=ncover;
-               while(dwptr->pos < q+1 && dwptr-dw<dlen)
+               olddwpos = dwptr - dw;
+               dwptr->start = ncover;
+               while (dwptr->pos < ext.q + 1 && dwptr - dw < dlen)
                        dwptr++;
-               (dwptr-1)->finish=ncover;
-               len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
-               ncover++; 
-       } 
-       
-       out=palloc(VARHDRSZ+len);
-       cptr=((char*)out)+VARHDRSZ;
-       dwptr=dw;
-
-       while( dwptr-dw < dlen) {
-               if ( dwptr->start ) {
-                       sprintf(cptr,"{%d ",dwptr->start);
-                       cptr=strchr(cptr,'\0');
+               (dwptr - 1)->finish = ncover;
+               len += 4 /* {}+two spaces */ + 2 * 16 /* numbers */ ;
+               ncover++;
+       }
+
+       out = palloc(VARHDRSZ + len);
+       cptr = ((char *) out) + VARHDRSZ;
+       dwptr = dw;
+
+       while (dwptr - dw < dlen)
+       {
+               if (dwptr->start)
+               {
+                       sprintf(cptr, "{%d ", dwptr->start);
+                       cptr = strchr(cptr, '\0');
                }
-               memcpy(cptr,dwptr->w,dwptr->len);
-               cptr+=dwptr->len;
-               *cptr=' ';
+               memcpy(cptr, dwptr->w, dwptr->len);
+               cptr += dwptr->len;
+               *cptr = ' ';
                cptr++;
-               if ( dwptr->finish ) { 
-                       sprintf(cptr,"}%d ",dwptr->finish);
-                       cptr=strchr(cptr,'\0');
+               if (dwptr->finish)
+               {
+                       sprintf(cptr, "}%d ", dwptr->finish);
+                       cptr = strchr(cptr, '\0');
                }
                dwptr++;
-       }       
+       }
+
+       VARATT_SIZEP(out) = cptr - ((char *) out);
 
-       VARATT_SIZEP(out) = cptr - ((char*)out);
-       
        pfree(dw);
+       for (i = 0; i < rlen; i++)
+               if (doc[i].needfree)
+                       pfree(doc[i].item);
        pfree(doc);
 
-       PG_FREE_IF_COPY(txt,0);
-       PG_FREE_IF_COPY(query,1);
+       PG_FREE_IF_COPY(txt, 0);
+       PG_FREE_IF_COPY(query, 1);
        PG_RETURN_POINTER(out);
 }
-