]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/tsrank.c
c9e71c9e21d1bd6e477efbf58b6ffad41cc3dca7
[postgresql] / src / backend / utils / adt / tsrank.c
1 /*-------------------------------------------------------------------------
2  *
3  * tsrank.c
4  *              rank tsvector by tsquery
5  *
6  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *        src/backend/utils/adt/tsrank.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15
16 #include <math.h>
17
18 #include "tsearch/ts_utils.h"
19 #include "utils/array.h"
20 #include "miscadmin.h"
21
22
/*
 * Default weights, returned by getWeights() when no weight array is given
 * and used as the fallback for negative entries of a caller-supplied array.
 * The array is indexed by the value WEP_GETWEIGHT() extracts from a position
 * (see wpos below).
 * NOTE(review): presumably the order is D, C, B, A -- confirm against the
 * tsvector weight encoding.
 */
static const float weights[] = {0.1f, 0.2f, 0.4f, 1.0f};

/*
 * Weight of position 'wep'.  Expects a weight array named 'w' to be in scope
 * at the point of use.
 */
#define wpos(wep)	( w[ WEP_GETWEIGHT(wep) ] )

/*
 * Normalization flags; the 'method' argument of calc_rank() and
 * calc_rank_cd() is tested against each of them with a bitwise AND.
 */
#define RANK_NO_NORM			0x00	/* no normalization */
#define RANK_NORM_LOGLENGTH		0x01	/* divide by a logarithm of (cnt_length + 1) */
#define RANK_NORM_LENGTH		0x02	/* divide by cnt_length */
#define RANK_NORM_EXTDIST		0x04	/* calc_rank_cd only: divide by NExtent / SumDist */
#define RANK_NORM_UNIQ			0x08	/* divide by the tsvector's size field */
#define RANK_NORM_LOGUNIQ		0x10	/* divide by log2(size + 1) */
#define RANK_NORM_RDIVRPLUS1	0x20	/* rank / (rank + 1) */
#define DEF_NORM_METHOD			RANK_NO_NORM

/* Forward declarations: calc_rank_and falls back to calc_rank_or. */
static float calc_rank_or(const float *w, TSVector t, TSQuery q);
static float calc_rank_and(const float *w, TSVector t, TSQuery q);
38
39 /*
40  * Returns a weight of a word collocation
41  */
42 static float4
43 word_distance(int32 w)
44 {
45         if (w > 100)
46                 return 1e-30f;
47
48         return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
49 }
50
51 static int
52 cnt_length(TSVector t)
53 {
54         WordEntry  *ptr = ARRPTR(t),
55                            *end = (WordEntry *) STRPTR(t);
56         int                     len = 0;
57
58         while (ptr < end)
59         {
60                 int                     clen = POSDATALEN(t, ptr);
61
62                 if (clen == 0)
63                         len += 1;
64                 else
65                         len += clen;
66
67                 ptr++;
68         }
69
70         return len;
71 }
72
73
/*
 * Compare a query operand with a tsvector entry using tsCompareString().
 *	e - base of the tsvector's lexeme storage ((p)->pos is an offset into it)
 *	q - base of the query's operand storage ((i)->distance is an offset into it)
 *	p - WordEntry being compared
 *	i - QueryOperand being compared
 *	m - passed through as tsCompareString()'s last argument; find_wordentry
 *		passes true only for its prefix scan
 */
#define WordECompareQueryItem(e,q,p,i,m) \
	tsCompareString((q) + (i)->distance, (i)->length,	\
					(e) + (p)->pos, (p)->len, (m))
77
78
79 /*
80  * Returns a pointer to a WordEntry's array corresponding to 'item' from
81  * tsvector 't'. 'q' is the TSQuery containing 'item'.
82  * Returns NULL if not found.
83  */
84 static WordEntry *
85 find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
86 {
87         WordEntry  *StopLow = ARRPTR(t);
88         WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
89         WordEntry  *StopMiddle = StopHigh;
90         int                     difference;
91
92         *nitem = 0;
93
94         /* Loop invariant: StopLow <= item < StopHigh */
95         while (StopLow < StopHigh)
96         {
97                 StopMiddle = StopLow + (StopHigh - StopLow) / 2;
98                 difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
99                 if (difference == 0)
100                 {
101                         StopHigh = StopMiddle;
102                         *nitem = 1;
103                         break;
104                 }
105                 else if (difference > 0)
106                         StopLow = StopMiddle + 1;
107                 else
108                         StopHigh = StopMiddle;
109         }
110
111         if (item->prefix)
112         {
113                 if (StopLow >= StopHigh)
114                         StopMiddle = StopHigh;
115
116                 *nitem = 0;
117
118                 while (StopMiddle < (WordEntry *) STRPTR(t) &&
119                            WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0)
120                 {
121                         (*nitem)++;
122                         StopMiddle++;
123                 }
124         }
125
126         return (*nitem > 0) ? StopHigh : NULL;
127 }
128
129
130 /*
131  * sort QueryOperands by (length, word)
132  */
133 static int
134 compareQueryOperand(const void *a, const void *b, void *arg)
135 {
136         char       *operand = (char *) arg;
137         QueryOperand *qa = (*(QueryOperand *const *) a);
138         QueryOperand *qb = (*(QueryOperand *const *) b);
139
140         return tsCompareString(operand + qa->distance, qa->length,
141                                                    operand + qb->distance, qb->length,
142                                                    false);
143 }
144
145 /*
146  * Returns a sorted, de-duplicated array of QueryOperands in a query.
147  * The returned QueryOperands are pointers to the original QueryOperands
148  * in the query.
149  *
150  * Length of the returned array is stored in *size
151  */
152 static QueryOperand **
153 SortAndUniqItems(TSQuery q, int *size)
154 {
155         char       *operand = GETOPERAND(q);
156         QueryItem  *item = GETQUERY(q);
157         QueryOperand **res,
158                           **ptr,
159                           **prevptr;
160
161         ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
162
163         /* Collect all operands from the tree to res */
164         while ((*size)--)
165         {
166                 if (item->type == QI_VAL)
167                 {
168                         *ptr = (QueryOperand *) item;
169                         ptr++;
170                 }
171                 item++;
172         }
173
174         *size = ptr - res;
175         if (*size < 2)
176                 return res;
177
178         qsort_arg(res, *size, sizeof(QueryOperand *), compareQueryOperand, (void *) operand);
179
180         ptr = res + 1;
181         prevptr = res;
182
183         /* remove duplicates */
184         while (ptr - res < *size)
185         {
186                 if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0)
187                 {
188                         prevptr++;
189                         *prevptr = *ptr;
190                 }
191                 ptr++;
192         }
193
194         *size = prevptr + 1 - res;
195         return res;
196 }
197
/*
 * A dummy WordEntryPos array to use when haspos is false.  It holds exactly
 * one position and is statically initialized to zero; the ranking functions
 * in this file substitute it for entries that have no position data.
 */
static WordEntryPosVector POSNULL = {
	1,							/* Number of elements that follow */
	{0}
};
203
204 static float
205 calc_rank_and(const float *w, TSVector t, TSQuery q)
206 {
207         WordEntryPosVector **pos;
208         int                     i,
209                                 k,
210                                 l,
211                                 p;
212         WordEntry  *entry,
213                            *firstentry;
214         WordEntryPos *post,
215                            *ct;
216         int32           dimt,
217                                 lenct,
218                                 dist,
219                                 nitem;
220         float           res = -1.0;
221         QueryOperand **item;
222         int                     size = q->size;
223
224         item = SortAndUniqItems(q, &size);
225         if (size < 2)
226         {
227                 pfree(item);
228                 return calc_rank_or(w, t, q);
229         }
230         pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size);
231         WEP_SETPOS(POSNULL.pos[0], MAXENTRYPOS - 1);
232
233         for (i = 0; i < size; i++)
234         {
235                 firstentry = entry = find_wordentry(t, q, item[i], &nitem);
236                 if (!entry)
237                         continue;
238
239                 while (entry - firstentry < nitem)
240                 {
241                         if (entry->haspos)
242                                 pos[i] = _POSVECPTR(t, entry);
243                         else
244                                 pos[i] = &POSNULL;
245
246                         dimt = pos[i]->npos;
247                         post = pos[i]->pos;
248                         for (k = 0; k < i; k++)
249                         {
250                                 if (!pos[k])
251                                         continue;
252                                 lenct = pos[k]->npos;
253                                 ct = pos[k]->pos;
254                                 for (l = 0; l < dimt; l++)
255                                 {
256                                         for (p = 0; p < lenct; p++)
257                                         {
258                                                 dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
259                                                 if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL)))
260                                                 {
261                                                         float           curw;
262
263                                                         if (!dist)
264                                                                 dist = MAXENTRYPOS;
265                                                         curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
266                                                         res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
267                                                 }
268                                         }
269                                 }
270                         }
271
272                         entry++;
273                 }
274         }
275         pfree(pos);
276         pfree(item);
277         return res;
278 }
279
280 static float
281 calc_rank_or(const float *w, TSVector t, TSQuery q)
282 {
283         WordEntry  *entry,
284                            *firstentry;
285         WordEntryPos *post;
286         int32           dimt,
287                                 j,
288                                 i,
289                                 nitem;
290         float           res = 0.0;
291         QueryOperand **item;
292         int                     size = q->size;
293
294         item = SortAndUniqItems(q, &size);
295
296         for (i = 0; i < size; i++)
297         {
298                 float           resj,
299                                         wjm;
300                 int32           jm;
301
302                 firstentry = entry = find_wordentry(t, q, item[i], &nitem);
303                 if (!entry)
304                         continue;
305
306                 while (entry - firstentry < nitem)
307                 {
308                         if (entry->haspos)
309                         {
310                                 dimt = POSDATALEN(t, entry);
311                                 post = POSDATAPTR(t, entry);
312                         }
313                         else
314                         {
315                                 dimt = POSNULL.npos;
316                                 post = POSNULL.pos;
317                         }
318
319                         resj = 0.0;
320                         wjm = -1.0;
321                         jm = 0;
322                         for (j = 0; j < dimt; j++)
323                         {
324                                 resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
325                                 if (wpos(post[j]) > wjm)
326                                 {
327                                         wjm = wpos(post[j]);
328                                         jm = j;
329                                 }
330                         }
331 /*
332                         limit (sum(i/i^2),i->inf) = pi^2/6
333                         resj = sum(wi/i^2),i=1,noccurence,
334                         wi - should be sorted desc,
335                         don't sort for now, just choose maximum weight. This should be corrected
336                         Oleg Bartunov
337 */
338                         res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
339
340                         entry++;
341                 }
342         }
343         if (size > 0)
344                 res = res / size;
345         pfree(item);
346         return res;
347 }
348
349 static float
350 calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
351 {
352         QueryItem  *item = GETQUERY(q);
353         float           res = 0.0;
354         int                     len;
355
356         if (!t->size || !q->size)
357                 return 0.0;
358
359         /* XXX: What about NOT? */
360         res = (item->type == QI_OPR && item->qoperator.oper == OP_AND) ?
361                 calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
362
363         if (res < 0)
364                 res = 1e-20f;
365
366         if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
367                 res /= log((double) (cnt_length(t) + 1)) / log(2.0);
368
369         if (method & RANK_NORM_LENGTH)
370         {
371                 len = cnt_length(t);
372                 if (len > 0)
373                         res /= (float) len;
374         }
375
376         /* RANK_NORM_EXTDIST not applicable */
377
378         if ((method & RANK_NORM_UNIQ) && t->size > 0)
379                 res /= (float) (t->size);
380
381         if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
382                 res /= log((double) (t->size + 1)) / log(2.0);
383
384         if (method & RANK_NORM_RDIVRPLUS1)
385                 res /= (res + 1);
386
387         return res;
388 }
389
390 static const float *
391 getWeights(ArrayType *win)
392 {
393         static float ws[lengthof(weights)];
394         int                     i;
395         float4     *arrdata;
396
397         if (win == NULL)
398                 return weights;
399
400         if (ARR_NDIM(win) != 1)
401                 ereport(ERROR,
402                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
403                                  errmsg("array of weight must be one-dimensional")));
404
405         if (ArrayGetNItems(ARR_NDIM(win), ARR_DIMS(win)) < lengthof(weights))
406                 ereport(ERROR,
407                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
408                                  errmsg("array of weight is too short")));
409
410         if (array_contains_nulls(win))
411                 ereport(ERROR,
412                                 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
413                                  errmsg("array of weight must not contain nulls")));
414
415         arrdata = (float4 *) ARR_DATA_PTR(win);
416         for (i = 0; i < lengthof(weights); i++)
417         {
418                 ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i];
419                 if (ws[i] > 1.0)
420                         ereport(ERROR,
421                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
422                                          errmsg("weight out of range")));
423         }
424
425         return ws;
426 }
427
428 Datum
429 ts_rank_wttf(PG_FUNCTION_ARGS)
430 {
431         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
432         TSVector        txt = PG_GETARG_TSVECTOR(1);
433         TSQuery         query = PG_GETARG_TSQUERY(2);
434         int                     method = PG_GETARG_INT32(3);
435         float           res;
436
437         res = calc_rank(getWeights(win), txt, query, method);
438
439         PG_FREE_IF_COPY(win, 0);
440         PG_FREE_IF_COPY(txt, 1);
441         PG_FREE_IF_COPY(query, 2);
442         PG_RETURN_FLOAT4(res);
443 }
444
445 Datum
446 ts_rank_wtt(PG_FUNCTION_ARGS)
447 {
448         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
449         TSVector        txt = PG_GETARG_TSVECTOR(1);
450         TSQuery         query = PG_GETARG_TSQUERY(2);
451         float           res;
452
453         res = calc_rank(getWeights(win), txt, query, DEF_NORM_METHOD);
454
455         PG_FREE_IF_COPY(win, 0);
456         PG_FREE_IF_COPY(txt, 1);
457         PG_FREE_IF_COPY(query, 2);
458         PG_RETURN_FLOAT4(res);
459 }
460
461 Datum
462 ts_rank_ttf(PG_FUNCTION_ARGS)
463 {
464         TSVector        txt = PG_GETARG_TSVECTOR(0);
465         TSQuery         query = PG_GETARG_TSQUERY(1);
466         int                     method = PG_GETARG_INT32(2);
467         float           res;
468
469         res = calc_rank(getWeights(NULL), txt, query, method);
470
471         PG_FREE_IF_COPY(txt, 0);
472         PG_FREE_IF_COPY(query, 1);
473         PG_RETURN_FLOAT4(res);
474 }
475
476 Datum
477 ts_rank_tt(PG_FUNCTION_ARGS)
478 {
479         TSVector        txt = PG_GETARG_TSVECTOR(0);
480         TSQuery         query = PG_GETARG_TSQUERY(1);
481         float           res;
482
483         res = calc_rank(getWeights(NULL), txt, query, DEF_NORM_METHOD);
484
485         PG_FREE_IF_COPY(txt, 0);
486         PG_FREE_IF_COPY(query, 1);
487         PG_RETURN_FLOAT4(res);
488 }
489
/*
 * One occurrence of a query operand in the document, as built by
 * get_docrep(): an array of these, sorted by 'pos', is what Cover() scans.
 */
typedef struct
{
	QueryItem **item;			/* query items matched at this occurrence */
	int16		nitem;			/* number of valid elements in 'item' */
	uint8		wclass;			/* WEP_GETWEIGHT() of the position */
	int32		pos;			/* WEP_GETPOS() of the position */
} DocRepresentation;
497
498 static int
499 compareDocR(const void *va, const void *vb)
500 {
501         const DocRepresentation *a = (const DocRepresentation *) va;
502         const DocRepresentation *b = (const DocRepresentation *) vb;
503
504         if (a->pos == b->pos)
505                 return 0;
506         return (a->pos > b->pos) ? 1 : -1;
507 }
508
/*
 * A query paired with one flag per query item; the flags are allocated with
 * query->size elements by calc_rank_cd().
 */
typedef struct
{
	TSQuery		query;
	bool	   *operandexist;	/* indexed by the item's offset from GETQUERY(query) */
} QueryRepresentation;

/*
 * Read / set the flag belonging to query item 'v', which must point into
 * the item array of (q)->query.
 */
#define QR_GET_OPERAND_EXISTS(q, v)		( (q)->operandexist[ ((QueryItem*)(v)) - GETQUERY((q)->query) ] )
#define QR_SET_OPERAND_EXISTS(q, v)  QR_GET_OPERAND_EXISTS(q,v) = true
517
518 static bool
519 checkcondition_QueryOperand(void *checkval, QueryOperand *val)
520 {
521         QueryRepresentation *qr = (QueryRepresentation *) checkval;
522
523         return QR_GET_OPERAND_EXISTS(qr, val);
524 }
525
/*
 * Search state and result of Cover().
 */
typedef struct
{
	int			pos;			/* index into the doc array where the next
								 * search starts */
	int			p;				/* 'pos' value of the cover's first element */
	int			q;				/* 'pos' value of the cover's last element */
	DocRepresentation *begin;	/* first element of the cover */
	DocRepresentation *end;		/* last element of the cover */
} CoverExt;
534
535
/*
 * Finds the next cover, i.e. a stretch of 'doc' whose operands satisfy the
 * query, searching from doc[ext->pos].
 *
 *	doc - document representation sorted by position (see get_docrep)
 *	len - number of elements in 'doc'
 *	qr	- query plus its scratch operandexist flags (overwritten here)
 *	ext - in: start index in ext->pos; out: the cover in begin/end/p/q, and
 *		  ext->pos advanced for the next call
 *
 * Returns true if a cover was found, false when the rest of the document
 * contains none.
 */
static bool
Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
{
	DocRepresentation *ptr;
	int			lastpos = ext->pos;
	int			i;
	bool		found = false;

	/*
	 * since this function recurses, it could be driven to stack overflow.
	 * (though any decent compiler will optimize away the tail-recursion.)
	 */
	check_stack_depth();

	/* start with no operand flagged */
	memset(qr->operandexist, 0, sizeof(bool) * qr->query->size);

	ext->p = 0x7fffffff;
	ext->q = 0;
	ptr = doc + ext->pos;

	/* find upper bound of cover from current position, move up */
	while (ptr - doc < len)
	{
		/* flag every operand occurring at this element */
		for (i = 0; i < ptr->nitem; i++)
		{
			if (ptr->item[i]->type == QI_VAL)
				QR_SET_OPERAND_EXISTS(qr, ptr->item[i]);
		}
		if (TS_execute(GETQUERY(qr->query), (void *) qr, false, checkcondition_QueryOperand))
		{
			/*
			 * The query is satisfied here.  The bound is accepted only if
			 * its position is greater than ext->q, which was reset to 0
			 * above.
			 */
			if (ptr->pos > ext->q)
			{
				ext->q = ptr->pos;
				ext->end = ptr;
				lastpos = ptr - doc;
				found = true;
			}
			break;
		}
		ptr++;
	}

	if (!found)
		return false;

	/* restart flagging for the backward scan */
	memset(qr->operandexist, 0, sizeof(bool) * qr->query->size);

	ptr = doc + lastpos;

	/* find lower bound of cover from found upper bound, move down */
	while (ptr >= doc + ext->pos)
	{
		for (i = 0; i < ptr->nitem; i++)
			if (ptr->item[i]->type == QI_VAL)
				QR_SET_OPERAND_EXISTS(qr, ptr->item[i]);

		/*
		 * NOTE(review): the third TS_execute() argument is true here but
		 * false in the upward scan -- presumably it controls evaluation of
		 * NOT nodes; confirm against TS_execute().
		 */
		if (TS_execute(GETQUERY(qr->query), (void *) qr, true, checkcondition_QueryOperand))
		{
			if (ptr->pos < ext->p)
			{
				ext->begin = ptr;
				ext->p = ptr->pos;
			}
			break;
		}
		ptr--;
	}

	if (ext->p <= ext->q)
	{
		/*
		 * set position for next try to next lexeme after beginning of found
		 * cover
		 */
		ext->pos = (ptr - doc) + 1;
		return true;
	}

	/* no usable lower bound was found: retry one element further on */
	ext->pos++;
	return Cover(doc, len, qr, ext);
}
616
/*
 * Builds the document representation used by Cover(): one element for each
 * position of each tsvector entry matched by an operand of qr->query,
 * sorted by position.
 *
 *	txt	   - document
 *	qr	   - query; qr->operandexist is used as scratch space to skip
 *			 operands already handled as duplicates of an earlier one
 *	doclen - out: number of elements in the result
 *
 * Returns a palloc'd array, or NULL (with *doclen set to 0) if no operand
 * of the query occurs in the document.
 */
static DocRepresentation *
get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
{
	QueryItem  *item = GETQUERY(qr->query);
	WordEntry  *entry,
			   *firstentry;
	WordEntryPos *post;
	int32		dimt,
				j,
				i,
				nitem;
	int			len = qr->query->size * 4,
				cur = 0;
	DocRepresentation *doc;
	char	   *operand;

	/* initial guess for the array size; it is doubled below when needed */
	doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
	operand = GETOPERAND(qr->query);

	for (i = 0; i < qr->query->size; i++)
	{
		QueryOperand *curoperand;

		if (item[i].type != QI_VAL)
			continue;

		curoperand = &item[i].qoperand;

		/* already covered as a duplicate of an earlier operand */
		if (QR_GET_OPERAND_EXISTS(qr, &item[i]))
			continue;

		firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
		if (!entry)
			continue;

		/* a prefix operand may match a run of nitem entries */
		while (entry - firstentry < nitem)
		{
			if (entry->haspos)
			{
				dimt = POSDATALEN(txt, entry);
				post = POSDATAPTR(txt, entry);
			}
			else
			{
				/* no position data: use the shared POSNULL placeholder */
				dimt = POSNULL.npos;
				post = POSNULL.pos;
			}

			/* make room for dimt more elements */
			while (cur + dimt >= len)
			{
				len *= 2;
				doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
			}

			for (j = 0; j < dimt; j++)
			{
				if (j == 0)
				{
					int			k;

					/*
					 * First position of this entry: collect every query
					 * item that compares equal to the current operand.
					 */
					doc[cur].nitem = 0;
					doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size);

					for (k = 0; k < qr->query->size; k++)
					{
						QueryOperand *kptr = &item[k].qoperand;
						QueryOperand *iptr = &item[i].qoperand;

						if (k == i ||
							(item[k].type == QI_VAL &&
							 compareQueryOperand(&kptr, &iptr, operand) == 0))
						{
							/*
							 * if k == i, we've already checked above that
							 * its type == QI_VAL
							 */
							doc[cur].item[doc[cur].nitem] = item + k;
							doc[cur].nitem++;
							QR_SET_OPERAND_EXISTS(qr, item + k);
						}
					}
				}
				else
				{
					/* later positions share the first position's item array */
					doc[cur].nitem = doc[cur - 1].nitem;
					doc[cur].item = doc[cur - 1].item;
				}
				doc[cur].pos = WEP_GETPOS(post[j]);
				doc[cur].wclass = WEP_GETWEIGHT(post[j]);
				cur++;
			}

			entry++;
		}
	}

	*doclen = cur;

	if (cur > 0)
	{
		qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
		return doc;
	}

	pfree(doc);
	return NULL;
}
724
725 static float4
726 calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
727 {
728         DocRepresentation *doc;
729         int                     len,
730                                 i,
731                                 doclen = 0;
732         CoverExt        ext;
733         double          Wdoc = 0.0;
734         double          invws[lengthof(weights)];
735         double          SumDist = 0.0,
736                                 PrevExtPos = 0.0,
737                                 CurExtPos = 0.0;
738         int                     NExtent = 0;
739         QueryRepresentation qr;
740
741
742         for (i = 0; i < lengthof(weights); i++)
743         {
744                 invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
745                 if (invws[i] > 1.0)
746                         ereport(ERROR,
747                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
748                                          errmsg("weight out of range")));
749                 invws[i] = 1.0 / invws[i];
750         }
751
752         qr.query = query;
753         qr.operandexist = (bool *) palloc0(sizeof(bool) * query->size);
754
755         doc = get_docrep(txt, &qr, &doclen);
756         if (!doc)
757         {
758                 pfree(qr.operandexist);
759                 return 0.0;
760         }
761
762         MemSet(&ext, 0, sizeof(CoverExt));
763         while (Cover(doc, doclen, &qr, &ext))
764         {
765                 double          Cpos = 0.0;
766                 double          InvSum = 0.0;
767                 int                     nNoise;
768                 DocRepresentation *ptr = ext.begin;
769
770                 while (ptr <= ext.end)
771                 {
772                         InvSum += invws[ptr->wclass];
773                         ptr++;
774                 }
775
776                 Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
777
778                 /*
779                  * if doc are big enough then ext.q may be equal to ext.p due to limit
780                  * of posional information. In this case we approximate number of
781                  * noise word as half cover's length
782                  */
783                 nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
784                 if (nNoise < 0)
785                         nNoise = (ext.end - ext.begin) / 2;
786                 Wdoc += Cpos / ((double) (1 + nNoise));
787
788                 CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
789                 if (NExtent > 0 && CurExtPos > PrevExtPos               /* prevent devision by
790                                                                                                                  * zero in a case of
791                                 multiple lexize */ )
792                         SumDist += 1.0 / (CurExtPos - PrevExtPos);
793
794                 PrevExtPos = CurExtPos;
795                 NExtent++;
796         }
797
798         if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
799                 Wdoc /= log((double) (cnt_length(txt) + 1));
800
801         if (method & RANK_NORM_LENGTH)
802         {
803                 len = cnt_length(txt);
804                 if (len > 0)
805                         Wdoc /= (double) len;
806         }
807
808         if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0)
809                 Wdoc /= ((double) NExtent) / SumDist;
810
811         if ((method & RANK_NORM_UNIQ) && txt->size > 0)
812                 Wdoc /= (double) (txt->size);
813
814         if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
815                 Wdoc /= log((double) (txt->size + 1)) / log(2.0);
816
817         if (method & RANK_NORM_RDIVRPLUS1)
818                 Wdoc /= (Wdoc + 1);
819
820         pfree(doc);
821
822         pfree(qr.operandexist);
823
824         return (float4) Wdoc;
825 }
826
827 Datum
828 ts_rankcd_wttf(PG_FUNCTION_ARGS)
829 {
830         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
831         TSVector        txt = PG_GETARG_TSVECTOR(1);
832         TSQuery         query = PG_GETARG_TSQUERY(2);
833         int                     method = PG_GETARG_INT32(3);
834         float           res;
835
836         res = calc_rank_cd(getWeights(win), txt, query, method);
837
838         PG_FREE_IF_COPY(win, 0);
839         PG_FREE_IF_COPY(txt, 1);
840         PG_FREE_IF_COPY(query, 2);
841         PG_RETURN_FLOAT4(res);
842 }
843
844 Datum
845 ts_rankcd_wtt(PG_FUNCTION_ARGS)
846 {
847         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
848         TSVector        txt = PG_GETARG_TSVECTOR(1);
849         TSQuery         query = PG_GETARG_TSQUERY(2);
850         float           res;
851
852         res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);
853
854         PG_FREE_IF_COPY(win, 0);
855         PG_FREE_IF_COPY(txt, 1);
856         PG_FREE_IF_COPY(query, 2);
857         PG_RETURN_FLOAT4(res);
858 }
859
860 Datum
861 ts_rankcd_ttf(PG_FUNCTION_ARGS)
862 {
863         TSVector        txt = PG_GETARG_TSVECTOR(0);
864         TSQuery         query = PG_GETARG_TSQUERY(1);
865         int                     method = PG_GETARG_INT32(2);
866         float           res;
867
868         res = calc_rank_cd(getWeights(NULL), txt, query, method);
869
870         PG_FREE_IF_COPY(txt, 0);
871         PG_FREE_IF_COPY(query, 1);
872         PG_RETURN_FLOAT4(res);
873 }
874
875 Datum
876 ts_rankcd_tt(PG_FUNCTION_ARGS)
877 {
878         TSVector        txt = PG_GETARG_TSVECTOR(0);
879         TSQuery         query = PG_GETARG_TSQUERY(1);
880         float           res;
881
882         res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);
883
884         PG_FREE_IF_COPY(txt, 0);
885         PG_FREE_IF_COPY(query, 1);
886         PG_RETURN_FLOAT4(res);
887 }