]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/tsrank.c
Update copyright for 2016
[postgresql] / src / backend / utils / adt / tsrank.c
1 /*-------------------------------------------------------------------------
2  *
3  * tsrank.c
4  *              rank tsvector by tsquery
5  *
6  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *        src/backend/utils/adt/tsrank.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15
16 #include <limits.h>
17 #include <math.h>
18
19 #include "tsearch/ts_utils.h"
20 #include "utils/array.h"
21 #include "miscadmin.h"
22
23
24 static const float weights[] = {0.1f, 0.2f, 0.4f, 1.0f};
25
26 #define wpos(wep)       ( w[ WEP_GETWEIGHT(wep) ] )
27
28 #define RANK_NO_NORM                    0x00
29 #define RANK_NORM_LOGLENGTH             0x01
30 #define RANK_NORM_LENGTH                0x02
31 #define RANK_NORM_EXTDIST               0x04
32 #define RANK_NORM_UNIQ                  0x08
33 #define RANK_NORM_LOGUNIQ               0x10
34 #define RANK_NORM_RDIVRPLUS1    0x20
35 #define DEF_NORM_METHOD                 RANK_NO_NORM
36
37 static float calc_rank_or(const float *w, TSVector t, TSQuery q);
38 static float calc_rank_and(const float *w, TSVector t, TSQuery q);
39
40 /*
41  * Returns a weight of a word collocation
42  */
43 static float4
44 word_distance(int32 w)
45 {
46         if (w > 100)
47                 return 1e-30f;
48
49         return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
50 }
51
52 static int
53 cnt_length(TSVector t)
54 {
55         WordEntry  *ptr = ARRPTR(t),
56                            *end = (WordEntry *) STRPTR(t);
57         int                     len = 0;
58
59         while (ptr < end)
60         {
61                 int                     clen = POSDATALEN(t, ptr);
62
63                 if (clen == 0)
64                         len += 1;
65                 else
66                         len += clen;
67
68                 ptr++;
69         }
70
71         return len;
72 }
73
74
75 #define WordECompareQueryItem(e,q,p,i,m) \
76         tsCompareString((q) + (i)->distance, (i)->length,       \
77                                         (e) + (p)->pos, (p)->len, (m))
78
79
80 /*
81  * Returns a pointer to a WordEntry's array corresponding to 'item' from
82  * tsvector 't'. 'q' is the TSQuery containing 'item'.
83  * Returns NULL if not found.
84  */
85 static WordEntry *
86 find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
87 {
88         WordEntry  *StopLow = ARRPTR(t);
89         WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
90         WordEntry  *StopMiddle = StopHigh;
91         int                     difference;
92
93         *nitem = 0;
94
95         /* Loop invariant: StopLow <= item < StopHigh */
96         while (StopLow < StopHigh)
97         {
98                 StopMiddle = StopLow + (StopHigh - StopLow) / 2;
99                 difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
100                 if (difference == 0)
101                 {
102                         StopHigh = StopMiddle;
103                         *nitem = 1;
104                         break;
105                 }
106                 else if (difference > 0)
107                         StopLow = StopMiddle + 1;
108                 else
109                         StopHigh = StopMiddle;
110         }
111
112         if (item->prefix)
113         {
114                 if (StopLow >= StopHigh)
115                         StopMiddle = StopHigh;
116
117                 *nitem = 0;
118
119                 while (StopMiddle < (WordEntry *) STRPTR(t) &&
120                            WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0)
121                 {
122                         (*nitem)++;
123                         StopMiddle++;
124                 }
125         }
126
127         return (*nitem > 0) ? StopHigh : NULL;
128 }
129
130
131 /*
132  * sort QueryOperands by (length, word)
133  */
134 static int
135 compareQueryOperand(const void *a, const void *b, void *arg)
136 {
137         char       *operand = (char *) arg;
138         QueryOperand *qa = (*(QueryOperand *const *) a);
139         QueryOperand *qb = (*(QueryOperand *const *) b);
140
141         return tsCompareString(operand + qa->distance, qa->length,
142                                                    operand + qb->distance, qb->length,
143                                                    false);
144 }
145
146 /*
147  * Returns a sorted, de-duplicated array of QueryOperands in a query.
148  * The returned QueryOperands are pointers to the original QueryOperands
149  * in the query.
150  *
151  * Length of the returned array is stored in *size
152  */
153 static QueryOperand **
154 SortAndUniqItems(TSQuery q, int *size)
155 {
156         char       *operand = GETOPERAND(q);
157         QueryItem  *item = GETQUERY(q);
158         QueryOperand **res,
159                           **ptr,
160                           **prevptr;
161
162         ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
163
164         /* Collect all operands from the tree to res */
165         while ((*size)--)
166         {
167                 if (item->type == QI_VAL)
168                 {
169                         *ptr = (QueryOperand *) item;
170                         ptr++;
171                 }
172                 item++;
173         }
174
175         *size = ptr - res;
176         if (*size < 2)
177                 return res;
178
179         qsort_arg(res, *size, sizeof(QueryOperand *), compareQueryOperand, (void *) operand);
180
181         ptr = res + 1;
182         prevptr = res;
183
184         /* remove duplicates */
185         while (ptr - res < *size)
186         {
187                 if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0)
188                 {
189                         prevptr++;
190                         *prevptr = *ptr;
191                 }
192                 ptr++;
193         }
194
195         *size = prevptr + 1 - res;
196         return res;
197 }
198
199 static float
200 calc_rank_and(const float *w, TSVector t, TSQuery q)
201 {
202         WordEntryPosVector **pos;
203         WordEntryPosVector1 posnull;
204         WordEntryPosVector *POSNULL;
205         int                     i,
206                                 k,
207                                 l,
208                                 p;
209         WordEntry  *entry,
210                            *firstentry;
211         WordEntryPos *post,
212                            *ct;
213         int32           dimt,
214                                 lenct,
215                                 dist,
216                                 nitem;
217         float           res = -1.0;
218         QueryOperand **item;
219         int                     size = q->size;
220
221         item = SortAndUniqItems(q, &size);
222         if (size < 2)
223         {
224                 pfree(item);
225                 return calc_rank_or(w, t, q);
226         }
227         pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size);
228
229         /* A dummy WordEntryPos array to use when haspos is false */
230         posnull.npos = 1;
231         posnull.pos[0] = 0;
232         WEP_SETPOS(posnull.pos[0], MAXENTRYPOS - 1);
233         POSNULL = (WordEntryPosVector *) &posnull;
234
235         for (i = 0; i < size; i++)
236         {
237                 firstentry = entry = find_wordentry(t, q, item[i], &nitem);
238                 if (!entry)
239                         continue;
240
241                 while (entry - firstentry < nitem)
242                 {
243                         if (entry->haspos)
244                                 pos[i] = _POSVECPTR(t, entry);
245                         else
246                                 pos[i] = POSNULL;
247
248                         dimt = pos[i]->npos;
249                         post = pos[i]->pos;
250                         for (k = 0; k < i; k++)
251                         {
252                                 if (!pos[k])
253                                         continue;
254                                 lenct = pos[k]->npos;
255                                 ct = pos[k]->pos;
256                                 for (l = 0; l < dimt; l++)
257                                 {
258                                         for (p = 0; p < lenct; p++)
259                                         {
260                                                 dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
261                                                 if (dist || (dist == 0 && (pos[i] == POSNULL || pos[k] == POSNULL)))
262                                                 {
263                                                         float           curw;
264
265                                                         if (!dist)
266                                                                 dist = MAXENTRYPOS;
267                                                         curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
268                                                         res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
269                                                 }
270                                         }
271                                 }
272                         }
273
274                         entry++;
275                 }
276         }
277         pfree(pos);
278         pfree(item);
279         return res;
280 }
281
282 static float
283 calc_rank_or(const float *w, TSVector t, TSQuery q)
284 {
285         WordEntry  *entry,
286                            *firstentry;
287         WordEntryPosVector1 posnull;
288         WordEntryPos *post;
289         int32           dimt,
290                                 j,
291                                 i,
292                                 nitem;
293         float           res = 0.0;
294         QueryOperand **item;
295         int                     size = q->size;
296
297         /* A dummy WordEntryPos array to use when haspos is false */
298         posnull.npos = 1;
299         posnull.pos[0] = 0;
300
301         item = SortAndUniqItems(q, &size);
302
303         for (i = 0; i < size; i++)
304         {
305                 float           resj,
306                                         wjm;
307                 int32           jm;
308
309                 firstentry = entry = find_wordentry(t, q, item[i], &nitem);
310                 if (!entry)
311                         continue;
312
313                 while (entry - firstentry < nitem)
314                 {
315                         if (entry->haspos)
316                         {
317                                 dimt = POSDATALEN(t, entry);
318                                 post = POSDATAPTR(t, entry);
319                         }
320                         else
321                         {
322                                 dimt = posnull.npos;
323                                 post = posnull.pos;
324                         }
325
326                         resj = 0.0;
327                         wjm = -1.0;
328                         jm = 0;
329                         for (j = 0; j < dimt; j++)
330                         {
331                                 resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
332                                 if (wpos(post[j]) > wjm)
333                                 {
334                                         wjm = wpos(post[j]);
335                                         jm = j;
336                                 }
337                         }
338 /*
339                         limit (sum(i/i^2),i->inf) = pi^2/6
340                         resj = sum(wi/i^2),i=1,noccurence,
341                         wi - should be sorted desc,
342                         don't sort for now, just choose maximum weight. This should be corrected
343                         Oleg Bartunov
344 */
345                         res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
346
347                         entry++;
348                 }
349         }
350         if (size > 0)
351                 res = res / size;
352         pfree(item);
353         return res;
354 }
355
356 static float
357 calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
358 {
359         QueryItem  *item = GETQUERY(q);
360         float           res = 0.0;
361         int                     len;
362
363         if (!t->size || !q->size)
364                 return 0.0;
365
366         /* XXX: What about NOT? */
367         res = (item->type == QI_OPR && item->qoperator.oper == OP_AND) ?
368                 calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
369
370         if (res < 0)
371                 res = 1e-20f;
372
373         if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
374                 res /= log((double) (cnt_length(t) + 1)) / log(2.0);
375
376         if (method & RANK_NORM_LENGTH)
377         {
378                 len = cnt_length(t);
379                 if (len > 0)
380                         res /= (float) len;
381         }
382
383         /* RANK_NORM_EXTDIST not applicable */
384
385         if ((method & RANK_NORM_UNIQ) && t->size > 0)
386                 res /= (float) (t->size);
387
388         if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
389                 res /= log((double) (t->size + 1)) / log(2.0);
390
391         if (method & RANK_NORM_RDIVRPLUS1)
392                 res /= (res + 1);
393
394         return res;
395 }
396
397 static const float *
398 getWeights(ArrayType *win)
399 {
400         static float ws[lengthof(weights)];
401         int                     i;
402         float4     *arrdata;
403
404         if (win == NULL)
405                 return weights;
406
407         if (ARR_NDIM(win) != 1)
408                 ereport(ERROR,
409                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
410                                  errmsg("array of weight must be one-dimensional")));
411
412         if (ArrayGetNItems(ARR_NDIM(win), ARR_DIMS(win)) < lengthof(weights))
413                 ereport(ERROR,
414                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
415                                  errmsg("array of weight is too short")));
416
417         if (array_contains_nulls(win))
418                 ereport(ERROR,
419                                 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
420                                  errmsg("array of weight must not contain nulls")));
421
422         arrdata = (float4 *) ARR_DATA_PTR(win);
423         for (i = 0; i < lengthof(weights); i++)
424         {
425                 ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i];
426                 if (ws[i] > 1.0)
427                         ereport(ERROR,
428                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
429                                          errmsg("weight out of range")));
430         }
431
432         return ws;
433 }
434
435 Datum
436 ts_rank_wttf(PG_FUNCTION_ARGS)
437 {
438         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
439         TSVector        txt = PG_GETARG_TSVECTOR(1);
440         TSQuery         query = PG_GETARG_TSQUERY(2);
441         int                     method = PG_GETARG_INT32(3);
442         float           res;
443
444         res = calc_rank(getWeights(win), txt, query, method);
445
446         PG_FREE_IF_COPY(win, 0);
447         PG_FREE_IF_COPY(txt, 1);
448         PG_FREE_IF_COPY(query, 2);
449         PG_RETURN_FLOAT4(res);
450 }
451
452 Datum
453 ts_rank_wtt(PG_FUNCTION_ARGS)
454 {
455         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
456         TSVector        txt = PG_GETARG_TSVECTOR(1);
457         TSQuery         query = PG_GETARG_TSQUERY(2);
458         float           res;
459
460         res = calc_rank(getWeights(win), txt, query, DEF_NORM_METHOD);
461
462         PG_FREE_IF_COPY(win, 0);
463         PG_FREE_IF_COPY(txt, 1);
464         PG_FREE_IF_COPY(query, 2);
465         PG_RETURN_FLOAT4(res);
466 }
467
468 Datum
469 ts_rank_ttf(PG_FUNCTION_ARGS)
470 {
471         TSVector        txt = PG_GETARG_TSVECTOR(0);
472         TSQuery         query = PG_GETARG_TSQUERY(1);
473         int                     method = PG_GETARG_INT32(2);
474         float           res;
475
476         res = calc_rank(getWeights(NULL), txt, query, method);
477
478         PG_FREE_IF_COPY(txt, 0);
479         PG_FREE_IF_COPY(query, 1);
480         PG_RETURN_FLOAT4(res);
481 }
482
483 Datum
484 ts_rank_tt(PG_FUNCTION_ARGS)
485 {
486         TSVector        txt = PG_GETARG_TSVECTOR(0);
487         TSQuery         query = PG_GETARG_TSQUERY(1);
488         float           res;
489
490         res = calc_rank(getWeights(NULL), txt, query, DEF_NORM_METHOD);
491
492         PG_FREE_IF_COPY(txt, 0);
493         PG_FREE_IF_COPY(query, 1);
494         PG_RETURN_FLOAT4(res);
495 }
496
497 typedef struct
498 {
499         QueryItem **item;
500         int16           nitem;
501         uint8           wclass;
502         int32           pos;
503 } DocRepresentation;
504
505 static int
506 compareDocR(const void *va, const void *vb)
507 {
508         const DocRepresentation *a = (const DocRepresentation *) va;
509         const DocRepresentation *b = (const DocRepresentation *) vb;
510
511         if (a->pos == b->pos)
512                 return 0;
513         return (a->pos > b->pos) ? 1 : -1;
514 }
515
516 typedef struct
517 {
518         TSQuery         query;
519         bool       *operandexist;
520 } QueryRepresentation;
521
522 #define QR_GET_OPERAND_EXISTS(q, v)             ( (q)->operandexist[ ((QueryItem*)(v)) - GETQUERY((q)->query) ] )
523 #define QR_SET_OPERAND_EXISTS(q, v)  QR_GET_OPERAND_EXISTS(q,v) = true
524
525 static bool
526 checkcondition_QueryOperand(void *checkval, QueryOperand *val)
527 {
528         QueryRepresentation *qr = (QueryRepresentation *) checkval;
529
530         return QR_GET_OPERAND_EXISTS(qr, val);
531 }
532
533 typedef struct
534 {
535         int                     pos;
536         int                     p;
537         int                     q;
538         DocRepresentation *begin;
539         DocRepresentation *end;
540 } CoverExt;
541
542
543 static bool
544 Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
545 {
546         DocRepresentation *ptr;
547         int                     lastpos = ext->pos;
548         int                     i;
549         bool            found = false;
550
551         /*
552          * since this function recurses, it could be driven to stack overflow.
553          * (though any decent compiler will optimize away the tail-recursion.
554          */
555         check_stack_depth();
556
557         memset(qr->operandexist, 0, sizeof(bool) * qr->query->size);
558
559         ext->p = INT_MAX;
560         ext->q = 0;
561         ptr = doc + ext->pos;
562
563         /* find upper bound of cover from current position, move up */
564         while (ptr - doc < len)
565         {
566                 for (i = 0; i < ptr->nitem; i++)
567                 {
568                         if (ptr->item[i]->type == QI_VAL)
569                                 QR_SET_OPERAND_EXISTS(qr, ptr->item[i]);
570                 }
571                 if (TS_execute(GETQUERY(qr->query), (void *) qr, false, checkcondition_QueryOperand))
572                 {
573                         if (ptr->pos > ext->q)
574                         {
575                                 ext->q = ptr->pos;
576                                 ext->end = ptr;
577                                 lastpos = ptr - doc;
578                                 found = true;
579                         }
580                         break;
581                 }
582                 ptr++;
583         }
584
585         if (!found)
586                 return false;
587
588         memset(qr->operandexist, 0, sizeof(bool) * qr->query->size);
589
590         ptr = doc + lastpos;
591
592         /* find lower bound of cover from found upper bound, move down */
593         while (ptr >= doc + ext->pos)
594         {
595                 for (i = 0; i < ptr->nitem; i++)
596                         if (ptr->item[i]->type == QI_VAL)
597                                 QR_SET_OPERAND_EXISTS(qr, ptr->item[i]);
598                 if (TS_execute(GETQUERY(qr->query), (void *) qr, true, checkcondition_QueryOperand))
599                 {
600                         if (ptr->pos < ext->p)
601                         {
602                                 ext->begin = ptr;
603                                 ext->p = ptr->pos;
604                         }
605                         break;
606                 }
607                 ptr--;
608         }
609
610         if (ext->p <= ext->q)
611         {
612                 /*
613                  * set position for next try to next lexeme after beginning of found
614                  * cover
615                  */
616                 ext->pos = (ptr - doc) + 1;
617                 return true;
618         }
619
620         ext->pos++;
621         return Cover(doc, len, qr, ext);
622 }
623
624 static DocRepresentation *
625 get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
626 {
627         QueryItem  *item = GETQUERY(qr->query);
628         WordEntry  *entry,
629                            *firstentry;
630         WordEntryPos *post;
631         int32           dimt,
632                                 j,
633                                 i,
634                                 nitem;
635         int                     len = qr->query->size * 4,
636                                 cur = 0;
637         DocRepresentation *doc;
638         char       *operand;
639
640         doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
641         operand = GETOPERAND(qr->query);
642
643         for (i = 0; i < qr->query->size; i++)
644         {
645                 QueryOperand *curoperand;
646
647                 if (item[i].type != QI_VAL)
648                         continue;
649
650                 curoperand = &item[i].qoperand;
651
652                 if (QR_GET_OPERAND_EXISTS(qr, &item[i]))
653                         continue;
654
655                 firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
656                 if (!entry)
657                         continue;
658
659                 while (entry - firstentry < nitem)
660                 {
661                         if (entry->haspos)
662                         {
663                                 dimt = POSDATALEN(txt, entry);
664                                 post = POSDATAPTR(txt, entry);
665                         }
666                         else
667                         {
668                                 /* ignore words without positions */
669                                 entry++;
670                                 continue;
671                         }
672
673                         while (cur + dimt >= len)
674                         {
675                                 len *= 2;
676                                 doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
677                         }
678
679                         for (j = 0; j < dimt; j++)
680                         {
681                                 if (j == 0)
682                                 {
683                                         int                     k;
684
685                                         doc[cur].nitem = 0;
686                                         doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size);
687
688                                         for (k = 0; k < qr->query->size; k++)
689                                         {
690                                                 QueryOperand *kptr = &item[k].qoperand;
691                                                 QueryOperand *iptr = &item[i].qoperand;
692
693                                                 if (k == i ||
694                                                         (item[k].type == QI_VAL &&
695                                                          compareQueryOperand(&kptr, &iptr, operand) == 0))
696                                                 {
697                                                         /*
698                                                          * if k == i, we've already checked above that
699                                                          * it's type == Q_VAL
700                                                          */
701                                                         doc[cur].item[doc[cur].nitem] = item + k;
702                                                         doc[cur].nitem++;
703                                                         QR_SET_OPERAND_EXISTS(qr, item + k);
704                                                 }
705                                         }
706                                 }
707                                 else
708                                 {
709                                         doc[cur].nitem = doc[cur - 1].nitem;
710                                         doc[cur].item = doc[cur - 1].item;
711                                 }
712                                 doc[cur].pos = WEP_GETPOS(post[j]);
713                                 doc[cur].wclass = WEP_GETWEIGHT(post[j]);
714                                 cur++;
715                         }
716
717                         entry++;
718                 }
719         }
720
721         *doclen = cur;
722
723         if (cur > 0)
724         {
725                 qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
726                 return doc;
727         }
728
729         pfree(doc);
730         return NULL;
731 }
732
733 static float4
734 calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
735 {
736         DocRepresentation *doc;
737         int                     len,
738                                 i,
739                                 doclen = 0;
740         CoverExt        ext;
741         double          Wdoc = 0.0;
742         double          invws[lengthof(weights)];
743         double          SumDist = 0.0,
744                                 PrevExtPos = 0.0,
745                                 CurExtPos = 0.0;
746         int                     NExtent = 0;
747         QueryRepresentation qr;
748
749
750         for (i = 0; i < lengthof(weights); i++)
751         {
752                 invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
753                 if (invws[i] > 1.0)
754                         ereport(ERROR,
755                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
756                                          errmsg("weight out of range")));
757                 invws[i] = 1.0 / invws[i];
758         }
759
760         qr.query = query;
761         qr.operandexist = (bool *) palloc0(sizeof(bool) * query->size);
762
763         doc = get_docrep(txt, &qr, &doclen);
764         if (!doc)
765         {
766                 pfree(qr.operandexist);
767                 return 0.0;
768         }
769
770         MemSet(&ext, 0, sizeof(CoverExt));
771         while (Cover(doc, doclen, &qr, &ext))
772         {
773                 double          Cpos = 0.0;
774                 double          InvSum = 0.0;
775                 int                     nNoise;
776                 DocRepresentation *ptr = ext.begin;
777
778                 while (ptr <= ext.end)
779                 {
780                         InvSum += invws[ptr->wclass];
781                         ptr++;
782                 }
783
784                 Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
785
786                 /*
787                  * if doc are big enough then ext.q may be equal to ext.p due to limit
788                  * of posional information. In this case we approximate number of
789                  * noise word as half cover's length
790                  */
791                 nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
792                 if (nNoise < 0)
793                         nNoise = (ext.end - ext.begin) / 2;
794                 Wdoc += Cpos / ((double) (1 + nNoise));
795
796                 CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
797                 if (NExtent > 0 && CurExtPos > PrevExtPos               /* prevent devision by
798                                                                                                                  * zero in a case of
799                                 multiple lexize */ )
800                         SumDist += 1.0 / (CurExtPos - PrevExtPos);
801
802                 PrevExtPos = CurExtPos;
803                 NExtent++;
804         }
805
806         if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
807                 Wdoc /= log((double) (cnt_length(txt) + 1));
808
809         if (method & RANK_NORM_LENGTH)
810         {
811                 len = cnt_length(txt);
812                 if (len > 0)
813                         Wdoc /= (double) len;
814         }
815
816         if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0)
817                 Wdoc /= ((double) NExtent) / SumDist;
818
819         if ((method & RANK_NORM_UNIQ) && txt->size > 0)
820                 Wdoc /= (double) (txt->size);
821
822         if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
823                 Wdoc /= log((double) (txt->size + 1)) / log(2.0);
824
825         if (method & RANK_NORM_RDIVRPLUS1)
826                 Wdoc /= (Wdoc + 1);
827
828         pfree(doc);
829
830         pfree(qr.operandexist);
831
832         return (float4) Wdoc;
833 }
834
835 Datum
836 ts_rankcd_wttf(PG_FUNCTION_ARGS)
837 {
838         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
839         TSVector        txt = PG_GETARG_TSVECTOR(1);
840         TSQuery         query = PG_GETARG_TSQUERY(2);
841         int                     method = PG_GETARG_INT32(3);
842         float           res;
843
844         res = calc_rank_cd(getWeights(win), txt, query, method);
845
846         PG_FREE_IF_COPY(win, 0);
847         PG_FREE_IF_COPY(txt, 1);
848         PG_FREE_IF_COPY(query, 2);
849         PG_RETURN_FLOAT4(res);
850 }
851
852 Datum
853 ts_rankcd_wtt(PG_FUNCTION_ARGS)
854 {
855         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
856         TSVector        txt = PG_GETARG_TSVECTOR(1);
857         TSQuery         query = PG_GETARG_TSQUERY(2);
858         float           res;
859
860         res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);
861
862         PG_FREE_IF_COPY(win, 0);
863         PG_FREE_IF_COPY(txt, 1);
864         PG_FREE_IF_COPY(query, 2);
865         PG_RETURN_FLOAT4(res);
866 }
867
868 Datum
869 ts_rankcd_ttf(PG_FUNCTION_ARGS)
870 {
871         TSVector        txt = PG_GETARG_TSVECTOR(0);
872         TSQuery         query = PG_GETARG_TSQUERY(1);
873         int                     method = PG_GETARG_INT32(2);
874         float           res;
875
876         res = calc_rank_cd(getWeights(NULL), txt, query, method);
877
878         PG_FREE_IF_COPY(txt, 0);
879         PG_FREE_IF_COPY(query, 1);
880         PG_RETURN_FLOAT4(res);
881 }
882
883 Datum
884 ts_rankcd_tt(PG_FUNCTION_ARGS)
885 {
886         TSVector        txt = PG_GETARG_TSVECTOR(0);
887         TSQuery         query = PG_GETARG_TSQUERY(1);
888         float           res;
889
890         res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);
891
892         PG_FREE_IF_COPY(txt, 0);
893         PG_FREE_IF_COPY(query, 1);
894         PG_RETURN_FLOAT4(res);
895 }