]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/tsrank.c
Fix encoding issue when lc_monetary or lc_numeric are different encoding
[postgresql] / src / backend / utils / adt / tsrank.c
1 /*-------------------------------------------------------------------------
2  *
3  * tsrank.c
4  *              rank tsvector by tsquery
5  *
6  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *        $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.17 2010/01/02 16:57:55 momjian Exp $
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15
16 #include <math.h>
17
18 #include "tsearch/ts_type.h"
19 #include "tsearch/ts_utils.h"
20 #include "utils/array.h"
21 #include "miscadmin.h"
22
23
/*
 * Default weight per lexeme weight class.  The array is indexed with the
 * value returned by WEP_GETWEIGHT() (see wpos() below).
 */
static float weights[] = {0.1f, 0.2f, 0.4f, 1.0f};

/*
 * Weight assigned to one position entry.  The macro expects a weight array
 * named "w" to be in scope where it is expanded.
 */
#define wpos(wep)	( w[ WEP_GETWEIGHT(wep) ] )

/*
 * Bit flags accepted in the "method" argument of calc_rank() and
 * calc_rank_cd(); they may be OR'ed together:
 *
 *	RANK_NORM_LOGLENGTH		divide by a logarithm of (document length + 1)
 *	RANK_NORM_LENGTH		divide by the document length (cnt_length())
 *	RANK_NORM_EXTDIST		cover-density only: divide by NExtent / SumDist
 *	RANK_NORM_UNIQ			divide by the number of tsvector entries
 *	RANK_NORM_LOGUNIQ		divide by log2(number of tsvector entries + 1)
 *	RANK_NORM_RDIVRPLUS1	replace rank by rank / (rank + 1)
 */
#define RANK_NO_NORM			0x00
#define RANK_NORM_LOGLENGTH		0x01
#define RANK_NORM_LENGTH		0x02
#define RANK_NORM_EXTDIST		0x04
#define RANK_NORM_UNIQ			0x08
#define RANK_NORM_LOGUNIQ		0x10
#define RANK_NORM_RDIVRPLUS1	0x20
/* Normalization used by the entry points that take no "method" argument */
#define DEF_NORM_METHOD			RANK_NO_NORM

/* Forward declarations: calc_rank_and() falls back to calc_rank_or() */
static float calc_rank_or(float *w, TSVector t, TSQuery q);
static float calc_rank_and(float *w, TSVector t, TSQuery q);
39
40 /*
41  * Returns a weight of a word collocation
42  */
43 static float4
44 word_distance(int4 w)
45 {
46         if (w > 100)
47                 return 1e-30f;
48
49         return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
50 }
51
52 static int
53 cnt_length(TSVector t)
54 {
55         WordEntry  *ptr = ARRPTR(t),
56                            *end = (WordEntry *) STRPTR(t);
57         int                     len = 0;
58
59         while (ptr < end)
60         {
61                 int                     clen = POSDATALEN(t, ptr);
62
63                 if (clen == 0)
64                         len += 1;
65                 else
66                         len += clen;
67
68                 ptr++;
69         }
70
71         return len;
72 }
73
74
75 #define WordECompareQueryItem(e,q,p,i,m) \
76         tsCompareString((q) + (i)->distance, (i)->length,       \
77                                         (e) + (p)->pos, (p)->len, (m))
78
79
80 /*
81  * Returns a pointer to a WordEntry's array corresponding to 'item' from
82  * tsvector 't'. 'q' is the TSQuery containing 'item'.
83  * Returns NULL if not found.
84  */
85 static WordEntry *
86 find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
87 {
88         WordEntry  *StopLow = ARRPTR(t);
89         WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
90         WordEntry  *StopMiddle = StopHigh;
91         int                     difference;
92
93         *nitem = 0;
94
95         /* Loop invariant: StopLow <= item < StopHigh */
96         while (StopLow < StopHigh)
97         {
98                 StopMiddle = StopLow + (StopHigh - StopLow) / 2;
99                 difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
100                 if (difference == 0)
101                 {
102                         StopHigh = StopMiddle;
103                         *nitem = 1;
104                         break;
105                 }
106                 else if (difference > 0)
107                         StopLow = StopMiddle + 1;
108                 else
109                         StopHigh = StopMiddle;
110         }
111
112         if (item->prefix == true)
113         {
114                 if (StopLow >= StopHigh)
115                         StopMiddle = StopHigh;
116
117                 *nitem = 0;
118
119                 while (StopMiddle < (WordEntry *) STRPTR(t) &&
120                            WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0)
121                 {
122                         (*nitem)++;
123                         StopMiddle++;
124                 }
125         }
126
127         return (*nitem > 0) ? StopHigh : NULL;
128 }
129
130
131 /*
132  * sort QueryOperands by (length, word)
133  */
134 static int
135 compareQueryOperand(const void *a, const void *b, void *arg)
136 {
137         char       *operand = (char *) arg;
138         QueryOperand *qa = (*(QueryOperand **) a);
139         QueryOperand *qb = (*(QueryOperand **) b);
140
141         return tsCompareString(operand + qa->distance, qa->length,
142                                                    operand + qb->distance, qb->length,
143                                                    false);
144 }
145
146 /*
147  * Returns a sorted, de-duplicated array of QueryOperands in a query.
148  * The returned QueryOperands are pointers to the original QueryOperands
149  * in the query.
150  *
151  * Length of the returned array is stored in *size
152  */
153 static QueryOperand **
154 SortAndUniqItems(TSQuery q, int *size)
155 {
156         char       *operand = GETOPERAND(q);
157         QueryItem  *item = GETQUERY(q);
158         QueryOperand **res,
159                           **ptr,
160                           **prevptr;
161
162         ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
163
164         /* Collect all operands from the tree to res */
165         while ((*size)--)
166         {
167                 if (item->type == QI_VAL)
168                 {
169                         *ptr = (QueryOperand *) item;
170                         ptr++;
171                 }
172                 item++;
173         }
174
175         *size = ptr - res;
176         if (*size < 2)
177                 return res;
178
179         qsort_arg(res, *size, sizeof(QueryOperand **), compareQueryOperand, (void *) operand);
180
181         ptr = res + 1;
182         prevptr = res;
183
184         /* remove duplicates */
185         while (ptr - res < *size)
186         {
187                 if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0)
188                 {
189                         prevptr++;
190                         *prevptr = *ptr;
191                 }
192                 ptr++;
193         }
194
195         *size = prevptr + 1 - res;
196         return res;
197 }
198
199 /* A dummy WordEntryPos array to use when haspos is false */
200 static WordEntryPosVector POSNULL = {
201         1,                                                      /* Number of elements that follow */
202         {0}
203 };
204
205 static float
206 calc_rank_and(float *w, TSVector t, TSQuery q)
207 {
208         WordEntryPosVector **pos;
209         int                     i,
210                                 k,
211                                 l,
212                                 p;
213         WordEntry  *entry,
214                            *firstentry;
215         WordEntryPos *post,
216                            *ct;
217         int4            dimt,
218                                 lenct,
219                                 dist,
220                                 nitem;
221         float           res = -1.0;
222         QueryOperand **item;
223         int                     size = q->size;
224
225         item = SortAndUniqItems(q, &size);
226         if (size < 2)
227         {
228                 pfree(item);
229                 return calc_rank_or(w, t, q);
230         }
231         pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size);
232         WEP_SETPOS(POSNULL.pos[0], MAXENTRYPOS - 1);
233
234         for (i = 0; i < size; i++)
235         {
236                 firstentry = entry = find_wordentry(t, q, item[i], &nitem);
237                 if (!entry)
238                         continue;
239
240                 while (entry - firstentry < nitem)
241                 {
242                         if (entry->haspos)
243                                 pos[i] = _POSVECPTR(t, entry);
244                         else
245                                 pos[i] = &POSNULL;
246
247                         dimt = pos[i]->npos;
248                         post = pos[i]->pos;
249                         for (k = 0; k < i; k++)
250                         {
251                                 if (!pos[k])
252                                         continue;
253                                 lenct = pos[k]->npos;
254                                 ct = pos[k]->pos;
255                                 for (l = 0; l < dimt; l++)
256                                 {
257                                         for (p = 0; p < lenct; p++)
258                                         {
259                                                 dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
260                                                 if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL)))
261                                                 {
262                                                         float           curw;
263
264                                                         if (!dist)
265                                                                 dist = MAXENTRYPOS;
266                                                         curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
267                                                         res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
268                                                 }
269                                         }
270                                 }
271                         }
272
273                         entry++;
274                 }
275         }
276         pfree(pos);
277         pfree(item);
278         return res;
279 }
280
281 static float
282 calc_rank_or(float *w, TSVector t, TSQuery q)
283 {
284         WordEntry  *entry,
285                            *firstentry;
286         WordEntryPos *post;
287         int4            dimt,
288                                 j,
289                                 i,
290                                 nitem;
291         float           res = 0.0;
292         QueryOperand **item;
293         int                     size = q->size;
294
295         item = SortAndUniqItems(q, &size);
296
297         for (i = 0; i < size; i++)
298         {
299                 float           resj,
300                                         wjm;
301                 int4            jm;
302
303                 firstentry = entry = find_wordentry(t, q, item[i], &nitem);
304                 if (!entry)
305                         continue;
306
307                 while (entry - firstentry < nitem)
308                 {
309                         if (entry->haspos)
310                         {
311                                 dimt = POSDATALEN(t, entry);
312                                 post = POSDATAPTR(t, entry);
313                         }
314                         else
315                         {
316                                 dimt = POSNULL.npos;
317                                 post = POSNULL.pos;
318                         }
319
320                         resj = 0.0;
321                         wjm = -1.0;
322                         jm = 0;
323                         for (j = 0; j < dimt; j++)
324                         {
325                                 resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
326                                 if (wpos(post[j]) > wjm)
327                                 {
328                                         wjm = wpos(post[j]);
329                                         jm = j;
330                                 }
331                         }
332 /*
333                         limit (sum(i/i^2),i->inf) = pi^2/6
334                         resj = sum(wi/i^2),i=1,noccurence,
335                         wi - should be sorted desc,
336                         don't sort for now, just choose maximum weight. This should be corrected
337                         Oleg Bartunov
338 */
339                         res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
340
341                         entry++;
342                 }
343         }
344         if (size > 0)
345                 res = res / size;
346         pfree(item);
347         return res;
348 }
349
350 static float
351 calc_rank(float *w, TSVector t, TSQuery q, int4 method)
352 {
353         QueryItem  *item = GETQUERY(q);
354         float           res = 0.0;
355         int                     len;
356
357         if (!t->size || !q->size)
358                 return 0.0;
359
360         /* XXX: What about NOT? */
361         res = (item->type == QI_OPR && item->qoperator.oper == OP_AND) ?
362                 calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
363
364         if (res < 0)
365                 res = 1e-20f;
366
367         if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
368                 res /= log((double) (cnt_length(t) + 1)) / log(2.0);
369
370         if (method & RANK_NORM_LENGTH)
371         {
372                 len = cnt_length(t);
373                 if (len > 0)
374                         res /= (float) len;
375         }
376
377         /* RANK_NORM_EXTDIST not applicable */
378
379         if ((method & RANK_NORM_UNIQ) && t->size > 0)
380                 res /= (float) (t->size);
381
382         if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
383                 res /= log((double) (t->size + 1)) / log(2.0);
384
385         if (method & RANK_NORM_RDIVRPLUS1)
386                 res /= (res + 1);
387
388         return res;
389 }
390
391 static float *
392 getWeights(ArrayType *win)
393 {
394         static float ws[lengthof(weights)];
395         int                     i;
396         float4     *arrdata;
397
398         if (win == 0)
399                 return weights;
400
401         if (ARR_NDIM(win) != 1)
402                 ereport(ERROR,
403                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
404                                  errmsg("array of weight must be one-dimensional")));
405
406         if (ArrayGetNItems(ARR_NDIM(win), ARR_DIMS(win)) < lengthof(weights))
407                 ereport(ERROR,
408                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
409                                  errmsg("array of weight is too short")));
410
411         if (ARR_HASNULL(win))
412                 ereport(ERROR,
413                                 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
414                                  errmsg("array of weight must not contain nulls")));
415
416         arrdata = (float4 *) ARR_DATA_PTR(win);
417         for (i = 0; i < lengthof(weights); i++)
418         {
419                 ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i];
420                 if (ws[i] > 1.0)
421                         ereport(ERROR,
422                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
423                                          errmsg("weight out of range")));
424         }
425
426         return ws;
427 }
428
429 Datum
430 ts_rank_wttf(PG_FUNCTION_ARGS)
431 {
432         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
433         TSVector        txt = PG_GETARG_TSVECTOR(1);
434         TSQuery         query = PG_GETARG_TSQUERY(2);
435         int                     method = PG_GETARG_INT32(3);
436         float           res;
437
438         res = calc_rank(getWeights(win), txt, query, method);
439
440         PG_FREE_IF_COPY(win, 0);
441         PG_FREE_IF_COPY(txt, 1);
442         PG_FREE_IF_COPY(query, 2);
443         PG_RETURN_FLOAT4(res);
444 }
445
446 Datum
447 ts_rank_wtt(PG_FUNCTION_ARGS)
448 {
449         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
450         TSVector        txt = PG_GETARG_TSVECTOR(1);
451         TSQuery         query = PG_GETARG_TSQUERY(2);
452         float           res;
453
454         res = calc_rank(getWeights(win), txt, query, DEF_NORM_METHOD);
455
456         PG_FREE_IF_COPY(win, 0);
457         PG_FREE_IF_COPY(txt, 1);
458         PG_FREE_IF_COPY(query, 2);
459         PG_RETURN_FLOAT4(res);
460 }
461
462 Datum
463 ts_rank_ttf(PG_FUNCTION_ARGS)
464 {
465         TSVector        txt = PG_GETARG_TSVECTOR(0);
466         TSQuery         query = PG_GETARG_TSQUERY(1);
467         int                     method = PG_GETARG_INT32(2);
468         float           res;
469
470         res = calc_rank(getWeights(NULL), txt, query, method);
471
472         PG_FREE_IF_COPY(txt, 0);
473         PG_FREE_IF_COPY(query, 1);
474         PG_RETURN_FLOAT4(res);
475 }
476
477 Datum
478 ts_rank_tt(PG_FUNCTION_ARGS)
479 {
480         TSVector        txt = PG_GETARG_TSVECTOR(0);
481         TSQuery         query = PG_GETARG_TSQUERY(1);
482         float           res;
483
484         res = calc_rank(getWeights(NULL), txt, query, DEF_NORM_METHOD);
485
486         PG_FREE_IF_COPY(txt, 0);
487         PG_FREE_IF_COPY(query, 1);
488         PG_RETURN_FLOAT4(res);
489 }
490
491 typedef struct
492 {
493         QueryItem **item;
494         int16           nitem;
495         uint8           wclass;
496         int32           pos;
497 } DocRepresentation;
498
499 static int
500 compareDocR(const void *va, const void *vb)
501 {
502         DocRepresentation *a = (DocRepresentation *) va;
503         DocRepresentation *b = (DocRepresentation *) vb;
504
505         if (a->pos == b->pos)
506                 return 0;
507         return (a->pos > b->pos) ? 1 : -1;
508 }
509
510 typedef struct
511 {
512         TSQuery         query;
513         bool       *operandexist;
514 } QueryRepresentation;
515
516 #define QR_GET_OPERAND_EXISTS(q, v)             ( (q)->operandexist[ ((QueryItem*)(v)) - GETQUERY((q)->query) ] )
517 #define QR_SET_OPERAND_EXISTS(q, v)  QR_GET_OPERAND_EXISTS(q,v) = true
518
519 static bool
520 checkcondition_QueryOperand(void *checkval, QueryOperand *val)
521 {
522         QueryRepresentation *qr = (QueryRepresentation *) checkval;
523
524         return QR_GET_OPERAND_EXISTS(qr, val);
525 }
526
527 typedef struct
528 {
529         int                     pos;
530         int                     p;
531         int                     q;
532         DocRepresentation *begin;
533         DocRepresentation *end;
534 } Extention;
535
536
537 static bool
538 Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, Extention *ext)
539 {
540         DocRepresentation *ptr;
541         int                     lastpos = ext->pos;
542         int                     i;
543         bool            found = false;
544
545         /*
546          * since this function recurses, it could be driven to stack overflow.
547          * (though any decent compiler will optimize away the tail-recursion.
548          */
549         check_stack_depth();
550
551         memset(qr->operandexist, 0, sizeof(bool) * qr->query->size);
552
553         ext->p = 0x7fffffff;
554         ext->q = 0;
555         ptr = doc + ext->pos;
556
557         /* find upper bound of cover from current position, move up */
558         while (ptr - doc < len)
559         {
560                 for (i = 0; i < ptr->nitem; i++)
561                 {
562                         if (ptr->item[i]->type == QI_VAL)
563                                 QR_SET_OPERAND_EXISTS(qr, ptr->item[i]);
564                 }
565                 if (TS_execute(GETQUERY(qr->query), (void *) qr, false, checkcondition_QueryOperand))
566                 {
567                         if (ptr->pos > ext->q)
568                         {
569                                 ext->q = ptr->pos;
570                                 ext->end = ptr;
571                                 lastpos = ptr - doc;
572                                 found = true;
573                         }
574                         break;
575                 }
576                 ptr++;
577         }
578
579         if (!found)
580                 return false;
581
582         memset(qr->operandexist, 0, sizeof(bool) * qr->query->size);
583
584         ptr = doc + lastpos;
585
586         /* find lower bound of cover from found upper bound, move down */
587         while (ptr >= doc + ext->pos)
588         {
589                 for (i = 0; i < ptr->nitem; i++)
590                         if (ptr->item[i]->type == QI_VAL)
591                                 QR_SET_OPERAND_EXISTS(qr, ptr->item[i]);
592                 if (TS_execute(GETQUERY(qr->query), (void *) qr, true, checkcondition_QueryOperand))
593                 {
594                         if (ptr->pos < ext->p)
595                         {
596                                 ext->begin = ptr;
597                                 ext->p = ptr->pos;
598                         }
599                         break;
600                 }
601                 ptr--;
602         }
603
604         if (ext->p <= ext->q)
605         {
606                 /*
607                  * set position for next try to next lexeme after begining of founded
608                  * cover
609                  */
610                 ext->pos = (ptr - doc) + 1;
611                 return true;
612         }
613
614         ext->pos++;
615         return Cover(doc, len, qr, ext);
616 }
617
618 static DocRepresentation *
619 get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
620 {
621         QueryItem  *item = GETQUERY(qr->query);
622         WordEntry  *entry,
623                            *firstentry;
624         WordEntryPos *post;
625         int4            dimt,
626                                 j,
627                                 i,
628                                 nitem;
629         int                     len = qr->query->size * 4,
630                                 cur = 0;
631         DocRepresentation *doc;
632         char       *operand;
633
634         doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
635         operand = GETOPERAND(qr->query);
636
637         for (i = 0; i < qr->query->size; i++)
638         {
639                 QueryOperand *curoperand;
640
641                 if (item[i].type != QI_VAL)
642                         continue;
643
644                 curoperand = &item[i].qoperand;
645
646                 if (QR_GET_OPERAND_EXISTS(qr, &item[i]))
647                         continue;
648
649                 firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
650                 if (!entry)
651                         continue;
652
653                 while (entry - firstentry < nitem)
654                 {
655                         if (entry->haspos)
656                         {
657                                 dimt = POSDATALEN(txt, entry);
658                                 post = POSDATAPTR(txt, entry);
659                         }
660                         else
661                         {
662                                 dimt = POSNULL.npos;
663                                 post = POSNULL.pos;
664                         }
665
666                         while (cur + dimt >= len)
667                         {
668                                 len *= 2;
669                                 doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
670                         }
671
672                         for (j = 0; j < dimt; j++)
673                         {
674                                 if (j == 0)
675                                 {
676                                         int                     k;
677
678                                         doc[cur].nitem = 0;
679                                         doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size);
680
681                                         for (k = 0; k < qr->query->size; k++)
682                                         {
683                                                 QueryOperand *kptr = &item[k].qoperand;
684                                                 QueryOperand *iptr = &item[i].qoperand;
685
686                                                 if (k == i ||
687                                                         (item[k].type == QI_VAL &&
688                                                          compareQueryOperand(&kptr, &iptr, operand) == 0))
689                                                 {
690                                                         /*
691                                                          * if k == i, we've already checked above that
692                                                          * it's type == Q_VAL
693                                                          */
694                                                         doc[cur].item[doc[cur].nitem] = item + k;
695                                                         doc[cur].nitem++;
696                                                         QR_SET_OPERAND_EXISTS(qr, item + k);
697                                                 }
698                                         }
699                                 }
700                                 else
701                                 {
702                                         doc[cur].nitem = doc[cur - 1].nitem;
703                                         doc[cur].item = doc[cur - 1].item;
704                                 }
705                                 doc[cur].pos = WEP_GETPOS(post[j]);
706                                 doc[cur].wclass = WEP_GETWEIGHT(post[j]);
707                                 cur++;
708                         }
709
710                         entry++;
711                 }
712         }
713
714         *doclen = cur;
715
716         if (cur > 0)
717         {
718                 qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
719                 return doc;
720         }
721
722         pfree(doc);
723         return NULL;
724 }
725
726 static float4
727 calc_rank_cd(float4 *arrdata, TSVector txt, TSQuery query, int method)
728 {
729         DocRepresentation *doc;
730         int                     len,
731                                 i,
732                                 doclen = 0;
733         Extention       ext;
734         double          Wdoc = 0.0;
735         double          invws[lengthof(weights)];
736         double          SumDist = 0.0,
737                                 PrevExtPos = 0.0,
738                                 CurExtPos = 0.0;
739         int                     NExtent = 0;
740         QueryRepresentation qr;
741
742
743         for (i = 0; i < lengthof(weights); i++)
744         {
745                 invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
746                 if (invws[i] > 1.0)
747                         ereport(ERROR,
748                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
749                                          errmsg("weight out of range")));
750                 invws[i] = 1.0 / invws[i];
751         }
752
753         qr.query = query;
754         qr.operandexist = (bool *) palloc0(sizeof(bool) * query->size);
755
756         doc = get_docrep(txt, &qr, &doclen);
757         if (!doc)
758         {
759                 pfree(qr.operandexist);
760                 return 0.0;
761         }
762
763         MemSet(&ext, 0, sizeof(Extention));
764         while (Cover(doc, doclen, &qr, &ext))
765         {
766                 double          Cpos = 0.0;
767                 double          InvSum = 0.0;
768                 int                     nNoise;
769                 DocRepresentation *ptr = ext.begin;
770
771                 while (ptr <= ext.end)
772                 {
773                         InvSum += invws[ptr->wclass];
774                         ptr++;
775                 }
776
777                 Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
778
779                 /*
780                  * if doc are big enough then ext.q may be equal to ext.p due to limit
781                  * of posional information. In this case we approximate number of
782                  * noise word as half cover's length
783                  */
784                 nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
785                 if (nNoise < 0)
786                         nNoise = (ext.end - ext.begin) / 2;
787                 Wdoc += Cpos / ((double) (1 + nNoise));
788
789                 CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
790                 if (NExtent > 0 && CurExtPos > PrevExtPos               /* prevent devision by
791                                                                                                                  * zero in a case of
792                                 multiple lexize */ )
793                         SumDist += 1.0 / (CurExtPos - PrevExtPos);
794
795                 PrevExtPos = CurExtPos;
796                 NExtent++;
797         }
798
799         if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
800                 Wdoc /= log((double) (cnt_length(txt) + 1));
801
802         if (method & RANK_NORM_LENGTH)
803         {
804                 len = cnt_length(txt);
805                 if (len > 0)
806                         Wdoc /= (double) len;
807         }
808
809         if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0)
810                 Wdoc /= ((double) NExtent) / SumDist;
811
812         if ((method & RANK_NORM_UNIQ) && txt->size > 0)
813                 Wdoc /= (double) (txt->size);
814
815         if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
816                 Wdoc /= log((double) (txt->size + 1)) / log(2.0);
817
818         if (method & RANK_NORM_RDIVRPLUS1)
819                 Wdoc /= (Wdoc + 1);
820
821         pfree(doc);
822
823         pfree(qr.operandexist);
824
825         return (float4) Wdoc;
826 }
827
828 Datum
829 ts_rankcd_wttf(PG_FUNCTION_ARGS)
830 {
831         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
832         TSVector        txt = PG_GETARG_TSVECTOR(1);
833         TSQuery         query = PG_GETARG_TSQUERY(2);
834         int                     method = PG_GETARG_INT32(3);
835         float           res;
836
837         res = calc_rank_cd(getWeights(win), txt, query, method);
838
839         PG_FREE_IF_COPY(win, 0);
840         PG_FREE_IF_COPY(txt, 1);
841         PG_FREE_IF_COPY(query, 2);
842         PG_RETURN_FLOAT4(res);
843 }
844
845 Datum
846 ts_rankcd_wtt(PG_FUNCTION_ARGS)
847 {
848         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
849         TSVector        txt = PG_GETARG_TSVECTOR(1);
850         TSQuery         query = PG_GETARG_TSQUERY(2);
851         float           res;
852
853         res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);
854
855         PG_FREE_IF_COPY(win, 0);
856         PG_FREE_IF_COPY(txt, 1);
857         PG_FREE_IF_COPY(query, 2);
858         PG_RETURN_FLOAT4(res);
859 }
860
861 Datum
862 ts_rankcd_ttf(PG_FUNCTION_ARGS)
863 {
864         TSVector        txt = PG_GETARG_TSVECTOR(0);
865         TSQuery         query = PG_GETARG_TSQUERY(1);
866         int                     method = PG_GETARG_INT32(2);
867         float           res;
868
869         res = calc_rank_cd(getWeights(NULL), txt, query, method);
870
871         PG_FREE_IF_COPY(txt, 0);
872         PG_FREE_IF_COPY(query, 1);
873         PG_RETURN_FLOAT4(res);
874 }
875
876 Datum
877 ts_rankcd_tt(PG_FUNCTION_ARGS)
878 {
879         TSVector        txt = PG_GETARG_TSVECTOR(0);
880         TSQuery         query = PG_GETARG_TSQUERY(1);
881         float           res;
882
883         res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);
884
885         PG_FREE_IF_COPY(txt, 0);
886         PG_FREE_IF_COPY(query, 1);
887         PG_RETURN_FLOAT4(res);
888 }