]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/tsrank.c
Fix initialization of fake LSN for unlogged relations
[postgresql] / src / backend / utils / adt / tsrank.c
1 /*-------------------------------------------------------------------------
2  *
3  * tsrank.c
4  *              rank tsvector by tsquery
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *        src/backend/utils/adt/tsrank.c
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15
16 #include <limits.h>
17 #include <math.h>
18
19 #include "tsearch/ts_utils.h"
20 #include "utils/array.h"
21 #include "utils/builtins.h"
22 #include "miscadmin.h"
23
24
25 static const float weights[] = {0.1f, 0.2f, 0.4f, 1.0f};
26
27 #define wpos(wep)       ( w[ WEP_GETWEIGHT(wep) ] )
28
29 #define RANK_NO_NORM                    0x00
30 #define RANK_NORM_LOGLENGTH             0x01
31 #define RANK_NORM_LENGTH                0x02
32 #define RANK_NORM_EXTDIST               0x04
33 #define RANK_NORM_UNIQ                  0x08
34 #define RANK_NORM_LOGUNIQ               0x10
35 #define RANK_NORM_RDIVRPLUS1    0x20
36 #define DEF_NORM_METHOD                 RANK_NO_NORM
37
38 static float calc_rank_or(const float *w, TSVector t, TSQuery q);
39 static float calc_rank_and(const float *w, TSVector t, TSQuery q);
40
41 /*
42  * Returns a weight of a word collocation
43  */
44 static float4
45 word_distance(int32 w)
46 {
47         if (w > 100)
48                 return 1e-30f;
49
50         return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
51 }
52
53 static int
54 cnt_length(TSVector t)
55 {
56         WordEntry  *ptr = ARRPTR(t),
57                            *end = (WordEntry *) STRPTR(t);
58         int                     len = 0;
59
60         while (ptr < end)
61         {
62                 int                     clen = POSDATALEN(t, ptr);
63
64                 if (clen == 0)
65                         len += 1;
66                 else
67                         len += clen;
68
69                 ptr++;
70         }
71
72         return len;
73 }
74
75
76 #define WordECompareQueryItem(e,q,p,i,m) \
77         tsCompareString((q) + (i)->distance, (i)->length,       \
78                                         (e) + (p)->pos, (p)->len, (m))
79
80
81 /*
82  * Returns a pointer to a WordEntry's array corresponding to 'item' from
83  * tsvector 't'. 'q' is the TSQuery containing 'item'.
84  * Returns NULL if not found.
85  */
86 static WordEntry *
87 find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
88 {
89         WordEntry  *StopLow = ARRPTR(t);
90         WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
91         WordEntry  *StopMiddle = StopHigh;
92         int                     difference;
93
94         *nitem = 0;
95
96         /* Loop invariant: StopLow <= item < StopHigh */
97         while (StopLow < StopHigh)
98         {
99                 StopMiddle = StopLow + (StopHigh - StopLow) / 2;
100                 difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
101                 if (difference == 0)
102                 {
103                         StopHigh = StopMiddle;
104                         *nitem = 1;
105                         break;
106                 }
107                 else if (difference > 0)
108                         StopLow = StopMiddle + 1;
109                 else
110                         StopHigh = StopMiddle;
111         }
112
113         if (item->prefix)
114         {
115                 if (StopLow >= StopHigh)
116                         StopMiddle = StopHigh;
117
118                 *nitem = 0;
119
120                 while (StopMiddle < (WordEntry *) STRPTR(t) &&
121                            WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0)
122                 {
123                         (*nitem)++;
124                         StopMiddle++;
125                 }
126         }
127
128         return (*nitem > 0) ? StopHigh : NULL;
129 }
130
131
132 /*
133  * sort QueryOperands by (length, word)
134  */
135 static int
136 compareQueryOperand(const void *a, const void *b, void *arg)
137 {
138         char       *operand = (char *) arg;
139         QueryOperand *qa = (*(QueryOperand *const *) a);
140         QueryOperand *qb = (*(QueryOperand *const *) b);
141
142         return tsCompareString(operand + qa->distance, qa->length,
143                                                    operand + qb->distance, qb->length,
144                                                    false);
145 }
146
147 /*
148  * Returns a sorted, de-duplicated array of QueryOperands in a query.
149  * The returned QueryOperands are pointers to the original QueryOperands
150  * in the query.
151  *
152  * Length of the returned array is stored in *size
153  */
154 static QueryOperand **
155 SortAndUniqItems(TSQuery q, int *size)
156 {
157         char       *operand = GETOPERAND(q);
158         QueryItem  *item = GETQUERY(q);
159         QueryOperand **res,
160                           **ptr,
161                           **prevptr;
162
163         ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
164
165         /* Collect all operands from the tree to res */
166         while ((*size)--)
167         {
168                 if (item->type == QI_VAL)
169                 {
170                         *ptr = (QueryOperand *) item;
171                         ptr++;
172                 }
173                 item++;
174         }
175
176         *size = ptr - res;
177         if (*size < 2)
178                 return res;
179
180         qsort_arg(res, *size, sizeof(QueryOperand *), compareQueryOperand, (void *) operand);
181
182         ptr = res + 1;
183         prevptr = res;
184
185         /* remove duplicates */
186         while (ptr - res < *size)
187         {
188                 if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0)
189                 {
190                         prevptr++;
191                         *prevptr = *ptr;
192                 }
193                 ptr++;
194         }
195
196         *size = prevptr + 1 - res;
197         return res;
198 }
199
200 static float
201 calc_rank_and(const float *w, TSVector t, TSQuery q)
202 {
203         WordEntryPosVector **pos;
204         WordEntryPosVector1 posnull;
205         WordEntryPosVector *POSNULL;
206         int                     i,
207                                 k,
208                                 l,
209                                 p;
210         WordEntry  *entry,
211                            *firstentry;
212         WordEntryPos *post,
213                            *ct;
214         int32           dimt,
215                                 lenct,
216                                 dist,
217                                 nitem;
218         float           res = -1.0;
219         QueryOperand **item;
220         int                     size = q->size;
221
222         item = SortAndUniqItems(q, &size);
223         if (size < 2)
224         {
225                 pfree(item);
226                 return calc_rank_or(w, t, q);
227         }
228         pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size);
229
230         /* A dummy WordEntryPos array to use when haspos is false */
231         posnull.npos = 1;
232         posnull.pos[0] = 0;
233         WEP_SETPOS(posnull.pos[0], MAXENTRYPOS - 1);
234         POSNULL = (WordEntryPosVector *) &posnull;
235
236         for (i = 0; i < size; i++)
237         {
238                 firstentry = entry = find_wordentry(t, q, item[i], &nitem);
239                 if (!entry)
240                         continue;
241
242                 while (entry - firstentry < nitem)
243                 {
244                         if (entry->haspos)
245                                 pos[i] = _POSVECPTR(t, entry);
246                         else
247                                 pos[i] = POSNULL;
248
249                         dimt = pos[i]->npos;
250                         post = pos[i]->pos;
251                         for (k = 0; k < i; k++)
252                         {
253                                 if (!pos[k])
254                                         continue;
255                                 lenct = pos[k]->npos;
256                                 ct = pos[k]->pos;
257                                 for (l = 0; l < dimt; l++)
258                                 {
259                                         for (p = 0; p < lenct; p++)
260                                         {
261                                                 dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
262                                                 if (dist || (dist == 0 && (pos[i] == POSNULL || pos[k] == POSNULL)))
263                                                 {
264                                                         float           curw;
265
266                                                         if (!dist)
267                                                                 dist = MAXENTRYPOS;
268                                                         curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
269                                                         res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
270                                                 }
271                                         }
272                                 }
273                         }
274
275                         entry++;
276                 }
277         }
278         pfree(pos);
279         pfree(item);
280         return res;
281 }
282
283 static float
284 calc_rank_or(const float *w, TSVector t, TSQuery q)
285 {
286         WordEntry  *entry,
287                            *firstentry;
288         WordEntryPosVector1 posnull;
289         WordEntryPos *post;
290         int32           dimt,
291                                 j,
292                                 i,
293                                 nitem;
294         float           res = 0.0;
295         QueryOperand **item;
296         int                     size = q->size;
297
298         /* A dummy WordEntryPos array to use when haspos is false */
299         posnull.npos = 1;
300         posnull.pos[0] = 0;
301
302         item = SortAndUniqItems(q, &size);
303
304         for (i = 0; i < size; i++)
305         {
306                 float           resj,
307                                         wjm;
308                 int32           jm;
309
310                 firstentry = entry = find_wordentry(t, q, item[i], &nitem);
311                 if (!entry)
312                         continue;
313
314                 while (entry - firstentry < nitem)
315                 {
316                         if (entry->haspos)
317                         {
318                                 dimt = POSDATALEN(t, entry);
319                                 post = POSDATAPTR(t, entry);
320                         }
321                         else
322                         {
323                                 dimt = posnull.npos;
324                                 post = posnull.pos;
325                         }
326
327                         resj = 0.0;
328                         wjm = -1.0;
329                         jm = 0;
330                         for (j = 0; j < dimt; j++)
331                         {
332                                 resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
333                                 if (wpos(post[j]) > wjm)
334                                 {
335                                         wjm = wpos(post[j]);
336                                         jm = j;
337                                 }
338                         }
339 /*
340                         limit (sum(i/i^2),i->inf) = pi^2/6
341                         resj = sum(wi/i^2),i=1,noccurence,
342                         wi - should be sorted desc,
343                         don't sort for now, just choose maximum weight. This should be corrected
344                         Oleg Bartunov
345 */
346                         res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
347
348                         entry++;
349                 }
350         }
351         if (size > 0)
352                 res = res / size;
353         pfree(item);
354         return res;
355 }
356
357 static float
358 calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
359 {
360         QueryItem  *item = GETQUERY(q);
361         float           res = 0.0;
362         int                     len;
363
364         if (!t->size || !q->size)
365                 return 0.0;
366
367         /* XXX: What about NOT? */
368         res = (item->type == QI_OPR && (item->qoperator.oper == OP_AND ||
369                                                                         item->qoperator.oper == OP_PHRASE)) ?
370                 calc_rank_and(w, t, q) :
371                 calc_rank_or(w, t, q);
372
373         if (res < 0)
374                 res = 1e-20f;
375
376         if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
377                 res /= log((double) (cnt_length(t) + 1)) / log(2.0);
378
379         if (method & RANK_NORM_LENGTH)
380         {
381                 len = cnt_length(t);
382                 if (len > 0)
383                         res /= (float) len;
384         }
385
386         /* RANK_NORM_EXTDIST not applicable */
387
388         if ((method & RANK_NORM_UNIQ) && t->size > 0)
389                 res /= (float) (t->size);
390
391         if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
392                 res /= log((double) (t->size + 1)) / log(2.0);
393
394         if (method & RANK_NORM_RDIVRPLUS1)
395                 res /= (res + 1);
396
397         return res;
398 }
399
400 static const float *
401 getWeights(ArrayType *win)
402 {
403         static float ws[lengthof(weights)];
404         int                     i;
405         float4     *arrdata;
406
407         if (win == NULL)
408                 return weights;
409
410         if (ARR_NDIM(win) != 1)
411                 ereport(ERROR,
412                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
413                                  errmsg("array of weight must be one-dimensional")));
414
415         if (ArrayGetNItems(ARR_NDIM(win), ARR_DIMS(win)) < lengthof(weights))
416                 ereport(ERROR,
417                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
418                                  errmsg("array of weight is too short")));
419
420         if (array_contains_nulls(win))
421                 ereport(ERROR,
422                                 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
423                                  errmsg("array of weight must not contain nulls")));
424
425         arrdata = (float4 *) ARR_DATA_PTR(win);
426         for (i = 0; i < lengthof(weights); i++)
427         {
428                 ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i];
429                 if (ws[i] > 1.0)
430                         ereport(ERROR,
431                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
432                                          errmsg("weight out of range")));
433         }
434
435         return ws;
436 }
437
438 Datum
439 ts_rank_wttf(PG_FUNCTION_ARGS)
440 {
441         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
442         TSVector        txt = PG_GETARG_TSVECTOR(1);
443         TSQuery         query = PG_GETARG_TSQUERY(2);
444         int                     method = PG_GETARG_INT32(3);
445         float           res;
446
447         res = calc_rank(getWeights(win), txt, query, method);
448
449         PG_FREE_IF_COPY(win, 0);
450         PG_FREE_IF_COPY(txt, 1);
451         PG_FREE_IF_COPY(query, 2);
452         PG_RETURN_FLOAT4(res);
453 }
454
455 Datum
456 ts_rank_wtt(PG_FUNCTION_ARGS)
457 {
458         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
459         TSVector        txt = PG_GETARG_TSVECTOR(1);
460         TSQuery         query = PG_GETARG_TSQUERY(2);
461         float           res;
462
463         res = calc_rank(getWeights(win), txt, query, DEF_NORM_METHOD);
464
465         PG_FREE_IF_COPY(win, 0);
466         PG_FREE_IF_COPY(txt, 1);
467         PG_FREE_IF_COPY(query, 2);
468         PG_RETURN_FLOAT4(res);
469 }
470
471 Datum
472 ts_rank_ttf(PG_FUNCTION_ARGS)
473 {
474         TSVector        txt = PG_GETARG_TSVECTOR(0);
475         TSQuery         query = PG_GETARG_TSQUERY(1);
476         int                     method = PG_GETARG_INT32(2);
477         float           res;
478
479         res = calc_rank(getWeights(NULL), txt, query, method);
480
481         PG_FREE_IF_COPY(txt, 0);
482         PG_FREE_IF_COPY(query, 1);
483         PG_RETURN_FLOAT4(res);
484 }
485
486 Datum
487 ts_rank_tt(PG_FUNCTION_ARGS)
488 {
489         TSVector        txt = PG_GETARG_TSVECTOR(0);
490         TSQuery         query = PG_GETARG_TSQUERY(1);
491         float           res;
492
493         res = calc_rank(getWeights(NULL), txt, query, DEF_NORM_METHOD);
494
495         PG_FREE_IF_COPY(txt, 0);
496         PG_FREE_IF_COPY(query, 1);
497         PG_RETURN_FLOAT4(res);
498 }
499
500 typedef struct
501 {
502         union
503         {
504                 struct
505                 {                                               /* compiled doc representation */
506                         QueryItem **items;
507                         int16           nitem;
508                 }                       query;
509                 struct
510                 {                                               /* struct is used for preparing doc
511                                                                  * representation */
512                         QueryItem  *item;
513                         WordEntry  *entry;
514                 }                       map;
515         }                       data;
516         WordEntryPos pos;
517 } DocRepresentation;
518
519 static int
520 compareDocR(const void *va, const void *vb)
521 {
522         const DocRepresentation *a = (const DocRepresentation *) va;
523         const DocRepresentation *b = (const DocRepresentation *) vb;
524
525         if (WEP_GETPOS(a->pos) == WEP_GETPOS(b->pos))
526         {
527                 if (WEP_GETWEIGHT(a->pos) == WEP_GETWEIGHT(b->pos))
528                 {
529                         if (a->data.map.entry == b->data.map.entry)
530                                 return 0;
531
532                         return (a->data.map.entry > b->data.map.entry) ? 1 : -1;
533                 }
534
535                 return (WEP_GETWEIGHT(a->pos) > WEP_GETWEIGHT(b->pos)) ? 1 : -1;
536         }
537
538         return (WEP_GETPOS(a->pos) > WEP_GETPOS(b->pos)) ? 1 : -1;
539 }
540
541 #define MAXQROPOS       MAXENTRYPOS
542 typedef struct
543 {
544         bool            operandexists;
545         bool            reverseinsert;  /* indicates insert order, true means
546                                                                  * descending order */
547         uint32          npos;
548         WordEntryPos pos[MAXQROPOS];
549 } QueryRepresentationOperand;
550
551 typedef struct
552 {
553         TSQuery         query;
554         QueryRepresentationOperand *operandData;
555 } QueryRepresentation;
556
557 #define QR_GET_OPERAND_DATA(q, v) \
558         ( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) )
559
560 static bool
561 checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *data)
562 {
563         QueryRepresentation *qr = (QueryRepresentation *) checkval;
564         QueryRepresentationOperand *opData = QR_GET_OPERAND_DATA(qr, val);
565
566         if (!opData->operandexists)
567                 return false;
568
569         if (data)
570         {
571                 data->npos = opData->npos;
572                 data->pos = opData->pos;
573                 if (opData->reverseinsert)
574                         data->pos += MAXQROPOS - opData->npos;
575         }
576
577         return true;
578 }
579
580 typedef struct
581 {
582         int                     pos;
583         int                     p;
584         int                     q;
585         DocRepresentation *begin;
586         DocRepresentation *end;
587 } CoverExt;
588
589 static void
590 resetQueryRepresentation(QueryRepresentation *qr, bool reverseinsert)
591 {
592         int                     i;
593
594         for (i = 0; i < qr->query->size; i++)
595         {
596                 qr->operandData[i].operandexists = false;
597                 qr->operandData[i].reverseinsert = reverseinsert;
598                 qr->operandData[i].npos = 0;
599         }
600 }
601
602 static void
603 fillQueryRepresentationData(QueryRepresentation *qr, DocRepresentation *entry)
604 {
605         int                     i;
606         int                     lastPos;
607         QueryRepresentationOperand *opData;
608
609         for (i = 0; i < entry->data.query.nitem; i++)
610         {
611                 if (entry->data.query.items[i]->type != QI_VAL)
612                         continue;
613
614                 opData = QR_GET_OPERAND_DATA(qr, entry->data.query.items[i]);
615
616                 opData->operandexists = true;
617
618                 if (opData->npos == 0)
619                 {
620                         lastPos = (opData->reverseinsert) ? (MAXQROPOS - 1) : 0;
621                         opData->pos[lastPos] = entry->pos;
622                         opData->npos++;
623                         continue;
624                 }
625
626                 lastPos = opData->reverseinsert ?
627                         (MAXQROPOS - opData->npos) :
628                         (opData->npos - 1);
629
630                 if (WEP_GETPOS(opData->pos[lastPos]) != WEP_GETPOS(entry->pos))
631                 {
632                         lastPos = opData->reverseinsert ?
633                                 (MAXQROPOS - 1 - opData->npos) :
634                                 (opData->npos);
635
636                         opData->pos[lastPos] = entry->pos;
637                         opData->npos++;
638                 }
639         }
640 }
641
642 static bool
643 Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
644 {
645         DocRepresentation *ptr;
646         int                     lastpos = ext->pos;
647         bool            found = false;
648
649         /*
650          * since this function recurses, it could be driven to stack overflow.
651          * (though any decent compiler will optimize away the tail-recursion.
652          */
653         check_stack_depth();
654
655         resetQueryRepresentation(qr, false);
656
657         ext->p = INT_MAX;
658         ext->q = 0;
659         ptr = doc + ext->pos;
660
661         /* find upper bound of cover from current position, move up */
662         while (ptr - doc < len)
663         {
664                 fillQueryRepresentationData(qr, ptr);
665
666                 if (TS_execute(GETQUERY(qr->query), (void *) qr,
667                                            TS_EXEC_EMPTY, checkcondition_QueryOperand))
668                 {
669                         if (WEP_GETPOS(ptr->pos) > ext->q)
670                         {
671                                 ext->q = WEP_GETPOS(ptr->pos);
672                                 ext->end = ptr;
673                                 lastpos = ptr - doc;
674                                 found = true;
675                         }
676                         break;
677                 }
678                 ptr++;
679         }
680
681         if (!found)
682                 return false;
683
684         resetQueryRepresentation(qr, true);
685
686         ptr = doc + lastpos;
687
688         /* find lower bound of cover from found upper bound, move down */
689         while (ptr >= doc + ext->pos)
690         {
691                 /*
692                  * we scan doc from right to left, so pos info in reverse order!
693                  */
694                 fillQueryRepresentationData(qr, ptr);
695
696                 if (TS_execute(GETQUERY(qr->query), (void *) qr,
697                                            TS_EXEC_CALC_NOT, checkcondition_QueryOperand))
698                 {
699                         if (WEP_GETPOS(ptr->pos) < ext->p)
700                         {
701                                 ext->begin = ptr;
702                                 ext->p = WEP_GETPOS(ptr->pos);
703                         }
704                         break;
705                 }
706                 ptr--;
707         }
708
709         if (ext->p <= ext->q)
710         {
711                 /*
712                  * set position for next try to next lexeme after beginning of found
713                  * cover
714                  */
715                 ext->pos = (ptr - doc) + 1;
716                 return true;
717         }
718
719         ext->pos++;
720         return Cover(doc, len, qr, ext);
721 }
722
723 static DocRepresentation *
724 get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
725 {
726         QueryItem  *item = GETQUERY(qr->query);
727         WordEntry  *entry,
728                            *firstentry;
729         WordEntryPos *post;
730         int32           dimt,                   /* number of 'post' items */
731                                 j,
732                                 i,
733                                 nitem;
734         int                     len = qr->query->size * 4,
735                                 cur = 0;
736         DocRepresentation *doc;
737
738         doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
739
740         /*
741          * Iterate through query to make DocRepresentation for words and it's
742          * entries satisfied by query
743          */
744         for (i = 0; i < qr->query->size; i++)
745         {
746                 QueryOperand *curoperand;
747
748                 if (item[i].type != QI_VAL)
749                         continue;
750
751                 curoperand = &item[i].qoperand;
752
753                 firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
754                 if (!entry)
755                         continue;
756
757                 /* iterations over entries in tsvector */
758                 while (entry - firstentry < nitem)
759                 {
760                         if (entry->haspos)
761                         {
762                                 dimt = POSDATALEN(txt, entry);
763                                 post = POSDATAPTR(txt, entry);
764                         }
765                         else
766                         {
767                                 /* ignore words without positions */
768                                 entry++;
769                                 continue;
770                         }
771
772                         while (cur + dimt >= len)
773                         {
774                                 len *= 2;
775                                 doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
776                         }
777
778                         /* iterations over entry's positions */
779                         for (j = 0; j < dimt; j++)
780                         {
781                                 if (curoperand->weight == 0 ||
782                                         curoperand->weight & (1 << WEP_GETWEIGHT(post[j])))
783                                 {
784                                         doc[cur].pos = post[j];
785                                         doc[cur].data.map.entry = entry;
786                                         doc[cur].data.map.item = (QueryItem *) curoperand;
787                                         cur++;
788                                 }
789                         }
790
791                         entry++;
792                 }
793         }
794
795         if (cur > 0)
796         {
797                 DocRepresentation *rptr = doc + 1,
798                                    *wptr = doc,
799                                         storage;
800
801                 /*
802                  * Sort representation in ascending order by pos and entry
803                  */
804                 qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
805
806                 /*
807                  * Join QueryItem per WordEntry and it's position
808                  */
809                 storage.pos = doc->pos;
810                 storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
811                 storage.data.query.items[0] = doc->data.map.item;
812                 storage.data.query.nitem = 1;
813
814                 while (rptr - doc < cur)
815                 {
816                         if (rptr->pos == (rptr - 1)->pos &&
817                                 rptr->data.map.entry == (rptr - 1)->data.map.entry)
818                         {
819                                 storage.data.query.items[storage.data.query.nitem] = rptr->data.map.item;
820                                 storage.data.query.nitem++;
821                         }
822                         else
823                         {
824                                 *wptr = storage;
825                                 wptr++;
826                                 storage.pos = rptr->pos;
827                                 storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
828                                 storage.data.query.items[0] = rptr->data.map.item;
829                                 storage.data.query.nitem = 1;
830                         }
831
832                         rptr++;
833                 }
834
835                 *wptr = storage;
836                 wptr++;
837
838                 *doclen = wptr - doc;
839                 return doc;
840         }
841
842         pfree(doc);
843         return NULL;
844 }
845
846 static float4
847 calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
848 {
849         DocRepresentation *doc;
850         int                     len,
851                                 i,
852                                 doclen = 0;
853         CoverExt        ext;
854         double          Wdoc = 0.0;
855         double          invws[lengthof(weights)];
856         double          SumDist = 0.0,
857                                 PrevExtPos = 0.0,
858                                 CurExtPos = 0.0;
859         int                     NExtent = 0;
860         QueryRepresentation qr;
861
862
863         for (i = 0; i < lengthof(weights); i++)
864         {
865                 invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
866                 if (invws[i] > 1.0)
867                         ereport(ERROR,
868                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
869                                          errmsg("weight out of range")));
870                 invws[i] = 1.0 / invws[i];
871         }
872
873         qr.query = query;
874         qr.operandData = (QueryRepresentationOperand *)
875                 palloc0(sizeof(QueryRepresentationOperand) * query->size);
876
877         doc = get_docrep(txt, &qr, &doclen);
878         if (!doc)
879         {
880                 pfree(qr.operandData);
881                 return 0.0;
882         }
883
884         MemSet(&ext, 0, sizeof(CoverExt));
885         while (Cover(doc, doclen, &qr, &ext))
886         {
887                 double          Cpos = 0.0;
888                 double          InvSum = 0.0;
889                 int                     nNoise;
890                 DocRepresentation *ptr = ext.begin;
891
892                 while (ptr <= ext.end)
893                 {
894                         InvSum += invws[WEP_GETWEIGHT(ptr->pos)];
895                         ptr++;
896                 }
897
898                 Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
899
900                 /*
901                  * if doc are big enough then ext.q may be equal to ext.p due to limit
902                  * of positional information. In this case we approximate number of
903                  * noise word as half cover's length
904                  */
905                 nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
906                 if (nNoise < 0)
907                         nNoise = (ext.end - ext.begin) / 2;
908                 Wdoc += Cpos / ((double) (1 + nNoise));
909
910                 CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
911                 if (NExtent > 0 && CurExtPos > PrevExtPos       /* prevent division by
912                                                                                                          * zero in a case of
913                           * multiple lexize */ )
914                         SumDist += 1.0 / (CurExtPos - PrevExtPos);
915
916                 PrevExtPos = CurExtPos;
917                 NExtent++;
918         }
919
920         if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
921                 Wdoc /= log((double) (cnt_length(txt) + 1));
922
923         if (method & RANK_NORM_LENGTH)
924         {
925                 len = cnt_length(txt);
926                 if (len > 0)
927                         Wdoc /= (double) len;
928         }
929
930         if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0)
931                 Wdoc /= ((double) NExtent) / SumDist;
932
933         if ((method & RANK_NORM_UNIQ) && txt->size > 0)
934                 Wdoc /= (double) (txt->size);
935
936         if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
937                 Wdoc /= log((double) (txt->size + 1)) / log(2.0);
938
939         if (method & RANK_NORM_RDIVRPLUS1)
940                 Wdoc /= (Wdoc + 1);
941
942         pfree(doc);
943
944         pfree(qr.operandData);
945
946         return (float4) Wdoc;
947 }
948
949 Datum
950 ts_rankcd_wttf(PG_FUNCTION_ARGS)
951 {
952         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
953         TSVector        txt = PG_GETARG_TSVECTOR(1);
954         TSQuery         query = PG_GETARG_TSQUERY(2);
955         int                     method = PG_GETARG_INT32(3);
956         float           res;
957
958         res = calc_rank_cd(getWeights(win), txt, query, method);
959
960         PG_FREE_IF_COPY(win, 0);
961         PG_FREE_IF_COPY(txt, 1);
962         PG_FREE_IF_COPY(query, 2);
963         PG_RETURN_FLOAT4(res);
964 }
965
966 Datum
967 ts_rankcd_wtt(PG_FUNCTION_ARGS)
968 {
969         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
970         TSVector        txt = PG_GETARG_TSVECTOR(1);
971         TSQuery         query = PG_GETARG_TSQUERY(2);
972         float           res;
973
974         res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);
975
976         PG_FREE_IF_COPY(win, 0);
977         PG_FREE_IF_COPY(txt, 1);
978         PG_FREE_IF_COPY(query, 2);
979         PG_RETURN_FLOAT4(res);
980 }
981
982 Datum
983 ts_rankcd_ttf(PG_FUNCTION_ARGS)
984 {
985         TSVector        txt = PG_GETARG_TSVECTOR(0);
986         TSQuery         query = PG_GETARG_TSQUERY(1);
987         int                     method = PG_GETARG_INT32(2);
988         float           res;
989
990         res = calc_rank_cd(getWeights(NULL), txt, query, method);
991
992         PG_FREE_IF_COPY(txt, 0);
993         PG_FREE_IF_COPY(query, 1);
994         PG_RETURN_FLOAT4(res);
995 }
996
997 Datum
998 ts_rankcd_tt(PG_FUNCTION_ARGS)
999 {
1000         TSVector        txt = PG_GETARG_TSVECTOR(0);
1001         TSQuery         query = PG_GETARG_TSQUERY(1);
1002         float           res;
1003
1004         res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);
1005
1006         PG_FREE_IF_COPY(txt, 0);
1007         PG_FREE_IF_COPY(query, 1);
1008         PG_RETURN_FLOAT4(res);
1009 }