]> granicus.if.org Git - postgresql/blob - contrib/tsearch2/rank.c
Avoid infinity calculations in rank_cd
[postgresql] / contrib / tsearch2 / rank.c
1 /*
2  * Relevation
3  * Teodor Sigaev <teodor@sigaev.ru>
4  */
5 #include "postgres.h"
6
7 #include <math.h>
8
9 #include "access/gist.h"
10 #include "access/itup.h"
11 #include "catalog/namespace.h"
12 #include "commands/trigger.h"
13 #include "executor/spi.h"
14 #include "fmgr.h"
15 #include "funcapi.h"
16 #include "nodes/pg_list.h"
17 #include "storage/bufpage.h"
18 #include "utils/array.h"
19 #include "utils/builtins.h"
20
21 #include "tsvector.h"
22 #include "query.h"
23 #include "common.h"
24
25 PG_FUNCTION_INFO_V1(rank);
26 Datum           rank(PG_FUNCTION_ARGS);
27
28 PG_FUNCTION_INFO_V1(rank_def);
29 Datum           rank_def(PG_FUNCTION_ARGS);
30
31 PG_FUNCTION_INFO_V1(rank_cd);
32 Datum           rank_cd(PG_FUNCTION_ARGS);
33
34 PG_FUNCTION_INFO_V1(rank_cd_def);
35 Datum           rank_cd_def(PG_FUNCTION_ARGS);
36
37 PG_FUNCTION_INFO_V1(get_covers);
38 Datum           get_covers(PG_FUNCTION_ARGS);
39
40 static float weights[] = {0.1, 0.2, 0.4, 1.0};
41
42 #define wpos(wep)       ( w[ WEP_GETWEIGHT(wep) ] )
43
44 #define RANK_NO_NORM            0x00
45 #define RANK_NORM_LOGLENGTH             0x01
46 #define RANK_NORM_LENGTH        0x02
47 #define RANK_NORM_EXTDIST       0x04
48 #define RANK_NORM_UNIQ          0x08
49 #define RANK_NORM_LOGUNIQ       0x10
50 #define DEF_NORM_METHOD         RANK_NO_NORM
51
52 static float calc_rank_or(float *w, tsvector * t, QUERYTYPE * q);
53 static float calc_rank_and(float *w, tsvector * t, QUERYTYPE * q);
54
55 /*
56  * Returns a weight of a word collocation
57  */
58 static float4
59 word_distance(int4 w)
60 {
61         if (w > 100)
62                 return 1e-30;
63
64         return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
65 }
66
67 static int
68 cnt_length(tsvector * t)
69 {
70         WordEntry  *ptr = ARRPTR(t),
71                            *end = (WordEntry *) STRPTR(t);
72         int                     len = 0,
73                                 clen;
74
75         while (ptr < end)
76         {
77                 if ((clen = POSDATALEN(t, ptr)) == 0)
78                         len += 1;
79                 else
80                         len += clen;
81                 ptr++;
82         }
83
84         return len;
85 }
86
87 static int4
88 WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item)
89 {
90         if (ptr->len == item->length)
91                 return strncmp(
92                                            eval + ptr->pos,
93                                            qval + item->distance,
94                                            item->length);
95
96         return (ptr->len > item->length) ? 1 : -1;
97 }
98
99 static WordEntry *
100 find_wordentry(tsvector * t, QUERYTYPE * q, ITEM * item)
101 {
102         WordEntry  *StopLow = ARRPTR(t);
103         WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
104         WordEntry  *StopMiddle;
105         int                     difference;
106
107         /* Loop invariant: StopLow <= item < StopHigh */
108
109         while (StopLow < StopHigh)
110         {
111                 StopMiddle = StopLow + (StopHigh - StopLow) / 2;
112                 difference = WordECompareITEM(STRPTR(t), GETOPERAND(q), StopMiddle, item);
113                 if (difference == 0)
114                         return StopMiddle;
115                 else if (difference < 0)
116                         StopLow = StopMiddle + 1;
117                 else
118                         StopHigh = StopMiddle;
119         }
120
121         return NULL;
122 }
123
124
125 static int
126 compareITEM(const void *a, const void *b, void *arg)
127 {
128         char *operand = (char *) arg;
129
130         if ((*(ITEM **) a)->length == (*(ITEM **) b)->length)
131                 return strncmp(operand + (*(ITEM **) a)->distance,
132                                            operand + (*(ITEM **) b)->distance,
133                                            (*(ITEM **) b)->length);
134
135         return ((*(ITEM **) a)->length > (*(ITEM **) b)->length) ? 1 : -1;
136 }
137
138 static ITEM **
139 SortAndUniqItems(char *operand, ITEM * item, int *size)
140 {
141         ITEM      **res,
142                           **ptr,
143                           **prevptr;
144
145         ptr = res = (ITEM **) palloc(sizeof(ITEM *) * *size);
146
147         while ((*size)--)
148         {
149                 if (item->type == VAL)
150                 {
151                         *ptr = item;
152                         ptr++;
153                 }
154                 item++;
155         }
156
157         *size = ptr - res;
158         if (*size < 2)
159                 return res;
160
161         qsort_arg(res, *size, sizeof(ITEM **), compareITEM, (void *) operand);
162
163         ptr = res + 1;
164         prevptr = res;
165
166         while (ptr - res < *size)
167         {
168                 if (compareITEM((void *) ptr, (void *) prevptr, (void *) operand) != 0)
169                 {
170                         prevptr++;
171                         *prevptr = *ptr;
172                 }
173                 ptr++;
174         }
175
176         *size = prevptr + 1 - res;
177         return res;
178 }
179
180 static WordEntryPos POSNULL[] = {
181         0,
182         0
183 };
184
185 static float
186 calc_rank_and(float *w, tsvector * t, QUERYTYPE * q)
187 {
188         uint16    **pos;
189         int                     i,
190                                 k,
191                                 l,
192                                 p;
193         WordEntry  *entry;
194         WordEntryPos *post,
195                            *ct;
196         int4            dimt,
197                                 lenct,
198                                 dist;
199         float           res = -1.0;
200         ITEM      **item;
201         int                     size = q->size;
202
203         item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
204         if (size < 2)
205         {
206                 pfree(item);
207                 return calc_rank_or(w, t, q);
208         }
209         pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
210         memset(pos, 0, sizeof(uint16 *) * q->size);
211         *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
212         WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1);
213
214         for (i = 0; i < size; i++)
215         {
216                 entry = find_wordentry(t, q, item[i]);
217                 if (!entry)
218                         continue;
219
220                 if (entry->haspos)
221                         pos[i] = (uint16 *) _POSDATAPTR(t, entry);
222                 else
223                         pos[i] = (uint16 *) POSNULL;
224
225
226                 dimt = *(uint16 *) (pos[i]);
227                 post = (WordEntryPos *) (pos[i] + 1);
228                 for (k = 0; k < i; k++)
229                 {
230                         if (!pos[k])
231                                 continue;
232                         lenct = *(uint16 *) (pos[k]);
233                         ct = (WordEntryPos *) (pos[k] + 1);
234                         for (l = 0; l < dimt; l++)
235                         {
236                                 for (p = 0; p < lenct; p++)
237                                 {
238                                         dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
239                                         if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL)))
240                                         {
241                                                 float           curw;
242
243                                                 if (!dist)
244                                                         dist = MAXENTRYPOS;
245                                                 curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
246                                                 res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
247                                         }
248                                 }
249                         }
250                 }
251         }
252         pfree(pos);
253         pfree(item);
254         return res;
255 }
256
257 static float
258 calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
259 {
260         WordEntry  *entry;
261         WordEntryPos *post;
262         int4            dimt,
263                                 j,
264                                 i;
265         float           res = 0.0;
266         ITEM      **item;
267         int                     size = q->size;
268
269         *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
270         item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
271
272         for (i = 0; i < size; i++)
273         {
274                 float           resj,
275                                         wjm;
276                 int4            jm;
277
278                 entry = find_wordentry(t, q, item[i]);
279                 if (!entry)
280                         continue;
281
282                 if (entry->haspos)
283                 {
284                         dimt = POSDATALEN(t, entry);
285                         post = POSDATAPTR(t, entry);
286                 }
287                 else
288                 {
289                         dimt = *(uint16 *) POSNULL;
290                         post = POSNULL + 1;
291                 }
292
293                 resj = 0.0;
294                 wjm = -1.0;
295                 jm = 0;
296                 for (j = 0; j < dimt; j++)
297                 {
298                         resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
299                         if (wpos(post[j]) > wjm)
300                         {
301                                 wjm = wpos(post[j]);
302                                 jm = j;
303                         }
304                 }
305 /*
306                 limit (sum(i/i^2),i->inf) = pi^2/6
307                 resj = sum(wi/i^2),i=1,noccurence,
308                 wi - should be sorted desc,
309                 don't sort for now, just choose maximum weight. This should be corrected
310                 Oleg Bartunov
311 */
312                 res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
313         }
314         if (size > 0)
315                 res = res / size;
316         pfree(item);
317         return res;
318 }
319
320 static float
321 calc_rank(float *w, tsvector * t, QUERYTYPE * q, int4 method)
322 {
323         ITEM       *item = GETQUERY(q);
324         float           res = 0.0;
325         int                     len;
326
327         if (!t->size || !q->size)
328                 return 0.0;
329
330         res = (item->type != VAL && item->val == (int4) '&') ?
331                 calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
332
333         if (res < 0)
334                 res = 1e-20;
335
336         if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
337                 res /= log((double) (cnt_length(t) + 1)) / log(2.0);
338
339         if (method & RANK_NORM_LENGTH)
340         {
341                 len = cnt_length(t);
342                 if (len > 0)
343                         res /= (float) len;
344         }
345
346         if ((method & RANK_NORM_UNIQ) && t->size > 0)
347                 res /= (float) (t->size);
348
349         if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
350                 res /= log((double) (t->size + 1)) / log(2.0);
351
352         return res;
353 }
354
355 Datum
356 rank(PG_FUNCTION_ARGS)
357 {
358         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
359         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
360         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
361         int                     method = DEF_NORM_METHOD;
362         float           res = 0.0;
363         float           ws[lengthof(weights)];
364         float4     *arrdata;
365         int                     i;
366
367         if (ARR_NDIM(win) != 1)
368                 ereport(ERROR,
369                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
370                                  errmsg("array of weight must be one-dimensional")));
371
372         if (ARRNELEMS(win) < lengthof(weights))
373                 ereport(ERROR,
374                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
375                                  errmsg("array of weight is too short")));
376
377         if (ARR_HASNULL(win))
378                 ereport(ERROR,
379                                 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
380                                  errmsg("array of weight must not contain nulls")));
381
382         arrdata = (float4 *) ARR_DATA_PTR(win);
383         for (i = 0; i < lengthof(weights); i++)
384         {
385                 ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i];
386                 if (ws[i] > 1.0)
387                         ereport(ERROR,
388                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
389                                          errmsg("weight out of range")));
390         }
391
392         if (PG_NARGS() == 4)
393                 method = PG_GETARG_INT32(3);
394
395         res = calc_rank(ws, txt, query, method);
396
397         PG_FREE_IF_COPY(win, 0);
398         PG_FREE_IF_COPY(txt, 1);
399         PG_FREE_IF_COPY(query, 2);
400         PG_RETURN_FLOAT4(res);
401 }
402
403 Datum
404 rank_def(PG_FUNCTION_ARGS)
405 {
406         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
407         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
408         float           res = 0.0;
409         int                     method = DEF_NORM_METHOD;
410
411         if (PG_NARGS() == 3)
412                 method = PG_GETARG_INT32(2);
413
414         res = calc_rank(weights, txt, query, method);
415
416         PG_FREE_IF_COPY(txt, 0);
417         PG_FREE_IF_COPY(query, 1);
418         PG_RETURN_FLOAT4(res);
419 }
420
421
422 typedef struct
423 {
424         ITEM      **item;
425         int16           nitem;
426         bool            needfree;
427         uint8           wclass;
428         int32           pos;
429 }       DocRepresentation;
430
431 static int
432 compareDocR(const void *a, const void *b)
433 {
434         if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos)
435                 return 0;
436         return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
437 }
438
439 static bool
440 checkcondition_ITEM(void *checkval, ITEM * val)
441 {
442         return (bool) (val->istrue);
443 }
444
445 static void
446 reset_istrue_flag(QUERYTYPE * query)
447 {
448         ITEM       *item = GETQUERY(query);
449         int                     i;
450
451         /* reset istrue flag */
452         for (i = 0; i < query->size; i++)
453         {
454                 if (item->type == VAL)
455                         item->istrue = 0;
456                 item++;
457         }
458 }
459
460 typedef struct
461 {
462         int                     pos;
463         int                     p;
464         int                     q;
465         DocRepresentation *begin;
466         DocRepresentation *end;
467 }       Extention;
468
469
470 static bool
471 Cover(DocRepresentation * doc, int len, QUERYTYPE * query, Extention * ext)
472 {
473         DocRepresentation *ptr;
474         int                     lastpos = ext->pos;
475         int                     i;
476         bool            found = false;
477
478         reset_istrue_flag(query);
479
480         ext->p = 0x7fffffff;
481         ext->q = 0;
482         ptr = doc + ext->pos;
483
484         /* find upper bound of cover from current position, move up */
485         while (ptr - doc < len)
486         {
487                 for (i = 0; i < ptr->nitem; i++)
488                         ptr->item[i]->istrue = 1;
489                 if (TS_execute(GETQUERY(query), NULL, false, checkcondition_ITEM))
490                 {
491                         if (ptr->pos > ext->q)
492                         {
493                                 ext->q = ptr->pos;
494                                 ext->end = ptr;
495                                 lastpos = ptr - doc;
496                                 found = true;
497                         }
498                         break;
499                 }
500                 ptr++;
501         }
502
503         if (!found)
504                 return false;
505
506         reset_istrue_flag(query);
507
508         ptr = doc + lastpos;
509
510         /* find lower bound of cover from founded upper bound, move down */
511         while (ptr >= doc)
512         {
513                 for (i = 0; i < ptr->nitem; i++)
514                         ptr->item[i]->istrue = 1;
515                 if (TS_execute(GETQUERY(query), NULL, true, checkcondition_ITEM))
516                 {
517                         if (ptr->pos < ext->p)
518                         {
519                                 ext->begin = ptr;
520                                 ext->p = ptr->pos;
521                         }
522                         break;
523                 }
524                 ptr--;
525         }
526
527         if (ext->p <= ext->q)
528         {
529                 /*
530                  * set position for next try to next lexeme after begining of founded
531                  * cover
532                  */
533                 ext->pos = (ptr - doc) + 1;
534                 return true;
535         }
536
537         ext->pos++;
538         return Cover(doc, len, query, ext);
539 }
540
541 static DocRepresentation *
542 get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen)
543 {
544         ITEM       *item = GETQUERY(query);
545         WordEntry  *entry;
546         WordEntryPos *post;
547         int4            dimt,
548                                 j,
549                                 i;
550         int                     len = query->size * 4,
551                                 cur = 0;
552         DocRepresentation *doc;
553         char       *operand;
554
555         *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
556         doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
557         operand = GETOPERAND(query);
558         reset_istrue_flag(query);
559
560         for (i = 0; i < query->size; i++)
561         {
562                 if (item[i].type != VAL || item[i].istrue)
563                         continue;
564
565                 entry = find_wordentry(txt, query, &(item[i]));
566                 if (!entry)
567                         continue;
568
569                 if (entry->haspos)
570                 {
571                         dimt = POSDATALEN(txt, entry);
572                         post = POSDATAPTR(txt, entry);
573                 }
574                 else
575                 {
576                         dimt = *(uint16 *) POSNULL;
577                         post = POSNULL + 1;
578                 }
579
580                 while (cur + dimt >= len)
581                 {
582                         len *= 2;
583                         doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
584                 }
585
586                 for (j = 0; j < dimt; j++)
587                 {
588                         if (j == 0)
589                         {
590                                 ITEM       *kptr,
591                                                    *iptr = item + i;
592                                 int                     k;
593
594                                 doc[cur].needfree = false;
595                                 doc[cur].nitem = 0;
596                                 doc[cur].item = (ITEM **) palloc(sizeof(ITEM *) * query->size);
597
598                                 for (k = 0; k < query->size; k++)
599                                 {
600                                         kptr = item + k;
601                                         if (k == i ||
602                                                 (item[k].type == VAL &&
603                                                  compareITEM(&kptr, &iptr, operand) == 0))
604                                         {
605                                                 doc[cur].item[doc[cur].nitem] = item + k;
606                                                 doc[cur].nitem++;
607                                                 kptr->istrue = 1;
608                                         }
609                                 }
610                         }
611                         else
612                         {
613                                 doc[cur].needfree = false;
614                                 doc[cur].nitem = doc[cur - 1].nitem;
615                                 doc[cur].item = doc[cur - 1].item;
616                         }
617                         doc[cur].pos = WEP_GETPOS(post[j]);
618                         doc[cur].wclass = WEP_GETWEIGHT(post[j]);
619                         cur++;
620                 }
621         }
622
623         *doclen = cur;
624
625         if (cur > 0)
626         {
627                 if (cur > 1)
628                         qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
629                 return doc;
630         }
631
632         pfree(doc);
633         return NULL;
634 }
635
636 static float4
637 calc_rank_cd(float4 *arrdata, tsvector * txt, QUERYTYPE * query, int method)
638 {
639         DocRepresentation *doc;
640         int                     len,
641                                 i,
642                                 doclen = 0;
643         Extention       ext;
644         double          Wdoc = 0.0;
645         double          invws[lengthof(weights)];
646         double          SumDist = 0.0,
647                                 PrevExtPos = 0.0,
648                                 CurExtPos = 0.0;
649         int                     NExtent = 0;
650
651         for (i = 0; i < lengthof(weights); i++)
652         {
653                 invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
654                 if (invws[i] > 1.0)
655                         ereport(ERROR,
656                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
657                                          errmsg("weight out of range")));
658                 invws[i] = 1.0 / invws[i];
659         }
660
661         doc = get_docrep(txt, query, &doclen);
662         if (!doc)
663                 return 0.0;
664
665         MemSet(&ext, 0, sizeof(Extention));
666         while (Cover(doc, doclen, query, &ext))
667         {
668                 double          Cpos = 0.0;
669                 double          InvSum = 0.0;
670                 int                     nNoise;
671                 DocRepresentation *ptr = ext.begin;
672
673                 while (ptr <= ext.end)
674                 {
675                         InvSum += invws[ptr->wclass];
676                         ptr++;
677                 }
678
679                 Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
680                 /*
681                  * if doc are big enough then ext.q may be equal to ext.p
682                  * due to limit of posional information. In this case we 
683                  * approximate number of noise word as half cover's
684                  * length
685                  */
686                 nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
687                 if ( nNoise < 0 )
688                         nNoise = (ext.end - ext.begin) / 2;
689                 Wdoc += Cpos / ((double) (1 + nNoise));
690
691                 CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
692                 if (NExtent > 0 && CurExtPos > PrevExtPos               /* prevent devision by
693                                                                                                                  * zero in a case of
694                                 multiple lexize */ )
695                         SumDist += 1.0 / (CurExtPos - PrevExtPos);
696
697                 PrevExtPos = CurExtPos;
698                 NExtent++;
699         }
700
701         if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
702                 Wdoc /= log((double) (cnt_length(txt) + 1));
703
704         if (method & RANK_NORM_LENGTH)
705         {
706                 len = cnt_length(txt);
707                 if (len > 0)
708                         Wdoc /= (double) len;
709         }
710
711         if ((method & RANK_NORM_EXTDIST) && SumDist > 0)
712                 Wdoc /= ((double) NExtent) / SumDist;
713
714         if ((method & RANK_NORM_UNIQ) && txt->size > 0)
715                 Wdoc /= (double) (txt->size);
716
717         if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
718                 Wdoc /= log((double) (txt->size + 1)) / log(2.0);
719
720         for (i = 0; i < doclen; i++)
721                 if (doc[i].needfree)
722                         pfree(doc[i].item);
723         pfree(doc);
724
725         return (float4) Wdoc;
726 }
727
728 Datum
729 rank_cd(PG_FUNCTION_ARGS)
730 {
731         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
732         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
733         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(2));
734         int                     method = DEF_NORM_METHOD;
735         float4          res;
736
737         if (ARR_NDIM(win) != 1)
738                 ereport(ERROR,
739                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
740                                  errmsg("array of weight must be one-dimensional")));
741
742         if (ARRNELEMS(win) < lengthof(weights))
743                 ereport(ERROR,
744                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
745                                  errmsg("array of weight is too short")));
746
747         if (ARR_HASNULL(win))
748                 ereport(ERROR,
749                                 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
750                                  errmsg("array of weight must not contain nulls")));
751
752         if (PG_NARGS() == 4)
753                 method = PG_GETARG_INT32(3);
754
755         res = calc_rank_cd((float4 *) ARR_DATA_PTR(win), txt, query, method);
756
757         PG_FREE_IF_COPY(win, 0);
758         PG_FREE_IF_COPY(txt, 1);
759         PG_FREE_IF_COPY(query, 2);
760
761         PG_RETURN_FLOAT4(res);
762 }
763
764
765 Datum
766 rank_cd_def(PG_FUNCTION_ARGS)
767 {
768         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
769         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1));
770         float4          res;
771
772         res = calc_rank_cd(weights, txt, query, (PG_NARGS() == 3) ? PG_GETARG_DATUM(2) : DEF_NORM_METHOD);
773
774         PG_FREE_IF_COPY(txt, 0);
775         PG_FREE_IF_COPY(query, 1);
776
777         PG_RETURN_FLOAT4(res);
778 }
779
780 /**************debug*************/
781
782 typedef struct
783 {
784         char       *w;
785         int2            len;
786         int2            pos;
787         int2            start;
788         int2            finish;
789 }       DocWord;
790
791 static int
792 compareDocWord(const void *a, const void *b)
793 {
794         if (((DocWord *) a)->pos == ((DocWord *) b)->pos)
795                 return 0;
796         return (((DocWord *) a)->pos > ((DocWord *) b)->pos) ? 1 : -1;
797 }
798
799
800 Datum
801 get_covers(PG_FUNCTION_ARGS)
802 {
803         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
804         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1));
805         WordEntry  *pptr = ARRPTR(txt);
806         int                     i,
807                                 dlen = 0,
808                                 j,
809                                 cur = 0,
810                                 len = 0,
811                                 rlen;
812         DocWord    *dw,
813                            *dwptr;
814         text       *out;
815         char       *cptr;
816         DocRepresentation *doc;
817         int                     olddwpos = 0;
818         int                     ncover = 1;
819         Extention       ext;
820
821         doc = get_docrep(txt, query, &rlen);
822
823         if (!doc)
824         {
825                 out = palloc(VARHDRSZ);
826                 VARATT_SIZEP(out) = VARHDRSZ;
827                 PG_FREE_IF_COPY(txt, 0);
828                 PG_FREE_IF_COPY(query, 1);
829                 PG_RETURN_POINTER(out);
830         }
831
832         for (i = 0; i < txt->size; i++)
833         {
834                 if (!pptr[i].haspos)
835                         ereport(ERROR,
836                                         (errcode(ERRCODE_SYNTAX_ERROR),
837                                          errmsg("no pos info")));
838                 dlen += POSDATALEN(txt, &(pptr[i]));
839         }
840
841         dwptr = dw = palloc(sizeof(DocWord) * dlen);
842         memset(dw, 0, sizeof(DocWord) * dlen);
843
844         for (i = 0; i < txt->size; i++)
845         {
846                 WordEntryPos *posdata = POSDATAPTR(txt, &(pptr[i]));
847
848                 for (j = 0; j < POSDATALEN(txt, &(pptr[i])); j++)
849                 {
850                         dw[cur].w = STRPTR(txt) + pptr[i].pos;
851                         dw[cur].len = pptr[i].len;
852                         dw[cur].pos = WEP_GETPOS(posdata[j]);
853                         cur++;
854                 }
855                 len += (pptr[i].len + 1) * (int) POSDATALEN(txt, &(pptr[i]));
856         }
857         qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
858
859         MemSet(&ext, 0, sizeof(Extention));
860         while (Cover(doc, rlen, query, &ext))
861         {
862                 dwptr = dw + olddwpos;
863                 while (dwptr->pos < ext.p && dwptr - dw < dlen)
864                         dwptr++;
865                 olddwpos = dwptr - dw;
866                 dwptr->start = ncover;
867                 while (dwptr->pos < ext.q + 1 && dwptr - dw < dlen)
868                         dwptr++;
869                 (dwptr - 1)->finish = ncover;
870                 len += 4 /* {}+two spaces */ + 2 * 16 /* numbers */ ;
871                 ncover++;
872         }
873
874         out = palloc(VARHDRSZ + len);
875         cptr = ((char *) out) + VARHDRSZ;
876         dwptr = dw;
877
878         while (dwptr - dw < dlen)
879         {
880                 if (dwptr->start)
881                 {
882                         sprintf(cptr, "{%d ", dwptr->start);
883                         cptr = strchr(cptr, '\0');
884                 }
885                 memcpy(cptr, dwptr->w, dwptr->len);
886                 cptr += dwptr->len;
887                 *cptr = ' ';
888                 cptr++;
889                 if (dwptr->finish)
890                 {
891                         sprintf(cptr, "}%d ", dwptr->finish);
892                         cptr = strchr(cptr, '\0');
893                 }
894                 dwptr++;
895         }
896
897         VARATT_SIZEP(out) = cptr - ((char *) out);
898
899         pfree(dw);
900         for (i = 0; i < rlen; i++)
901                 if (doc[i].needfree)
902                         pfree(doc[i].item);
903         pfree(doc);
904
905         PG_FREE_IF_COPY(txt, 0);
906         PG_FREE_IF_COPY(query, 1);
907         PG_RETURN_POINTER(out);
908 }