]> granicus.if.org Git - postgresql/blob - contrib/tsearch2/rank.c
pgindent run for 8.2.
[postgresql] / contrib / tsearch2 / rank.c
1 /*
2  * Relevance ranking
3  * Teodor Sigaev <teodor@sigaev.ru>
4  */
5 #include "postgres.h"
6
7 #include <math.h>
8
9 #include "access/gist.h"
10 #include "access/itup.h"
11 #include "catalog/namespace.h"
12 #include "commands/trigger.h"
13 #include "executor/spi.h"
14 #include "fmgr.h"
15 #include "funcapi.h"
16 #include "nodes/pg_list.h"
17 #include "storage/bufpage.h"
18 #include "utils/array.h"
19 #include "utils/builtins.h"
20
21 #include "tsvector.h"
22 #include "query.h"
23 #include "common.h"
24
25 PG_FUNCTION_INFO_V1(rank);
26 Datum           rank(PG_FUNCTION_ARGS);
27
28 PG_FUNCTION_INFO_V1(rank_def);
29 Datum           rank_def(PG_FUNCTION_ARGS);
30
31 PG_FUNCTION_INFO_V1(rank_cd);
32 Datum           rank_cd(PG_FUNCTION_ARGS);
33
34 PG_FUNCTION_INFO_V1(rank_cd_def);
35 Datum           rank_cd_def(PG_FUNCTION_ARGS);
36
37 PG_FUNCTION_INFO_V1(get_covers);
38 Datum           get_covers(PG_FUNCTION_ARGS);
39
/*
 * Default per-class weights.  Used whenever the caller supplies no weight
 * array, or supplies a negative value for a class.
 */
static float weights[] = {0.1, 0.2, 0.4, 1.0};

/*
 * Weight of a single position.  NOTE: expands to a lookup in a local
 * array named "w", so it can only be used where such a variable exists.
 */
#define wpos(wep)	( w[ WEP_GETWEIGHT(wep) ] )

/* Normalization method bits; they may be OR-ed together */
#define RANK_NO_NORM			0
#define RANK_NORM_LOGLENGTH		(1 << 0)
#define RANK_NORM_LENGTH		(1 << 1)
#define RANK_NORM_EXTDIST		(1 << 2)
#define RANK_NORM_UNIQ			(1 << 3)
#define RANK_NORM_LOGUNIQ		(1 << 4)

/* Method applied when the SQL caller does not pass one */
#define DEF_NORM_METHOD			RANK_NO_NORM
51
52 static float calc_rank_or(float *w, tsvector * t, QUERYTYPE * q);
53 static float calc_rank_and(float *w, tsvector * t, QUERYTYPE * q);
54
55 /*
56  * Returns a weight of a word collocation
57  */
58 static float4
59 word_distance(int4 w)
60 {
61         if (w > 100)
62                 return 1e-30;
63
64         return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
65 }
66
67 static int
68 cnt_length(tsvector * t)
69 {
70         WordEntry  *ptr = ARRPTR(t),
71                            *end = (WordEntry *) STRPTR(t);
72         int                     len = 0,
73                                 clen;
74
75         while (ptr < end)
76         {
77                 if ((clen = POSDATALEN(t, ptr)) == 0)
78                         len += 1;
79                 else
80                         len += clen;
81                 ptr++;
82         }
83
84         return len;
85 }
86
87 static int4
88 WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item)
89 {
90         if (ptr->len == item->length)
91                 return strncmp(
92                                            eval + ptr->pos,
93                                            qval + item->distance,
94                                            item->length);
95
96         return (ptr->len > item->length) ? 1 : -1;
97 }
98
99 static WordEntry *
100 find_wordentry(tsvector * t, QUERYTYPE * q, ITEM * item)
101 {
102         WordEntry  *StopLow = ARRPTR(t);
103         WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
104         WordEntry  *StopMiddle;
105         int                     difference;
106
107         /* Loop invariant: StopLow <= item < StopHigh */
108
109         while (StopLow < StopHigh)
110         {
111                 StopMiddle = StopLow + (StopHigh - StopLow) / 2;
112                 difference = WordECompareITEM(STRPTR(t), GETOPERAND(q), StopMiddle, item);
113                 if (difference == 0)
114                         return StopMiddle;
115                 else if (difference < 0)
116                         StopLow = StopMiddle + 1;
117                 else
118                         StopHigh = StopMiddle;
119         }
120
121         return NULL;
122 }
123
124
125 static char *SortAndUniqOperand = NULL;
126
127 static int
128 compareITEM(const void *a, const void *b)
129 {
130         if ((*(ITEM **) a)->length == (*(ITEM **) b)->length)
131                 return strncmp(SortAndUniqOperand + (*(ITEM **) a)->distance,
132                                            SortAndUniqOperand + (*(ITEM **) b)->distance,
133                                            (*(ITEM **) b)->length);
134
135         return ((*(ITEM **) a)->length > (*(ITEM **) b)->length) ? 1 : -1;
136 }
137
138 static ITEM **
139 SortAndUniqItems(char *operand, ITEM * item, int *size)
140 {
141         ITEM      **res,
142                           **ptr,
143                           **prevptr;
144
145         ptr = res = (ITEM **) palloc(sizeof(ITEM *) * *size);
146
147         while ((*size)--)
148         {
149                 if (item->type == VAL)
150                 {
151                         *ptr = item;
152                         ptr++;
153                 }
154                 item++;
155         }
156
157         *size = ptr - res;
158         if (*size < 2)
159                 return res;
160
161         SortAndUniqOperand = operand;
162         qsort(res, *size, sizeof(ITEM **), compareITEM);
163
164         ptr = res + 1;
165         prevptr = res;
166
167         while (ptr - res < *size)
168         {
169                 if (compareITEM((void *) ptr, (void *) prevptr) != 0)
170                 {
171                         prevptr++;
172                         *prevptr = *ptr;
173                 }
174                 ptr++;
175         }
176
177         *size = prevptr + 1 - res;
178         return res;
179 }
180
181 static WordEntryPos POSNULL[] = {
182         0,
183         0
184 };
185
186 static float
187 calc_rank_and(float *w, tsvector * t, QUERYTYPE * q)
188 {
189         uint16    **pos;
190         int                     i,
191                                 k,
192                                 l,
193                                 p;
194         WordEntry  *entry;
195         WordEntryPos *post,
196                            *ct;
197         int4            dimt,
198                                 lenct,
199                                 dist;
200         float           res = -1.0;
201         ITEM      **item;
202         int                     size = q->size;
203
204         item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
205         if (size < 2)
206         {
207                 pfree(item);
208                 return calc_rank_or(w, t, q);
209         }
210         pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
211         memset(pos, 0, sizeof(uint16 *) * q->size);
212         *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
213         WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1);
214
215         for (i = 0; i < size; i++)
216         {
217                 entry = find_wordentry(t, q, item[i]);
218                 if (!entry)
219                         continue;
220
221                 if (entry->haspos)
222                         pos[i] = (uint16 *) _POSDATAPTR(t, entry);
223                 else
224                         pos[i] = (uint16 *) POSNULL;
225
226
227                 dimt = *(uint16 *) (pos[i]);
228                 post = (WordEntryPos *) (pos[i] + 1);
229                 for (k = 0; k < i; k++)
230                 {
231                         if (!pos[k])
232                                 continue;
233                         lenct = *(uint16 *) (pos[k]);
234                         ct = (WordEntryPos *) (pos[k] + 1);
235                         for (l = 0; l < dimt; l++)
236                         {
237                                 for (p = 0; p < lenct; p++)
238                                 {
239                                         dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
240                                         if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL)))
241                                         {
242                                                 float           curw;
243
244                                                 if (!dist)
245                                                         dist = MAXENTRYPOS;
246                                                 curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
247                                                 res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
248                                         }
249                                 }
250                         }
251                 }
252         }
253         pfree(pos);
254         pfree(item);
255         return res;
256 }
257
258 static float
259 calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
260 {
261         WordEntry  *entry;
262         WordEntryPos *post;
263         int4            dimt,
264                                 j,
265                                 i;
266         float           res = 0.0;
267         ITEM      **item;
268         int                     size = q->size;
269
270         *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
271         item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
272
273         for (i = 0; i < size; i++)
274         {
275                 float           resj,
276                                         wjm;
277                 int4            jm;
278
279                 entry = find_wordentry(t, q, item[i]);
280                 if (!entry)
281                         continue;
282
283                 if (entry->haspos)
284                 {
285                         dimt = POSDATALEN(t, entry);
286                         post = POSDATAPTR(t, entry);
287                 }
288                 else
289                 {
290                         dimt = *(uint16 *) POSNULL;
291                         post = POSNULL + 1;
292                 }
293
294                 resj = 0.0;
295                 wjm = -1.0;
296                 jm = 0;
297                 for (j = 0; j < dimt; j++)
298                 {
299                         resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
300                         if (wpos(post[j]) > wjm)
301                         {
302                                 wjm = wpos(post[j]);
303                                 jm = j;
304                         }
305                 }
306 /*
307                 limit (sum(i/i^2),i->inf) = pi^2/6
308                 resj = sum(wi/i^2),i=1,noccurence,
309                 wi - should be sorted desc,
310                 don't sort for now, just choose maximum weight. This should be corrected
311                 Oleg Bartunov
312 */
313                 res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
314         }
315         if (size > 0)
316                 res = res / size;
317         pfree(item);
318         return res;
319 }
320
321 static float
322 calc_rank(float *w, tsvector * t, QUERYTYPE * q, int4 method)
323 {
324         ITEM       *item = GETQUERY(q);
325         float           res = 0.0;
326         int                     len;
327
328         if (!t->size || !q->size)
329                 return 0.0;
330
331         res = (item->type != VAL && item->val == (int4) '&') ?
332                 calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
333
334         if (res < 0)
335                 res = 1e-20;
336
337         if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
338                 res /= log((double) (cnt_length(t) + 1)) / log(2.0);
339
340         if (method & RANK_NORM_LENGTH)
341         {
342                 len = cnt_length(t);
343                 if (len > 0)
344                         res /= (float) len;
345         }
346
347         if ((method & RANK_NORM_UNIQ) && t->size > 0)
348                 res /= (float) (t->size);
349
350         if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
351                 res /= log((double) (t->size + 1)) / log(2.0);
352
353         return res;
354 }
355
356 Datum
357 rank(PG_FUNCTION_ARGS)
358 {
359         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
360         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
361         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
362         int                     method = DEF_NORM_METHOD;
363         float           res = 0.0;
364         float           ws[lengthof(weights)];
365         float4     *arrdata;
366         int                     i;
367
368         if (ARR_NDIM(win) != 1)
369                 ereport(ERROR,
370                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
371                                  errmsg("array of weight must be one-dimensional")));
372
373         if (ARRNELEMS(win) < lengthof(weights))
374                 ereport(ERROR,
375                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
376                                  errmsg("array of weight is too short")));
377
378         if (ARR_HASNULL(win))
379                 ereport(ERROR,
380                                 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
381                                  errmsg("array of weight must not contain nulls")));
382
383         arrdata = (float4 *) ARR_DATA_PTR(win);
384         for (i = 0; i < lengthof(weights); i++)
385         {
386                 ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i];
387                 if (ws[i] > 1.0)
388                         ereport(ERROR,
389                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
390                                          errmsg("weight out of range")));
391         }
392
393         if (PG_NARGS() == 4)
394                 method = PG_GETARG_INT32(3);
395
396         res = calc_rank(ws, txt, query, method);
397
398         PG_FREE_IF_COPY(win, 0);
399         PG_FREE_IF_COPY(txt, 1);
400         PG_FREE_IF_COPY(query, 2);
401         PG_RETURN_FLOAT4(res);
402 }
403
404 Datum
405 rank_def(PG_FUNCTION_ARGS)
406 {
407         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
408         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
409         float           res = 0.0;
410         int                     method = DEF_NORM_METHOD;
411
412         if (PG_NARGS() == 3)
413                 method = PG_GETARG_INT32(2);
414
415         res = calc_rank(weights, txt, query, method);
416
417         PG_FREE_IF_COPY(txt, 0);
418         PG_FREE_IF_COPY(query, 1);
419         PG_RETURN_FLOAT4(res);
420 }
421
422
423 typedef struct
424 {
425         ITEM      **item;
426         int16           nitem;
427         bool            needfree;
428         uint8           wclass;
429         int32           pos;
430 }       DocRepresentation;
431
432 static int
433 compareDocR(const void *a, const void *b)
434 {
435         if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos)
436                 return 0;
437         return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
438 }
439
440 static bool
441 checkcondition_ITEM(void *checkval, ITEM * val)
442 {
443         return (bool) (val->istrue);
444 }
445
446 static void
447 reset_istrue_flag(QUERYTYPE * query)
448 {
449         ITEM       *item = GETQUERY(query);
450         int                     i;
451
452         /* reset istrue flag */
453         for (i = 0; i < query->size; i++)
454         {
455                 if (item->type == VAL)
456                         item->istrue = 0;
457                 item++;
458         }
459 }
460
461 typedef struct
462 {
463         int                     pos;
464         int                     p;
465         int                     q;
466         DocRepresentation *begin;
467         DocRepresentation *end;
468 }       Extention;
469
470
471 static bool
472 Cover(DocRepresentation * doc, int len, QUERYTYPE * query, Extention * ext)
473 {
474         DocRepresentation *ptr;
475         int                     lastpos = ext->pos;
476         int                     i;
477         bool            found = false;
478
479         reset_istrue_flag(query);
480
481         ext->p = 0x7fffffff;
482         ext->q = 0;
483         ptr = doc + ext->pos;
484
485         /* find upper bound of cover from current position, move up */
486         while (ptr - doc < len)
487         {
488                 for (i = 0; i < ptr->nitem; i++)
489                         ptr->item[i]->istrue = 1;
490                 if (TS_execute(GETQUERY(query), NULL, false, checkcondition_ITEM))
491                 {
492                         if (ptr->pos > ext->q)
493                         {
494                                 ext->q = ptr->pos;
495                                 ext->end = ptr;
496                                 lastpos = ptr - doc;
497                                 found = true;
498                         }
499                         break;
500                 }
501                 ptr++;
502         }
503
504         if (!found)
505                 return false;
506
507         reset_istrue_flag(query);
508
509         ptr = doc + lastpos;
510
511         /* find lower bound of cover from founded upper bound, move down */
512         while (ptr >= doc)
513         {
514                 for (i = 0; i < ptr->nitem; i++)
515                         ptr->item[i]->istrue = 1;
516                 if (TS_execute(GETQUERY(query), NULL, true, checkcondition_ITEM))
517                 {
518                         if (ptr->pos < ext->p)
519                         {
520                                 ext->begin = ptr;
521                                 ext->p = ptr->pos;
522                         }
523                         break;
524                 }
525                 ptr--;
526         }
527
528         if (ext->p <= ext->q)
529         {
530                 /*
531                  * set position for next try to next lexeme after begining of founded
532                  * cover
533                  */
534                 ext->pos = (ptr - doc) + 1;
535                 return true;
536         }
537
538         ext->pos++;
539         return Cover(doc, len, query, ext);
540 }
541
542 static DocRepresentation *
543 get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen)
544 {
545         ITEM       *item = GETQUERY(query);
546         WordEntry  *entry;
547         WordEntryPos *post;
548         int4            dimt,
549                                 j,
550                                 i;
551         int                     len = query->size * 4,
552                                 cur = 0;
553         DocRepresentation *doc;
554
555         *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
556         doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
557         SortAndUniqOperand = GETOPERAND(query);
558         reset_istrue_flag(query);
559
560         for (i = 0; i < query->size; i++)
561         {
562                 if (item[i].type != VAL || item[i].istrue)
563                         continue;
564
565                 entry = find_wordentry(txt, query, &(item[i]));
566                 if (!entry)
567                         continue;
568
569                 if (entry->haspos)
570                 {
571                         dimt = POSDATALEN(txt, entry);
572                         post = POSDATAPTR(txt, entry);
573                 }
574                 else
575                 {
576                         dimt = *(uint16 *) POSNULL;
577                         post = POSNULL + 1;
578                 }
579
580                 while (cur + dimt >= len)
581                 {
582                         len *= 2;
583                         doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
584                 }
585
586                 for (j = 0; j < dimt; j++)
587                 {
588                         if (j == 0)
589                         {
590                                 ITEM       *kptr,
591                                                    *iptr = item + i;
592                                 int                     k;
593
594                                 doc[cur].needfree = false;
595                                 doc[cur].nitem = 0;
596                                 doc[cur].item = (ITEM **) palloc(sizeof(ITEM *) * query->size);
597
598                                 for (k = 0; k < query->size; k++)
599                                 {
600                                         kptr = item + k;
601                                         if (k == i || (item[k].type == VAL && compareITEM(&kptr, &iptr) == 0))
602                                         {
603                                                 doc[cur].item[doc[cur].nitem] = item + k;
604                                                 doc[cur].nitem++;
605                                                 kptr->istrue = 1;
606                                         }
607                                 }
608                         }
609                         else
610                         {
611                                 doc[cur].needfree = false;
612                                 doc[cur].nitem = doc[cur - 1].nitem;
613                                 doc[cur].item = doc[cur - 1].item;
614                         }
615                         doc[cur].pos = WEP_GETPOS(post[j]);
616                         doc[cur].wclass = WEP_GETWEIGHT(post[j]);
617                         cur++;
618                 }
619         }
620
621         *doclen = cur;
622
623         if (cur > 0)
624         {
625                 if (cur > 1)
626                         qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
627                 return doc;
628         }
629
630         pfree(doc);
631         return NULL;
632 }
633
634 static float4
635 calc_rank_cd(float4 *arrdata, tsvector * txt, QUERYTYPE * query, int method)
636 {
637         DocRepresentation *doc;
638         int                     len,
639                                 i,
640                                 doclen = 0;
641         Extention       ext;
642         double          Wdoc = 0.0;
643         double          invws[lengthof(weights)];
644         double          SumDist = 0.0,
645                                 PrevExtPos = 0.0,
646                                 CurExtPos = 0.0;
647         int                     NExtent = 0;
648
649         for (i = 0; i < lengthof(weights); i++)
650         {
651                 invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
652                 if (invws[i] > 1.0)
653                         ereport(ERROR,
654                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
655                                          errmsg("weight out of range")));
656                 invws[i] = 1.0 / invws[i];
657         }
658
659         doc = get_docrep(txt, query, &doclen);
660         if (!doc)
661                 return 0.0;
662
663         MemSet(&ext, 0, sizeof(Extention));
664         while (Cover(doc, doclen, query, &ext))
665         {
666                 double          Cpos = 0.0;
667                 double          InvSum = 0.0;
668                 DocRepresentation *ptr = ext.begin;
669
670                 while (ptr <= ext.end)
671                 {
672                         InvSum += invws[ptr->wclass];
673                         ptr++;
674                 }
675
676                 Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
677                 Wdoc += Cpos / ((double) ((1 + (ext.q - ext.p) - (ext.end - ext.begin))));
678
679                 CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
680                 if (NExtent > 0 && CurExtPos > PrevExtPos               /* prevent devision by
681                                                                                                                  * zero in a case of
682                                 multiple lexize */ )
683                         SumDist += 1.0 / (CurExtPos - PrevExtPos);
684
685                 PrevExtPos = CurExtPos;
686                 NExtent++;
687         }
688
689         if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
690                 Wdoc /= log((double) (cnt_length(txt) + 1));
691
692         if (method & RANK_NORM_LENGTH)
693         {
694                 len = cnt_length(txt);
695                 if (len > 0)
696                         Wdoc /= (double) len;
697         }
698
699         if ((method & RANK_NORM_EXTDIST) && SumDist > 0)
700                 Wdoc /= ((double) NExtent) / SumDist;
701
702         if ((method & RANK_NORM_UNIQ) && txt->size > 0)
703                 Wdoc /= (double) (txt->size);
704
705         if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
706                 Wdoc /= log((double) (txt->size + 1)) / log(2.0);
707
708         for (i = 0; i < doclen; i++)
709                 if (doc[i].needfree)
710                         pfree(doc[i].item);
711         pfree(doc);
712
713         return (float4) Wdoc;
714 }
715
716 Datum
717 rank_cd(PG_FUNCTION_ARGS)
718 {
719         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
720         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
721         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(2));
722         int                     method = DEF_NORM_METHOD;
723         float4          res;
724
725         if (ARR_NDIM(win) != 1)
726                 ereport(ERROR,
727                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
728                                  errmsg("array of weight must be one-dimensional")));
729
730         if (ARRNELEMS(win) < lengthof(weights))
731                 ereport(ERROR,
732                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
733                                  errmsg("array of weight is too short")));
734
735         if (ARR_HASNULL(win))
736                 ereport(ERROR,
737                                 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
738                                  errmsg("array of weight must not contain nulls")));
739
740         if (PG_NARGS() == 4)
741                 method = PG_GETARG_INT32(3);
742
743         res = calc_rank_cd((float4 *) ARR_DATA_PTR(win), txt, query, method);
744
745         PG_FREE_IF_COPY(win, 0);
746         PG_FREE_IF_COPY(txt, 1);
747         PG_FREE_IF_COPY(query, 2);
748
749         PG_RETURN_FLOAT4(res);
750 }
751
752
753 Datum
754 rank_cd_def(PG_FUNCTION_ARGS)
755 {
756         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
757         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1));
758         float4          res;
759
760         res = calc_rank_cd(weights, txt, query, (PG_NARGS() == 3) ? PG_GETARG_DATUM(2) : DEF_NORM_METHOD);
761
762         PG_FREE_IF_COPY(txt, 0);
763         PG_FREE_IF_COPY(query, 1);
764
765         PG_RETURN_FLOAT4(res);
766 }
767
768 /**************debug*************/
769
770 typedef struct
771 {
772         char       *w;
773         int2            len;
774         int2            pos;
775         int2            start;
776         int2            finish;
777 }       DocWord;
778
779 static int
780 compareDocWord(const void *a, const void *b)
781 {
782         if (((DocWord *) a)->pos == ((DocWord *) b)->pos)
783                 return 0;
784         return (((DocWord *) a)->pos > ((DocWord *) b)->pos) ? 1 : -1;
785 }
786
787
788 Datum
789 get_covers(PG_FUNCTION_ARGS)
790 {
791         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
792         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1));
793         WordEntry  *pptr = ARRPTR(txt);
794         int                     i,
795                                 dlen = 0,
796                                 j,
797                                 cur = 0,
798                                 len = 0,
799                                 rlen;
800         DocWord    *dw,
801                            *dwptr;
802         text       *out;
803         char       *cptr;
804         DocRepresentation *doc;
805         int                     olddwpos = 0;
806         int                     ncover = 1;
807         Extention       ext;
808
809         doc = get_docrep(txt, query, &rlen);
810
811         if (!doc)
812         {
813                 out = palloc(VARHDRSZ);
814                 VARATT_SIZEP(out) = VARHDRSZ;
815                 PG_FREE_IF_COPY(txt, 0);
816                 PG_FREE_IF_COPY(query, 1);
817                 PG_RETURN_POINTER(out);
818         }
819
820         for (i = 0; i < txt->size; i++)
821         {
822                 if (!pptr[i].haspos)
823                         ereport(ERROR,
824                                         (errcode(ERRCODE_SYNTAX_ERROR),
825                                          errmsg("no pos info")));
826                 dlen += POSDATALEN(txt, &(pptr[i]));
827         }
828
829         dwptr = dw = palloc(sizeof(DocWord) * dlen);
830         memset(dw, 0, sizeof(DocWord) * dlen);
831
832         for (i = 0; i < txt->size; i++)
833         {
834                 WordEntryPos *posdata = POSDATAPTR(txt, &(pptr[i]));
835
836                 for (j = 0; j < POSDATALEN(txt, &(pptr[i])); j++)
837                 {
838                         dw[cur].w = STRPTR(txt) + pptr[i].pos;
839                         dw[cur].len = pptr[i].len;
840                         dw[cur].pos = WEP_GETPOS(posdata[j]);
841                         cur++;
842                 }
843                 len += (pptr[i].len + 1) * (int) POSDATALEN(txt, &(pptr[i]));
844         }
845         qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
846
847         MemSet(&ext, 0, sizeof(Extention));
848         while (Cover(doc, rlen, query, &ext))
849         {
850                 dwptr = dw + olddwpos;
851                 while (dwptr->pos < ext.p && dwptr - dw < dlen)
852                         dwptr++;
853                 olddwpos = dwptr - dw;
854                 dwptr->start = ncover;
855                 while (dwptr->pos < ext.q + 1 && dwptr - dw < dlen)
856                         dwptr++;
857                 (dwptr - 1)->finish = ncover;
858                 len += 4 /* {}+two spaces */ + 2 * 16 /* numbers */ ;
859                 ncover++;
860         }
861
862         out = palloc(VARHDRSZ + len);
863         cptr = ((char *) out) + VARHDRSZ;
864         dwptr = dw;
865
866         while (dwptr - dw < dlen)
867         {
868                 if (dwptr->start)
869                 {
870                         sprintf(cptr, "{%d ", dwptr->start);
871                         cptr = strchr(cptr, '\0');
872                 }
873                 memcpy(cptr, dwptr->w, dwptr->len);
874                 cptr += dwptr->len;
875                 *cptr = ' ';
876                 cptr++;
877                 if (dwptr->finish)
878                 {
879                         sprintf(cptr, "}%d ", dwptr->finish);
880                         cptr = strchr(cptr, '\0');
881                 }
882                 dwptr++;
883         }
884
885         VARATT_SIZEP(out) = cptr - ((char *) out);
886
887         pfree(dw);
888         for (i = 0; i < rlen; i++)
889                 if (doc[i].needfree)
890                         pfree(doc[i].item);
891         pfree(doc);
892
893         PG_FREE_IF_COPY(txt, 0);
894         PG_FREE_IF_COPY(query, 1);
895         PG_RETURN_POINTER(out);
896 }