]> granicus.if.org Git - postgresql/blob - contrib/tsearch2/rank.c
Fix stupid mistake in rank_cd_def cleanup
[postgresql] / contrib / tsearch2 / rank.c
/*
 * Relevance ranking
 * Teodor Sigaev <teodor@sigaev.ru>
 */
5 #include "postgres.h"
6
7 #include <math.h>
8
9 #include "access/gist.h"
10 #include "access/itup.h"
11 #include "catalog/namespace.h"
12 #include "commands/trigger.h"
13 #include "executor/spi.h"
14 #include "fmgr.h"
15 #include "funcapi.h"
16 #include "nodes/pg_list.h"
17 #include "storage/bufpage.h"
18 #include "utils/array.h"
19 #include "utils/builtins.h"
20
21 #include "tsvector.h"
22 #include "query.h"
23 #include "common.h"
24
/*
 * SQL-callable entry points defined in this file, each registered with
 * PG_FUNCTION_INFO_V1 and declared with the PG_FUNCTION_ARGS signature.
 */
PG_FUNCTION_INFO_V1(rank);
Datum           rank(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(rank_def);
Datum           rank_def(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(rank_cd);
Datum           rank_cd(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(rank_cd_def);
Datum           rank_cd_def(PG_FUNCTION_ARGS);

PG_FUNCTION_INFO_V1(get_covers);
Datum           get_covers(PG_FUNCTION_ARGS);

/*
 * Default weight table.  It is used directly by rank_def() and
 * rank_cd_def(), and as the per-element fallback when a caller-supplied
 * weight is negative.  Indexed by the value of WEP_GETWEIGHT().
 */
static float weights[] = {0.1, 0.2, 0.4, 1.0};

/*
 * Weight of position entry wep.  Expands to a lookup in an array that must
 * be visible under the name "w" at the point of use.
 */
#define wpos(wep)       ( w[ WEP_GETWEIGHT(wep) ] )

/*
 * Normalization method bits; tested individually with "&" in calc_rank()
 * and calc_rank_cd(), so several may be combined:
 *   RANK_NORM_LOGLENGTH  divide by a logarithm of (cnt_length() + 1)
 *   RANK_NORM_LENGTH     divide by cnt_length()
 *   RANK_NORM_EXTDIST    only consulted by calc_rank_cd(); divides by
 *                        NExtent / SumDist
 *   RANK_NORM_UNIQ       divide by the tsvector's size field
 *   RANK_NORM_LOGUNIQ    divide by log2(size + 1)
 */
#define RANK_NO_NORM            0x00
#define RANK_NORM_LOGLENGTH     0x01
#define RANK_NORM_LENGTH        0x02
#define RANK_NORM_EXTDIST       0x04
#define RANK_NORM_UNIQ          0x08
#define RANK_NORM_LOGUNIQ       0x10
#define DEF_NORM_METHOD         RANK_NO_NORM

/* Forward declarations: calc_rank_and() falls back to calc_rank_or(). */
static float calc_rank_or(float *w, tsvector * t, QUERYTYPE * q);
static float calc_rank_and(float *w, tsvector * t, QUERYTYPE * q);
54
55 /*
56  * Returns a weight of a word collocation
57  */
58 static float4
59 word_distance(int4 w)
60 {
61         if (w > 100)
62                 return 1e-30;
63
64         return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
65 }
66
67 static int
68 cnt_length(tsvector * t)
69 {
70         WordEntry  *ptr = ARRPTR(t),
71                            *end = (WordEntry *) STRPTR(t);
72         int                     len = 0,
73                                 clen;
74
75         while (ptr < end)
76         {
77                 if ((clen = POSDATALEN(t, ptr)) == 0)
78                         len += 1;
79                 else
80                         len += clen;
81                 ptr++;
82         }
83
84         return len;
85 }
86
87 static int4
88 WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item)
89 {
90         if (ptr->len == item->length)
91                 return strncmp(
92                                            eval + ptr->pos,
93                                            qval + item->distance,
94                                            item->length);
95
96         return (ptr->len > item->length) ? 1 : -1;
97 }
98
99 static WordEntry *
100 find_wordentry(tsvector * t, QUERYTYPE * q, ITEM * item)
101 {
102         WordEntry  *StopLow = ARRPTR(t);
103         WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
104         WordEntry  *StopMiddle;
105         int                     difference;
106
107         /* Loop invariant: StopLow <= item < StopHigh */
108
109         while (StopLow < StopHigh)
110         {
111                 StopMiddle = StopLow + (StopHigh - StopLow) / 2;
112                 difference = WordECompareITEM(STRPTR(t), GETOPERAND(q), StopMiddle, item);
113                 if (difference == 0)
114                         return StopMiddle;
115                 else if (difference < 0)
116                         StopLow = StopMiddle + 1;
117                 else
118                         StopHigh = StopMiddle;
119         }
120
121         return NULL;
122 }
123
124
125 static char *SortAndUniqOperand = NULL;
126
127 static int
128 compareITEM(const void *a, const void *b)
129 {
130         if ((*(ITEM **) a)->length == (*(ITEM **) b)->length)
131                 return strncmp(SortAndUniqOperand + (*(ITEM **) a)->distance,
132                                            SortAndUniqOperand + (*(ITEM **) b)->distance,
133                                            (*(ITEM **) b)->length);
134
135         return ((*(ITEM **) a)->length > (*(ITEM **) b)->length) ? 1 : -1;
136 }
137
138 static ITEM **
139 SortAndUniqItems(char *operand, ITEM * item, int *size)
140 {
141         ITEM      **res,
142                           **ptr,
143                           **prevptr;
144
145         ptr = res = (ITEM **) palloc(sizeof(ITEM *) * *size);
146
147         while ((*size)--)
148         {
149                 if (item->type == VAL)
150                 {
151                         *ptr = item;
152                         ptr++;
153                 }
154                 item++;
155         }
156
157         *size = ptr - res;
158         if (*size < 2)
159                 return res;
160
161         SortAndUniqOperand = operand;
162         qsort(res, *size, sizeof(ITEM **), compareITEM);
163
164         ptr = res + 1;
165         prevptr = res;
166
167         while (ptr - res < *size)
168         {
169                 if (compareITEM((void *) ptr, (void *) prevptr) != 0)
170                 {
171                         prevptr++;
172                         *prevptr = *ptr;
173                 }
174                 ptr++;
175         }
176
177         *size = prevptr + 1 - res;
178         return res;
179 }
180
/*
 * Stand-in position list for lexemes stored without position data.
 * Element 0 is overwritten at run time, through a uint16 cast, with the
 * entry count (lengthof(POSNULL) - 1); the entries start at POSNULL + 1.
 */
static WordEntryPos POSNULL[] = {
        0,
        0
};
185
186 static float
187 calc_rank_and(float *w, tsvector * t, QUERYTYPE * q)
188 {
189         uint16    **pos;
190         int                     i,
191                                 k,
192                                 l,
193                                 p;
194         WordEntry  *entry;
195         WordEntryPos *post,
196                            *ct;
197         int4            dimt,
198                                 lenct,
199                                 dist;
200         float           res = -1.0;
201         ITEM      **item;
202         int                     size = q->size;
203
204         item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
205         if (size < 2)
206         {
207                 pfree(item);
208                 return calc_rank_or(w, t, q);
209         }
210         pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
211         memset(pos, 0, sizeof(uint16 *) * q->size);
212         *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
213         WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1);
214
215         for (i = 0; i < size; i++)
216         {
217                 entry = find_wordentry(t, q, item[i]);
218                 if (!entry)
219                         continue;
220
221                 if (entry->haspos)
222                         pos[i] = (uint16 *) _POSDATAPTR(t, entry);
223                 else
224                         pos[i] = (uint16 *) POSNULL;
225
226
227                 dimt = *(uint16 *) (pos[i]);
228                 post = (WordEntryPos *) (pos[i] + 1);
229                 for (k = 0; k < i; k++)
230                 {
231                         if (!pos[k])
232                                 continue;
233                         lenct = *(uint16 *) (pos[k]);
234                         ct = (WordEntryPos *) (pos[k] + 1);
235                         for (l = 0; l < dimt; l++)
236                         {
237                                 for (p = 0; p < lenct; p++)
238                                 {
239                                         dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
240                                         if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL)))
241                                         {
242                                                 float           curw;
243
244                                                 if (!dist)
245                                                         dist = MAXENTRYPOS;
246                                                 curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
247                                                 res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
248                                         }
249                                 }
250                         }
251                 }
252         }
253         pfree(pos);
254         pfree(item);
255         return res;
256 }
257
258 static float
259 calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
260 {
261         WordEntry  *entry;
262         WordEntryPos *post;
263         int4            dimt,
264                                 j,
265                                 i;
266         float           res = 0.0;
267         ITEM      **item;
268         int                     size = q->size;
269
270         *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
271         item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
272
273         for (i = 0; i < size; i++)
274         {
275                 float           resj,
276                                         wjm;
277                 int4            jm;
278
279                 entry = find_wordentry(t, q, item[i]);
280                 if (!entry)
281                         continue;
282
283                 if (entry->haspos)
284                 {
285                         dimt = POSDATALEN(t, entry);
286                         post = POSDATAPTR(t, entry);
287                 }
288                 else
289                 {
290                         dimt = *(uint16 *) POSNULL;
291                         post = POSNULL + 1;
292                 }
293
294                 resj = 0.0;
295                 wjm = -1.0;
296                 jm = 0;
297                 for (j = 0; j < dimt; j++)
298                 {
299                         resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
300                         if (wpos(post[j]) > wjm)
301                         {
302                                 wjm = wpos(post[j]);
303                                 jm = j;
304                         }
305                 }
306 /*
307                 limit (sum(i/i^2),i->inf) = pi^2/6
308                 resj = sum(wi/i^2),i=1,noccurence,
309                 wi - should be sorted desc,
310                 don't sort for now, just choose maximum weight. This should be corrected
311                 Oleg Bartunov
312 */
313                 res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
314         }
315         if (size > 0)
316                 res = res / size;
317         pfree(item);
318         return res;
319 }
320
321 static float
322 calc_rank(float *w, tsvector * t, QUERYTYPE * q, int4 method)
323 {
324         ITEM       *item = GETQUERY(q);
325         float           res = 0.0;
326         int                     len;
327
328         if (!t->size || !q->size)
329                 return 0.0;
330
331         res = (item->type != VAL && item->val == (int4) '&') ?
332                 calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
333
334         if (res < 0)
335                 res = 1e-20;
336
337         if ( (method & RANK_NORM_LOGLENGTH) && t->size>0 )
338                 res /= log((double) (cnt_length(t) + 1)) / log(2.0);
339
340         if ( method & RANK_NORM_LENGTH ) {
341                 len = cnt_length(t);
342                 if ( len>0 )
343                         res /= (float) len;
344         }
345
346         if ( (method & RANK_NORM_UNIQ) && t->size > 0 )
347                 res /= (float)( t->size );
348
349         if ( (method & RANK_NORM_LOGUNIQ) && t->size > 0 )
350                 res /= log((double) (t->size + 1)) / log(2.0);
351
352         return res;
353 }
354
355 Datum
356 rank(PG_FUNCTION_ARGS)
357 {
358         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
359         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
360         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
361         int                     method = DEF_NORM_METHOD;
362         float           res = 0.0;
363         float           ws[lengthof(weights)];
364         float4     *arrdata;
365         int                     i;
366
367         if (ARR_NDIM(win) != 1)
368                 ereport(ERROR,
369                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
370                                  errmsg("array of weight must be one-dimensional")));
371
372         if (ARRNELEMS(win) < lengthof(weights))
373                 ereport(ERROR,
374                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
375                                  errmsg("array of weight is too short")));
376
377         if (ARR_HASNULL(win))
378                 ereport(ERROR,
379                                 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
380                                  errmsg("array of weight must not contain nulls")));
381
382         arrdata = (float4 *) ARR_DATA_PTR(win);
383         for (i = 0; i < lengthof(weights); i++)
384         {
385                 ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i];
386                 if (ws[i] > 1.0)
387                         ereport(ERROR,
388                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
389                                          errmsg("weight out of range")));
390         }
391
392         if (PG_NARGS() == 4)
393                 method = PG_GETARG_INT32(3);
394
395         res = calc_rank(ws, txt, query, method);
396
397         PG_FREE_IF_COPY(win, 0);
398         PG_FREE_IF_COPY(txt, 1);
399         PG_FREE_IF_COPY(query, 2);
400         PG_RETURN_FLOAT4(res);
401 }
402
403 Datum
404 rank_def(PG_FUNCTION_ARGS)
405 {
406         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
407         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
408         float           res = 0.0;
409         int                     method = DEF_NORM_METHOD;
410
411         if (PG_NARGS() == 3)
412                 method = PG_GETARG_INT32(2);
413
414         res = calc_rank(weights, txt, query, method);
415
416         PG_FREE_IF_COPY(txt, 0);
417         PG_FREE_IF_COPY(query, 1);
418         PG_RETURN_FLOAT4(res);
419 }
420
421
/*
 * One occurrence of a query lexeme in the document, as built by
 * get_docrep().  The array of these is sorted by pos.
 */
typedef struct
{
        /* query items that name the lexeme found at this position */
        ITEM      **item;
        /* number of valid pointers in item[] */
        int16           nitem;
        /* when true, the cleanup loops in the callers pfree() item */
        bool            needfree;
        /* WEP_GETWEIGHT() of the position entry */
        uint8           wclass;
        /* WEP_GETPOS() of the position entry */
        int32           pos;
}       DocRepresentation;
430
431 static int
432 compareDocR(const void *a, const void *b)
433 {
434         if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos)
435                 return 0;
436         return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
437 }
438
439 static bool
440 checkcondition_ITEM(void *checkval, ITEM * val)
441 {
442         return (bool) (val->istrue);
443 }
444
445 static void
446 reset_istrue_flag(QUERYTYPE * query)
447 {
448         ITEM       *item = GETQUERY(query);
449         int                     i;
450
451         /* reset istrue flag */
452         for (i = 0; i < query->size; i++)
453         {
454                 if (item->type == VAL)
455                         item->istrue = 0;
456                 item++;
457         }
458 }
459
/*
 * Search state and result of Cover().  Callers zero it before the first
 * call and pass the same object to every following call.
 */
typedef struct {
        /* index into the doc array at which the next Cover() search starts */
        int     pos;
        /* document position (DocRepresentation.pos) where the cover begins */
        int     p;
        /* document position (DocRepresentation.pos) where the cover ends */
        int     q;
        /* doc array entry at the lower bound of the cover */
        DocRepresentation       *begin;
        /* doc array entry at the upper bound of the cover */
        DocRepresentation       *end;
} Extention;
467
468
469 static bool
470 Cover(DocRepresentation * doc, int len, QUERYTYPE * query, Extention *ext)
471 {
472         DocRepresentation *ptr;
473         int                     lastpos = ext->pos;
474         int                     i;
475         bool            found = false;
476
477         reset_istrue_flag(query);
478
479         ext->p = 0x7fffffff;
480         ext->q = 0;
481         ptr = doc + ext->pos;
482
483         /* find upper bound of cover from current position, move up */
484         while (ptr - doc < len)
485         {
486                 for (i = 0; i < ptr->nitem; i++)
487                         ptr->item[i]->istrue = 1;
488                 if (TS_execute(GETQUERY(query), NULL, false, checkcondition_ITEM))
489                 {
490                         if (ptr->pos > ext->q)
491                         {
492                                 ext->q = ptr->pos;
493                                 ext->end = ptr;
494                                 lastpos = ptr - doc;
495                                 found = true;
496                         }
497                         break;
498                 }
499                 ptr++;
500         }
501
502         if (!found)
503                 return false;
504
505         reset_istrue_flag(query);
506
507         ptr = doc + lastpos;
508
509         /* find lower bound of cover from founded upper bound, move down */
510         while (ptr >= doc)
511         {
512                 for (i = 0; i < ptr->nitem; i++)
513                         ptr->item[i]->istrue = 1;
514                 if (TS_execute(GETQUERY(query), NULL, true, checkcondition_ITEM))
515                 {
516                         if (ptr->pos < ext->p) {
517                                 ext->begin = ptr;
518                                 ext->p = ptr->pos;
519                         }
520                         break;
521                 }
522                 ptr--;
523         }
524
525         if (ext->p <= ext->q)
526         {
527                 /*
528                  * set position for next try to next lexeme after begining of founded
529                  * cover
530                  */
531                 ext->pos = (ptr - doc) + 1;
532                 return true;
533         }
534
535         ext->pos++;
536         return Cover(doc, len, query, ext);
537 }
538
539 static DocRepresentation *
540 get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen)
541 {
542         ITEM       *item = GETQUERY(query);
543         WordEntry  *entry;
544         WordEntryPos *post;
545         int4            dimt,
546                                 j,
547                                 i;
548         int                     len = query->size * 4,
549                                 cur = 0;
550         DocRepresentation *doc;
551
552         *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
553         doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
554         SortAndUniqOperand = GETOPERAND(query);
555         reset_istrue_flag(query);
556
557         for (i = 0; i < query->size; i++)
558         {
559                 if (item[i].type != VAL || item[i].istrue)
560                         continue;
561
562                 entry = find_wordentry(txt, query, &(item[i]));
563                 if (!entry)
564                         continue;
565
566                 if (entry->haspos)
567                 {
568                         dimt = POSDATALEN(txt, entry);
569                         post = POSDATAPTR(txt, entry);
570                 }
571                 else
572                 {
573                         dimt = *(uint16 *) POSNULL;
574                         post = POSNULL + 1;
575                 }
576
577                 while (cur + dimt >= len)
578                 {
579                         len *= 2;
580                         doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
581                 }
582
583                 for (j = 0; j < dimt; j++)
584                 {
585                         if (j == 0)
586                         {
587                                 ITEM       *kptr,
588                                                    *iptr = item + i;
589                                 int                     k;
590
591                                 doc[cur].needfree = false;
592                                 doc[cur].nitem = 0;
593                                 doc[cur].item = (ITEM **) palloc(sizeof(ITEM *) * query->size);
594
595                                 for (k = 0; k < query->size; k++)
596                                 {
597                                         kptr = item + k;
598                                         if (k == i || (item[k].type == VAL && compareITEM(&kptr, &iptr) == 0))
599                                         {
600                                                 doc[cur].item[doc[cur].nitem] = item + k;
601                                                 doc[cur].nitem++;
602                                                 kptr->istrue = 1;
603                                         }
604                                 }
605                         }
606                         else
607                         {
608                                 doc[cur].needfree = false;
609                                 doc[cur].nitem = doc[cur - 1].nitem;
610                                 doc[cur].item = doc[cur - 1].item;
611                         }
612                         doc[cur].pos = WEP_GETPOS(post[j]);
613                         doc[cur].wclass = WEP_GETWEIGHT(post[j]);
614                         cur++;
615                 }
616         }
617
618         *doclen = cur;
619
620         if (cur > 0)
621         {
622                 if (cur > 1)
623                         qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
624                 return doc;
625         }
626
627         pfree(doc);
628         return NULL;
629 }
630
631 static float4
632 calc_rank_cd(float4 *arrdata, tsvector *txt, QUERYTYPE *query, int method) {
633         DocRepresentation *doc;
634         int                     len,
635                                 i,
636                                 doclen = 0;
637         Extention       ext;
638         double          Wdoc = 0.0;
639         double          invws[lengthof(weights)];
640         double          SumDist=0.0, PrevExtPos=0.0, CurExtPos=0.0;
641         int             NExtent=0;
642
643         for (i = 0; i < lengthof(weights); i++)
644         {
645                 invws[i] = ((double)((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
646                 if (invws[i] > 1.0)
647                         ereport(ERROR,
648                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
649                                          errmsg("weight out of range")));
650                 invws[i] = 1.0/invws[i]; 
651         }
652
653         doc = get_docrep(txt, query, &doclen);
654         if (!doc) 
655                 return 0.0;
656
657         MemSet( &ext, 0, sizeof(Extention) );
658         while (Cover(doc, doclen, query, &ext)) {
659                 double  Cpos = 0.0;
660                 double  InvSum = 0.0;
661                 DocRepresentation *ptr = ext.begin;
662
663                 while ( ptr<=ext.end ) {
664                         InvSum += invws[ ptr->wclass ];
665                         ptr++;
666                 }
667
668                 Cpos = ((double)( ext.end-ext.begin+1 )) / InvSum;
669                 Wdoc += Cpos / ( (double)(( 1 + (ext.q - ext.p) - (ext.end - ext.begin) )) ); 
670
671                 CurExtPos = ((double)(ext.q + ext.p))/2.0; 
672                 if ( NExtent>0 && CurExtPos > PrevExtPos /* prevent devision by zero in a case of multiple lexize */ ) 
673                         SumDist += 1.0/( CurExtPos - PrevExtPos );
674
675                 PrevExtPos = CurExtPos;
676                 NExtent++; 
677         }
678
679         if ( (method & RANK_NORM_LOGLENGTH) && txt->size > 0 )
680                 Wdoc /= log((double) (cnt_length(txt) + 1));
681
682         if ( method & RANK_NORM_LENGTH ) {
683                 len = cnt_length(txt);
684                 if ( len>0 )
685                         Wdoc /= (double) len;
686         }
687
688         if ( (method & RANK_NORM_EXTDIST) && SumDist > 0 ) 
689                 Wdoc /= ((double)NExtent) / SumDist;
690
691         if ( (method & RANK_NORM_UNIQ) && txt->size > 0 )
692                 Wdoc /= (double)( txt->size );
693
694         if ( (method & RANK_NORM_LOGUNIQ) && txt->size > 0 )
695                 Wdoc /= log((double) (txt->size + 1)) / log(2.0);
696
697         for (i = 0; i < doclen; i++)
698                 if (doc[i].needfree)
699                         pfree(doc[i].item);
700         pfree(doc);
701
702         return (float4)Wdoc;
703
704
705 Datum
706 rank_cd(PG_FUNCTION_ARGS)
707 {
708         ArrayType *win =  (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
709         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
710         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(2));
711         int                     method = DEF_NORM_METHOD;
712         float4          res;
713
714         if (ARR_NDIM(win) != 1)
715                 ereport(ERROR,
716                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
717                                  errmsg("array of weight must be one-dimensional")));
718
719         if (ARRNELEMS(win) < lengthof(weights))
720                 ereport(ERROR,
721                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
722                                  errmsg("array of weight is too short")));
723
724         if (ARR_HASNULL(win))
725                 ereport(ERROR,
726                                 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
727                                  errmsg("array of weight must not contain nulls")));
728
729         if (PG_NARGS() == 4)
730                 method = PG_GETARG_INT32(3);
731
732         res = calc_rank_cd( (float4 *) ARR_DATA_PTR(win), txt, query, method);
733
734         PG_FREE_IF_COPY(win, 0);
735         PG_FREE_IF_COPY(txt, 1);
736         PG_FREE_IF_COPY(query, 2);
737
738         PG_RETURN_FLOAT4(res);
739 }
740
741
742 Datum
743 rank_cd_def(PG_FUNCTION_ARGS)
744 {
745         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
746         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1));
747         float4 res;
748
749         res = calc_rank_cd( weights, txt, query, (PG_NARGS() == 3) ? PG_GETARG_DATUM(2) : DEF_NORM_METHOD);
750         
751         PG_FREE_IF_COPY(txt, 0);
752         PG_FREE_IF_COPY(query, 1);
753
754         PG_RETURN_FLOAT4(res);
755 }
756
757 /**************debug*************/
758
/*
 * One word occurrence of the document, used by get_covers() to print the
 * text with cover markers.
 */
typedef struct
{
        /* start of the lexeme inside the tsvector's string area (not terminated) */
        char       *w;
        /* length of the lexeme in bytes */
        int2            len;
        /* position of this occurrence, from WEP_GETPOS() */
        int2            pos;
        /* number of the cover that starts at this word, 0 if none */
        int2            start;
        /* number of the cover that ends at this word, 0 if none */
        int2            finish;
}       DocWord;
767
768 static int
769 compareDocWord(const void *a, const void *b)
770 {
771         if (((DocWord *) a)->pos == ((DocWord *) b)->pos)
772                 return 0;
773         return (((DocWord *) a)->pos > ((DocWord *) b)->pos) ? 1 : -1;
774 }
775
776
777 Datum
778 get_covers(PG_FUNCTION_ARGS)
779 {
780         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
781         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1));
782         WordEntry  *pptr = ARRPTR(txt);
783         int                     i,
784                                 dlen = 0,
785                                 j,
786                                 cur = 0,
787                                 len = 0,
788                                 rlen;
789         DocWord    *dw,
790                            *dwptr;
791         text       *out;
792         char       *cptr;
793         DocRepresentation *doc;
794         int                     olddwpos = 0;
795         int                     ncover = 1;
796         Extention       ext;
797
798         doc = get_docrep(txt, query, &rlen);
799
800         if (!doc)
801         {
802                 out = palloc(VARHDRSZ);
803                 VARATT_SIZEP(out) = VARHDRSZ;
804                 PG_FREE_IF_COPY(txt, 0);
805                 PG_FREE_IF_COPY(query, 1);
806                 PG_RETURN_POINTER(out);
807         }
808
809         for (i = 0; i < txt->size; i++)
810         {
811                 if (!pptr[i].haspos)
812                         ereport(ERROR,
813                                         (errcode(ERRCODE_SYNTAX_ERROR),
814                                          errmsg("no pos info")));
815                 dlen += POSDATALEN(txt, &(pptr[i]));
816         }
817
818         dwptr = dw = palloc(sizeof(DocWord) * dlen);
819         memset(dw, 0, sizeof(DocWord) * dlen);
820
821         for (i = 0; i < txt->size; i++)
822         {
823                 WordEntryPos *posdata = POSDATAPTR(txt, &(pptr[i]));
824
825                 for (j = 0; j < POSDATALEN(txt, &(pptr[i])); j++)
826                 {
827                         dw[cur].w = STRPTR(txt) + pptr[i].pos;
828                         dw[cur].len = pptr[i].len;
829                         dw[cur].pos = WEP_GETPOS(posdata[j]);
830                         cur++;
831                 }
832                 len += (pptr[i].len + 1) * (int) POSDATALEN(txt, &(pptr[i]));
833         }
834         qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
835
836         MemSet( &ext, 0, sizeof(Extention) );
837         while (Cover(doc, rlen, query, &ext))
838         {
839                 dwptr = dw + olddwpos;
840                 while (dwptr->pos < ext.p && dwptr - dw < dlen)
841                         dwptr++;
842                 olddwpos = dwptr - dw;
843                 dwptr->start = ncover;
844                 while (dwptr->pos < ext.q + 1 && dwptr - dw < dlen)
845                         dwptr++;
846                 (dwptr - 1)->finish = ncover;
847                 len += 4 /* {}+two spaces */ + 2 * 16 /* numbers */ ;
848                 ncover++;
849         }
850
851         out = palloc(VARHDRSZ + len);
852         cptr = ((char *) out) + VARHDRSZ;
853         dwptr = dw;
854
855         while (dwptr - dw < dlen)
856         {
857                 if (dwptr->start)
858                 {
859                         sprintf(cptr, "{%d ", dwptr->start);
860                         cptr = strchr(cptr, '\0');
861                 }
862                 memcpy(cptr, dwptr->w, dwptr->len);
863                 cptr += dwptr->len;
864                 *cptr = ' ';
865                 cptr++;
866                 if (dwptr->finish)
867                 {
868                         sprintf(cptr, "}%d ", dwptr->finish);
869                         cptr = strchr(cptr, '\0');
870                 }
871                 dwptr++;
872         }
873
874         VARATT_SIZEP(out) = cptr - ((char *) out);
875
876         pfree(dw);
877         for (i = 0; i < rlen; i++)
878                 if (doc[i].needfree)
879                         pfree(doc[i].item);
880         pfree(doc);
881
882         PG_FREE_IF_COPY(txt, 0);
883         PG_FREE_IF_COPY(query, 1);
884         PG_RETURN_POINTER(out);
885 }