]> granicus.if.org Git - postgresql/blob - contrib/tsearch2/rank.c
Squelch some VC++ compiler warnings. Mark float literals with the "f"
[postgresql] / contrib / tsearch2 / rank.c
1 /*
2  * Relevation
3  * Teodor Sigaev <teodor@sigaev.ru>
4  */
5 #include "postgres.h"
6
7 #include <math.h>
8
9 #include "access/gist.h"
10 #include "access/itup.h"
11 #include "catalog/namespace.h"
12 #include "commands/trigger.h"
13 #include "executor/spi.h"
14 #include "fmgr.h"
15 #include "funcapi.h"
16 #include "nodes/pg_list.h"
17 #include "storage/bufpage.h"
18 #include "utils/array.h"
19 #include "utils/builtins.h"
20
21 #include "tsvector.h"
22 #include "query.h"
23 #include "common.h"
24
25 PG_FUNCTION_INFO_V1(rank);
26 Datum           rank(PG_FUNCTION_ARGS);
27
28 PG_FUNCTION_INFO_V1(rank_def);
29 Datum           rank_def(PG_FUNCTION_ARGS);
30
31 PG_FUNCTION_INFO_V1(rank_cd);
32 Datum           rank_cd(PG_FUNCTION_ARGS);
33
34 PG_FUNCTION_INFO_V1(rank_cd_def);
35 Datum           rank_cd_def(PG_FUNCTION_ARGS);
36
37 PG_FUNCTION_INFO_V1(get_covers);
38 Datum           get_covers(PG_FUNCTION_ARGS);
39
40 static float weights[] = {0.1f, 0.2f, 0.4f, 1.0f};
41
42 #define wpos(wep)       ( w[ WEP_GETWEIGHT(wep) ] )
43
44 #define RANK_NO_NORM            0x00
45 #define RANK_NORM_LOGLENGTH             0x01
46 #define RANK_NORM_LENGTH        0x02
47 #define RANK_NORM_EXTDIST       0x04
48 #define RANK_NORM_UNIQ          0x08
49 #define RANK_NORM_LOGUNIQ       0x10
50 #define DEF_NORM_METHOD         RANK_NO_NORM
51
52 static float calc_rank_or(float *w, tsvector * t, QUERYTYPE * q);
53 static float calc_rank_and(float *w, tsvector * t, QUERYTYPE * q);
54
55 /*
56  * Returns a weight of a word collocation
57  */
58 static float4
59 word_distance(int4 w)
60 {
61         if (w > 100)
62                 return (float4)1e-30;
63
64         return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
65 }
66
67 static int
68 cnt_length(tsvector * t)
69 {
70         WordEntry  *ptr = ARRPTR(t),
71                            *end = (WordEntry *) STRPTR(t);
72         int                     len = 0,
73                                 clen;
74
75         while (ptr < end)
76         {
77                 if ((clen = POSDATALEN(t, ptr)) == 0)
78                         len += 1;
79                 else
80                         len += clen;
81                 ptr++;
82         }
83
84         return len;
85 }
86
87 static int4
88 WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item)
89 {
90         if (ptr->len == item->length)
91                 return strncmp(
92                                            eval + ptr->pos,
93                                            qval + item->distance,
94                                            item->length);
95
96         return (ptr->len > item->length) ? 1 : -1;
97 }
98
99 static WordEntry *
100 find_wordentry(tsvector * t, QUERYTYPE * q, ITEM * item)
101 {
102         WordEntry  *StopLow = ARRPTR(t);
103         WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
104         WordEntry  *StopMiddle;
105         int                     difference;
106
107         /* Loop invariant: StopLow <= item < StopHigh */
108
109         while (StopLow < StopHigh)
110         {
111                 StopMiddle = StopLow + (StopHigh - StopLow) / 2;
112                 difference = WordECompareITEM(STRPTR(t), GETOPERAND(q), StopMiddle, item);
113                 if (difference == 0)
114                         return StopMiddle;
115                 else if (difference < 0)
116                         StopLow = StopMiddle + 1;
117                 else
118                         StopHigh = StopMiddle;
119         }
120
121         return NULL;
122 }
123
124
125 static int
126 compareITEM(const void *a, const void *b, void *arg)
127 {
128         char *operand = (char *) arg;
129
130         if ((*(ITEM **) a)->length == (*(ITEM **) b)->length)
131                 return strncmp(operand + (*(ITEM **) a)->distance,
132                                            operand + (*(ITEM **) b)->distance,
133                                            (*(ITEM **) b)->length);
134
135         return ((*(ITEM **) a)->length > (*(ITEM **) b)->length) ? 1 : -1;
136 }
137
138 static ITEM **
139 SortAndUniqItems(char *operand, ITEM * item, int *size)
140 {
141         ITEM      **res,
142                           **ptr,
143                           **prevptr;
144
145         ptr = res = (ITEM **) palloc(sizeof(ITEM *) * *size);
146
147         while ((*size)--)
148         {
149                 if (item->type == VAL)
150                 {
151                         *ptr = item;
152                         ptr++;
153                 }
154                 item++;
155         }
156
157         *size = ptr - res;
158         if (*size < 2)
159                 return res;
160
161         qsort_arg(res, *size, sizeof(ITEM **), compareITEM, (void *) operand);
162
163         ptr = res + 1;
164         prevptr = res;
165
166         while (ptr - res < *size)
167         {
168                 if (compareITEM((void *) ptr, (void *) prevptr, (void *) operand) != 0)
169                 {
170                         prevptr++;
171                         *prevptr = *ptr;
172                 }
173                 ptr++;
174         }
175
176         *size = prevptr + 1 - res;
177         return res;
178 }
179
180 static WordEntryPos POSNULL[] = {
181         0,
182         0
183 };
184
185 static float
186 calc_rank_and(float *w, tsvector * t, QUERYTYPE * q)
187 {
188         uint16    **pos;
189         int                     i,
190                                 k,
191                                 l,
192                                 p;
193         WordEntry  *entry;
194         WordEntryPos *post,
195                            *ct;
196         int4            dimt,
197                                 lenct,
198                                 dist;
199         float           res = -1.0;
200         ITEM      **item;
201         int                     size = q->size;
202
203         item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
204         if (size < 2)
205         {
206                 pfree(item);
207                 return calc_rank_or(w, t, q);
208         }
209         pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
210         memset(pos, 0, sizeof(uint16 *) * q->size);
211         *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
212         WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1);
213
214         for (i = 0; i < size; i++)
215         {
216                 entry = find_wordentry(t, q, item[i]);
217                 if (!entry)
218                         continue;
219
220                 if (entry->haspos)
221                         pos[i] = (uint16 *) _POSDATAPTR(t, entry);
222                 else
223                         pos[i] = (uint16 *) POSNULL;
224
225
226                 dimt = *(uint16 *) (pos[i]);
227                 post = (WordEntryPos *) (pos[i] + 1);
228                 for (k = 0; k < i; k++)
229                 {
230                         if (!pos[k])
231                                 continue;
232                         lenct = *(uint16 *) (pos[k]);
233                         ct = (WordEntryPos *) (pos[k] + 1);
234                         for (l = 0; l < dimt; l++)
235                         {
236                                 for (p = 0; p < lenct; p++)
237                                 {
238                                         dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
239                                         if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL)))
240                                         {
241                                                 float           curw;
242
243                                                 if (!dist)
244                                                         dist = MAXENTRYPOS;
245                                                 curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
246                                                 res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
247                                         }
248                                 }
249                         }
250                 }
251         }
252         pfree(pos);
253         pfree(item);
254         return res;
255 }
256
257 static float
258 calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
259 {
260         WordEntry  *entry;
261         WordEntryPos *post;
262         int4            dimt,
263                                 j,
264                                 i;
265         float           res = 0.0;
266         ITEM      **item;
267         int                     size = q->size;
268
269         *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
270         item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
271
272         for (i = 0; i < size; i++)
273         {
274                 float           resj,
275                                         wjm;
276                 int4            jm;
277
278                 entry = find_wordentry(t, q, item[i]);
279                 if (!entry)
280                         continue;
281
282                 if (entry->haspos)
283                 {
284                         dimt = POSDATALEN(t, entry);
285                         post = POSDATAPTR(t, entry);
286                 }
287                 else
288                 {
289                         dimt = *(uint16 *) POSNULL;
290                         post = POSNULL + 1;
291                 }
292
293                 resj = 0.0;
294                 wjm = -1.0;
295                 jm = 0;
296                 for (j = 0; j < dimt; j++)
297                 {
298                         resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
299                         if (wpos(post[j]) > wjm)
300                         {
301                                 wjm = wpos(post[j]);
302                                 jm = j;
303                         }
304                 }
305 /*
306                 limit (sum(i/i^2),i->inf) = pi^2/6
307                 resj = sum(wi/i^2),i=1,noccurence,
308                 wi - should be sorted desc,
309                 don't sort for now, just choose maximum weight. This should be corrected
310                 Oleg Bartunov
311 */
312                 res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
313         }
314         if (size > 0)
315                 res = res / size;
316         pfree(item);
317         return res;
318 }
319
320 static float
321 calc_rank(float *w, tsvector * t, QUERYTYPE * q, int4 method)
322 {
323         ITEM       *item = GETQUERY(q);
324         float           res = 0.0;
325         int                     len;
326
327         if (!t->size || !q->size)
328                 return 0.0;
329
330         res = (item->type != VAL && item->val == (int4) '&') ?
331                 calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
332
333         if (res < 0)
334                 res = (float)1e-20;
335
336         if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
337                 res /= log((double) (cnt_length(t) + 1)) / log(2.0);
338
339         if (method & RANK_NORM_LENGTH)
340         {
341                 len = cnt_length(t);
342                 if (len > 0)
343                         res /= (float) len;
344         }
345
346         if ((method & RANK_NORM_UNIQ) && t->size > 0)
347                 res /= (float) (t->size);
348
349         if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
350                 res /= log((double) (t->size + 1)) / log(2.0);
351
352         return res;
353 }
354
355 Datum
356 rank(PG_FUNCTION_ARGS)
357 {
358         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
359         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
360         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
361         int                     method = DEF_NORM_METHOD;
362         float           res = 0.0;
363         float           ws[lengthof(weights)];
364         float4     *arrdata;
365         int                     i;
366
367         if (ARR_NDIM(win) != 1)
368                 ereport(ERROR,
369                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
370                                  errmsg("array of weight must be one-dimensional")));
371
372         if (ARRNELEMS(win) < lengthof(weights))
373                 ereport(ERROR,
374                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
375                                  errmsg("array of weight is too short")));
376
377         if (ARR_HASNULL(win))
378                 ereport(ERROR,
379                                 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
380                                  errmsg("array of weight must not contain nulls")));
381
382         arrdata = (float4 *) ARR_DATA_PTR(win);
383         for (i = 0; i < lengthof(weights); i++)
384         {
385                 ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i];
386                 if (ws[i] > 1.0)
387                         ereport(ERROR,
388                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
389                                          errmsg("weight out of range")));
390         }
391
392         if (PG_NARGS() == 4)
393                 method = PG_GETARG_INT32(3);
394
395         res = calc_rank(ws, txt, query, method);
396
397         PG_FREE_IF_COPY(win, 0);
398         PG_FREE_IF_COPY(txt, 1);
399         PG_FREE_IF_COPY(query, 2);
400         PG_RETURN_FLOAT4(res);
401 }
402
403 Datum
404 rank_def(PG_FUNCTION_ARGS)
405 {
406         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
407         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
408         float           res = 0.0;
409         int                     method = DEF_NORM_METHOD;
410
411         if (PG_NARGS() == 3)
412                 method = PG_GETARG_INT32(2);
413
414         res = calc_rank(weights, txt, query, method);
415
416         PG_FREE_IF_COPY(txt, 0);
417         PG_FREE_IF_COPY(query, 1);
418         PG_RETURN_FLOAT4(res);
419 }
420
421
422 typedef struct
423 {
424         ITEM      **item;
425         int16           nitem;
426         bool            needfree;
427         uint8           wclass;
428         int32           pos;
429 }       DocRepresentation;
430
431 static int
432 compareDocR(const void *a, const void *b)
433 {
434         if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos)
435                 return 0;
436         return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
437 }
438
439 static bool
440 checkcondition_ITEM(void *checkval, ITEM * val)
441 {
442         return (bool) (val->istrue);
443 }
444
445 static void
446 reset_istrue_flag(QUERYTYPE * query)
447 {
448         ITEM       *item = GETQUERY(query);
449         int                     i;
450
451         /* reset istrue flag */
452         for (i = 0; i < query->size; i++)
453         {
454                 if (item->type == VAL)
455                         item->istrue = 0;
456                 item++;
457         }
458 }
459
460 typedef struct
461 {
462         int                     pos;
463         int                     p;
464         int                     q;
465         DocRepresentation *begin;
466         DocRepresentation *end;
467 }       Extention;
468
469
470 static bool
471 Cover(DocRepresentation * doc, int len, QUERYTYPE * query, Extention * ext)
472 {
473         DocRepresentation *ptr;
474         int                     lastpos = ext->pos;
475         int                     i;
476         bool            found = false;
477
478         reset_istrue_flag(query);
479
480         ext->p = 0x7fffffff;
481         ext->q = 0;
482         ptr = doc + ext->pos;
483
484         /* find upper bound of cover from current position, move up */
485         while (ptr - doc < len)
486         {
487                 for (i = 0; i < ptr->nitem; i++)
488                         ptr->item[i]->istrue = 1;
489                 if (TS_execute(GETQUERY(query), NULL, false, checkcondition_ITEM))
490                 {
491                         if (ptr->pos > ext->q)
492                         {
493                                 ext->q = ptr->pos;
494                                 ext->end = ptr;
495                                 lastpos = ptr - doc;
496                                 found = true;
497                         }
498                         break;
499                 }
500                 ptr++;
501         }
502
503         if (!found)
504                 return false;
505
506         reset_istrue_flag(query);
507
508         ptr = doc + lastpos;
509
510         /* find lower bound of cover from founded upper bound, move down */
511         while (ptr >= doc)
512         {
513                 for (i = 0; i < ptr->nitem; i++)
514                         ptr->item[i]->istrue = 1;
515                 if (TS_execute(GETQUERY(query), NULL, true, checkcondition_ITEM))
516                 {
517                         if (ptr->pos < ext->p)
518                         {
519                                 ext->begin = ptr;
520                                 ext->p = ptr->pos;
521                         }
522                         break;
523                 }
524                 ptr--;
525         }
526
527         if (ext->p <= ext->q)
528         {
529                 /*
530                  * set position for next try to next lexeme after begining of founded
531                  * cover
532                  */
533                 ext->pos = (ptr - doc) + 1;
534                 return true;
535         }
536
537         ext->pos++;
538         return Cover(doc, len, query, ext);
539 }
540
541 static DocRepresentation *
542 get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen)
543 {
544         ITEM       *item = GETQUERY(query);
545         WordEntry  *entry;
546         WordEntryPos *post;
547         int4            dimt,
548                                 j,
549                                 i;
550         int                     len = query->size * 4,
551                                 cur = 0;
552         DocRepresentation *doc;
553         char       *operand;
554
555         *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
556         doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
557         operand = GETOPERAND(query);
558         reset_istrue_flag(query);
559
560         for (i = 0; i < query->size; i++)
561         {
562                 if (item[i].type != VAL || item[i].istrue)
563                         continue;
564
565                 entry = find_wordentry(txt, query, &(item[i]));
566                 if (!entry)
567                         continue;
568
569                 if (entry->haspos)
570                 {
571                         dimt = POSDATALEN(txt, entry);
572                         post = POSDATAPTR(txt, entry);
573                 }
574                 else
575                 {
576                         dimt = *(uint16 *) POSNULL;
577                         post = POSNULL + 1;
578                 }
579
580                 while (cur + dimt >= len)
581                 {
582                         len *= 2;
583                         doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
584                 }
585
586                 for (j = 0; j < dimt; j++)
587                 {
588                         if (j == 0)
589                         {
590                                 ITEM       *kptr,
591                                                    *iptr = item + i;
592                                 int                     k;
593
594                                 doc[cur].needfree = false;
595                                 doc[cur].nitem = 0;
596                                 doc[cur].item = (ITEM **) palloc(sizeof(ITEM *) * query->size);
597
598                                 for (k = 0; k < query->size; k++)
599                                 {
600                                         kptr = item + k;
601                                         if (k == i ||
602                                                 (item[k].type == VAL &&
603                                                  compareITEM(&kptr, &iptr, operand) == 0))
604                                         {
605                                                 doc[cur].item[doc[cur].nitem] = item + k;
606                                                 doc[cur].nitem++;
607                                                 kptr->istrue = 1;
608                                         }
609                                 }
610                         }
611                         else
612                         {
613                                 doc[cur].needfree = false;
614                                 doc[cur].nitem = doc[cur - 1].nitem;
615                                 doc[cur].item = doc[cur - 1].item;
616                         }
617                         doc[cur].pos = WEP_GETPOS(post[j]);
618                         doc[cur].wclass = WEP_GETWEIGHT(post[j]);
619                         cur++;
620                 }
621         }
622
623         *doclen = cur;
624
625         if (cur > 0)
626         {
627                 if (cur > 1)
628                         qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
629                 return doc;
630         }
631
632         pfree(doc);
633         return NULL;
634 }
635
636 static float4
637 calc_rank_cd(float4 *arrdata, tsvector * txt, QUERYTYPE * query, int method)
638 {
639         DocRepresentation *doc;
640         int                     len,
641                                 i,
642                                 doclen = 0;
643         Extention       ext;
644         double          Wdoc = 0.0;
645         double          invws[lengthof(weights)];
646         double          SumDist = 0.0,
647                                 PrevExtPos = 0.0,
648                                 CurExtPos = 0.0;
649         int                     NExtent = 0;
650
651         for (i = 0; i < lengthof(weights); i++)
652         {
653                 invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
654                 if (invws[i] > 1.0)
655                         ereport(ERROR,
656                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
657                                          errmsg("weight out of range")));
658                 invws[i] = 1.0 / invws[i];
659         }
660
661         doc = get_docrep(txt, query, &doclen);
662         if (!doc)
663                 return 0.0;
664
665         MemSet(&ext, 0, sizeof(Extention));
666         while (Cover(doc, doclen, query, &ext))
667         {
668                 double          Cpos = 0.0;
669                 double          InvSum = 0.0;
670                 int                     nNoise;
671                 DocRepresentation *ptr = ext.begin;
672
673                 while (ptr <= ext.end)
674                 {
675                         InvSum += invws[ptr->wclass];
676                         ptr++;
677                 }
678
679                 Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
680                 /*
681                  * if doc are big enough then ext.q may be equal to ext.p
682                  * due to limit of posional information. In this case we 
683                  * approximate number of noise word as half cover's
684                  * length
685                  */
686                 nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
687                 if ( nNoise < 0 )
688                         nNoise = (ext.end - ext.begin) / 2;
689                 Wdoc += Cpos / ((double) (1 + nNoise));
690
691                 CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
692                 if (NExtent > 0 && CurExtPos > PrevExtPos               /* prevent devision by
693                                                                                                                  * zero in a case of
694                                 multiple lexize */ )
695                         SumDist += 1.0 / (CurExtPos - PrevExtPos);
696
697                 PrevExtPos = CurExtPos;
698                 NExtent++;
699         }
700
701         if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
702                 Wdoc /= log((double) (cnt_length(txt) + 1));
703
704         if (method & RANK_NORM_LENGTH)
705         {
706                 len = cnt_length(txt);
707                 if (len > 0)
708                         Wdoc /= (double) len;
709         }
710
711         if ((method & RANK_NORM_EXTDIST) && SumDist > 0)
712                 Wdoc /= ((double) NExtent) / SumDist;
713
714         if ((method & RANK_NORM_UNIQ) && txt->size > 0)
715                 Wdoc /= (double) (txt->size);
716
717         if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
718                 Wdoc /= log((double) (txt->size + 1)) / log(2.0);
719
720         for (i = 0; i < doclen; i++)
721                 if (doc[i].needfree)
722                         pfree(doc[i].item);
723         pfree(doc);
724
725         return (float4) Wdoc;
726 }
727
728 Datum
729 rank_cd(PG_FUNCTION_ARGS)
730 {
731         ArrayType  *win;
732         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
733         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(2));
734         int                     method = DEF_NORM_METHOD;
735         float4          res;
736
737         /*
738          * Pre-8.2, rank_cd took just a plain int as its first argument.
739          * It was a mistake to keep the same C function name while changing the
740          * signature, but it's too late to fix that.  Instead, do a runtime test
741          * to make sure the expected datatype has been passed.  This is needed
742          * to prevent core dumps if tsearch2 function definitions from an old
743          * database are loaded into an 8.2 server.
744          */
745         if (get_fn_expr_argtype(fcinfo->flinfo, 0) != FLOAT4ARRAYOID)
746                 ereport(ERROR,
747                                 (errcode(ERRCODE_INVALID_FUNCTION_DEFINITION),
748                                  errmsg("rank_cd() now takes real[] as its first argument, not integer")));
749
750         /* now safe to dereference the first arg */
751         win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
752
753         if (ARR_NDIM(win) != 1)
754                 ereport(ERROR,
755                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
756                                  errmsg("array of weight must be one-dimensional")));
757
758         if (ARRNELEMS(win) < lengthof(weights))
759                 ereport(ERROR,
760                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
761                                  errmsg("array of weight is too short")));
762
763         if (ARR_HASNULL(win))
764                 ereport(ERROR,
765                                 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
766                                  errmsg("array of weight must not contain nulls")));
767
768         if (PG_NARGS() == 4)
769                 method = PG_GETARG_INT32(3);
770
771         res = calc_rank_cd((float4 *) ARR_DATA_PTR(win), txt, query, method);
772
773         PG_FREE_IF_COPY(win, 0);
774         PG_FREE_IF_COPY(txt, 1);
775         PG_FREE_IF_COPY(query, 2);
776
777         PG_RETURN_FLOAT4(res);
778 }
779
780
781 Datum
782 rank_cd_def(PG_FUNCTION_ARGS)
783 {
784         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
785         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1));
786         float4          res;
787
788         res = calc_rank_cd(weights, txt, query, (PG_NARGS() == 3) ? PG_GETARG_DATUM(2) : DEF_NORM_METHOD);
789
790         PG_FREE_IF_COPY(txt, 0);
791         PG_FREE_IF_COPY(query, 1);
792
793         PG_RETURN_FLOAT4(res);
794 }
795
796 /**************debug*************/
797
798 typedef struct
799 {
800         char       *w;
801         int2            len;
802         int2            pos;
803         int2            start;
804         int2            finish;
805 }       DocWord;
806
807 static int
808 compareDocWord(const void *a, const void *b)
809 {
810         if (((DocWord *) a)->pos == ((DocWord *) b)->pos)
811                 return 0;
812         return (((DocWord *) a)->pos > ((DocWord *) b)->pos) ? 1 : -1;
813 }
814
815
816 Datum
817 get_covers(PG_FUNCTION_ARGS)
818 {
819         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
820         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1));
821         WordEntry  *pptr = ARRPTR(txt);
822         int                     i,
823                                 dlen = 0,
824                                 j,
825                                 cur = 0,
826                                 len = 0,
827                                 rlen;
828         DocWord    *dw,
829                            *dwptr;
830         text       *out;
831         char       *cptr;
832         DocRepresentation *doc;
833         int                     olddwpos = 0;
834         int                     ncover = 1;
835         Extention       ext;
836
837         doc = get_docrep(txt, query, &rlen);
838
839         if (!doc)
840         {
841                 out = palloc(VARHDRSZ);
842                 VARATT_SIZEP(out) = VARHDRSZ;
843                 PG_FREE_IF_COPY(txt, 0);
844                 PG_FREE_IF_COPY(query, 1);
845                 PG_RETURN_POINTER(out);
846         }
847
848         for (i = 0; i < txt->size; i++)
849         {
850                 if (!pptr[i].haspos)
851                         ereport(ERROR,
852                                         (errcode(ERRCODE_SYNTAX_ERROR),
853                                          errmsg("no pos info")));
854                 dlen += POSDATALEN(txt, &(pptr[i]));
855         }
856
857         dwptr = dw = palloc(sizeof(DocWord) * dlen);
858         memset(dw, 0, sizeof(DocWord) * dlen);
859
860         for (i = 0; i < txt->size; i++)
861         {
862                 WordEntryPos *posdata = POSDATAPTR(txt, &(pptr[i]));
863
864                 for (j = 0; j < POSDATALEN(txt, &(pptr[i])); j++)
865                 {
866                         dw[cur].w = STRPTR(txt) + pptr[i].pos;
867                         dw[cur].len = pptr[i].len;
868                         dw[cur].pos = WEP_GETPOS(posdata[j]);
869                         cur++;
870                 }
871                 len += (pptr[i].len + 1) * (int) POSDATALEN(txt, &(pptr[i]));
872         }
873         qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
874
875         MemSet(&ext, 0, sizeof(Extention));
876         while (Cover(doc, rlen, query, &ext))
877         {
878                 dwptr = dw + olddwpos;
879                 while (dwptr->pos < ext.p && dwptr - dw < dlen)
880                         dwptr++;
881                 olddwpos = dwptr - dw;
882                 dwptr->start = ncover;
883                 while (dwptr->pos < ext.q + 1 && dwptr - dw < dlen)
884                         dwptr++;
885                 (dwptr - 1)->finish = ncover;
886                 len += 4 /* {}+two spaces */ + 2 * 16 /* numbers */ ;
887                 ncover++;
888         }
889
890         out = palloc(VARHDRSZ + len);
891         cptr = ((char *) out) + VARHDRSZ;
892         dwptr = dw;
893
894         while (dwptr - dw < dlen)
895         {
896                 if (dwptr->start)
897                 {
898                         sprintf(cptr, "{%d ", dwptr->start);
899                         cptr = strchr(cptr, '\0');
900                 }
901                 memcpy(cptr, dwptr->w, dwptr->len);
902                 cptr += dwptr->len;
903                 *cptr = ' ';
904                 cptr++;
905                 if (dwptr->finish)
906                 {
907                         sprintf(cptr, "}%d ", dwptr->finish);
908                         cptr = strchr(cptr, '\0');
909                 }
910                 dwptr++;
911         }
912
913         VARATT_SIZEP(out) = cptr - ((char *) out);
914
915         pfree(dw);
916         for (i = 0; i < rlen; i++)
917                 if (doc[i].needfree)
918                         pfree(doc[i].item);
919         pfree(doc);
920
921         PG_FREE_IF_COPY(txt, 0);
922         PG_FREE_IF_COPY(query, 1);
923         PG_RETURN_POINTER(out);
924 }