]> granicus.if.org Git - postgresql/blob - contrib/tsearch2/rank.c
Mop-up for nulls-in-arrays patch: fix some places that access array
[postgresql] / contrib / tsearch2 / rank.c
1 /*
2  * Relevation
3  * Teodor Sigaev <teodor@sigaev.ru>
4  */
5 #include "postgres.h"
6
7 #include <math.h>
8
9 #include "access/gist.h"
10 #include "access/itup.h"
11 #include "catalog/namespace.h"
12 #include "commands/trigger.h"
13 #include "executor/spi.h"
14 #include "fmgr.h"
15 #include "funcapi.h"
16 #include "nodes/pg_list.h"
17 #include "storage/bufpage.h"
18 #include "utils/array.h"
19 #include "utils/builtins.h"
20
21 #include "tsvector.h"
22 #include "query.h"
23 #include "common.h"
24
25 PG_FUNCTION_INFO_V1(rank);
26 Datum           rank(PG_FUNCTION_ARGS);
27
28 PG_FUNCTION_INFO_V1(rank_def);
29 Datum           rank_def(PG_FUNCTION_ARGS);
30
31 PG_FUNCTION_INFO_V1(rank_cd);
32 Datum           rank_cd(PG_FUNCTION_ARGS);
33
34 PG_FUNCTION_INFO_V1(rank_cd_def);
35 Datum           rank_cd_def(PG_FUNCTION_ARGS);
36
37 PG_FUNCTION_INFO_V1(get_covers);
38 Datum           get_covers(PG_FUNCTION_ARGS);
39
40 static float weights[] = {0.1, 0.2, 0.4, 1.0};
41
42 #define wpos(wep)       ( w[ WEP_GETWEIGHT(wep) ] )
43
44 #define DEF_NORM_METHOD 0
45
46 static float calc_rank_or(float *w, tsvector * t, QUERYTYPE * q);
47 static float calc_rank_and(float *w, tsvector * t, QUERYTYPE * q);
48
49 /*
50  * Returns a weight of a word collocation
51  */
52 static float4
53 word_distance(int4 w)
54 {
55         if (w > 100)
56                 return 1e-30;
57
58         return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
59 }
60
61 static int
62 cnt_length(tsvector * t)
63 {
64         WordEntry  *ptr = ARRPTR(t),
65                            *end = (WordEntry *) STRPTR(t);
66         int                     len = 0,
67                                 clen;
68
69         while (ptr < end)
70         {
71                 if ((clen = POSDATALEN(t, ptr)) == 0)
72                         len += 1;
73                 else
74                         len += clen;
75                 ptr++;
76         }
77
78         return len;
79 }
80
81 static int4
82 WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item)
83 {
84         if (ptr->len == item->length)
85                 return strncmp(
86                                            eval + ptr->pos,
87                                            qval + item->distance,
88                                            item->length);
89
90         return (ptr->len > item->length) ? 1 : -1;
91 }
92
93 static WordEntry *
94 find_wordentry(tsvector * t, QUERYTYPE * q, ITEM * item)
95 {
96         WordEntry  *StopLow = ARRPTR(t);
97         WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
98         WordEntry  *StopMiddle;
99         int                     difference;
100
101         /* Loop invariant: StopLow <= item < StopHigh */
102
103         while (StopLow < StopHigh)
104         {
105                 StopMiddle = StopLow + (StopHigh - StopLow) / 2;
106                 difference = WordECompareITEM(STRPTR(t), GETOPERAND(q), StopMiddle, item);
107                 if (difference == 0)
108                         return StopMiddle;
109                 else if (difference < 0)
110                         StopLow = StopMiddle + 1;
111                 else
112                         StopHigh = StopMiddle;
113         }
114
115         return NULL;
116 }
117
118
119 static char *SortAndUniqOperand = NULL;
120
121 static int
122 compareITEM(const void *a, const void *b)
123 {
124         if ((*(ITEM **) a)->length == (*(ITEM **) b)->length)
125                 return strncmp(SortAndUniqOperand + (*(ITEM **) a)->distance,
126                                            SortAndUniqOperand + (*(ITEM **) b)->distance,
127                                            (*(ITEM **) b)->length);
128
129         return ((*(ITEM **) a)->length > (*(ITEM **) b)->length) ? 1 : -1;
130 }
131
132 static ITEM **
133 SortAndUniqItems(char *operand, ITEM * item, int *size)
134 {
135         ITEM      **res,
136                           **ptr,
137                           **prevptr;
138
139         ptr = res = (ITEM **) palloc(sizeof(ITEM *) * *size);
140
141         while ((*size)--)
142         {
143                 if (item->type == VAL)
144                 {
145                         *ptr = item;
146                         ptr++;
147                 }
148                 item++;
149         }
150
151         *size = ptr - res;
152         if (*size < 2)
153                 return res;
154
155         SortAndUniqOperand = operand;
156         qsort(res, *size, sizeof(ITEM **), compareITEM);
157
158         ptr = res + 1;
159         prevptr = res;
160
161         while (ptr - res < *size)
162         {
163                 if (compareITEM((void *) ptr, (void *) prevptr) != 0)
164                 {
165                         prevptr++;
166                         *prevptr = *ptr;
167                 }
168                 ptr++;
169         }
170
171         *size = prevptr + 1 - res;
172         return res;
173 }
174
175 static WordEntryPos POSNULL[] = {
176         0,
177         0
178 };
179
180 static float
181 calc_rank_and(float *w, tsvector * t, QUERYTYPE * q)
182 {
183         uint16    **pos;
184         int                     i,
185                                 k,
186                                 l,
187                                 p;
188         WordEntry  *entry;
189         WordEntryPos *post,
190                            *ct;
191         int4            dimt,
192                                 lenct,
193                                 dist;
194         float           res = -1.0;
195         ITEM      **item;
196         int                     size = q->size;
197
198         item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
199         if (size < 2)
200         {
201                 pfree(item);
202                 return calc_rank_or(w, t, q);
203         }
204         pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
205         memset(pos, 0, sizeof(uint16 *) * q->size);
206         *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
207         WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1);
208
209         for (i = 0; i < size; i++)
210         {
211                 entry = find_wordentry(t, q, item[i]);
212                 if (!entry)
213                         continue;
214
215                 if (entry->haspos)
216                         pos[i] = (uint16 *) _POSDATAPTR(t, entry);
217                 else
218                         pos[i] = (uint16 *) POSNULL;
219
220
221                 dimt = *(uint16 *) (pos[i]);
222                 post = (WordEntryPos *) (pos[i] + 1);
223                 for (k = 0; k < i; k++)
224                 {
225                         if (!pos[k])
226                                 continue;
227                         lenct = *(uint16 *) (pos[k]);
228                         ct = (WordEntryPos *) (pos[k] + 1);
229                         for (l = 0; l < dimt; l++)
230                         {
231                                 for (p = 0; p < lenct; p++)
232                                 {
233                                         dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
234                                         if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL)))
235                                         {
236                                                 float           curw;
237
238                                                 if (!dist)
239                                                         dist = MAXENTRYPOS;
240                                                 curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
241                                                 res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
242                                         }
243                                 }
244                         }
245                 }
246         }
247         pfree(pos);
248         pfree(item);
249         return res;
250 }
251
252 static float
253 calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
254 {
255         WordEntry  *entry;
256         WordEntryPos *post;
257         int4            dimt,
258                                 j,
259                                 i;
260         float           res = 0.0;
261         ITEM      **item;
262         int                     size = q->size;
263
264         *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
265         item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
266
267         for (i = 0; i < size; i++)
268         {
269                 float resj,wjm;
270                 int4  jm;
271                 entry = find_wordentry(t, q, item[i]);
272                 if (!entry)
273                         continue;
274
275                 if (entry->haspos)
276                 {
277                         dimt = POSDATALEN(t, entry);
278                         post = POSDATAPTR(t, entry);
279                 }
280                 else
281                 {
282                         dimt = *(uint16 *) POSNULL;
283                         post = POSNULL + 1;
284                 }
285
286                 resj = 0.0;
287                 wjm = -1.0;
288                 jm = 0;
289                 for (j = 0; j < dimt; j++)
290                 {
291                         resj = resj + wpos(post[j])/((j+1)*(j+1));
292                         if ( wpos(post[j]) > wjm ) {
293                                 wjm = wpos(post[j]);
294                                 jm  = j;
295                         }
296                 }
297 /* 
298         limit (sum(i/i^2),i->inf) = pi^2/6
299         resj = sum(wi/i^2),i=1,noccurence,
300         wi - should be sorted desc, 
301         don't sort for now, just choose maximum weight. This should be corrected
302                 Oleg Bartunov
303 */
304                 res = res + ( wjm + resj - wjm/((jm+1)*(jm+1)))/1.64493406685; 
305         }
306         if ( size > 0 )
307                 res = res /size;
308         pfree(item);
309         return res;
310 }
311
312 static float
313 calc_rank(float *w, tsvector * t, QUERYTYPE * q, int4 method)
314 {
315         ITEM       *item = GETQUERY(q);
316         float           res = 0.0;
317         int                     len;
318
319         if (!t->size || !q->size)
320                 return 0.0;
321
322         res = (item->type != VAL && item->val == (int4) '&') ?
323                 calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
324
325         if (res < 0)
326                 res = 1e-20;
327
328         switch (method)
329         {
330                 case 0:
331                         break;
332                 case 1:
333                         res /= log((float) (cnt_length(t) + 1)) / log(2.0);
334                         break;
335                 case 2:
336                         len = cnt_length(t);
337                         if (len > 0)
338                                 res /= (float) len;
339                         break;
340                 default:
341                         /* internal error */
342                         elog(ERROR, "unrecognized normalization method: %d", method);
343         }
344
345         return res;
346 }
347
348 Datum
349 rank(PG_FUNCTION_ARGS)
350 {
351         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
352         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
353         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
354         int                     method = DEF_NORM_METHOD;
355         float           res = 0.0;
356         float           ws[lengthof(weights)];
357         float4     *arrdata;
358         int                     i;
359
360         if (ARR_NDIM(win) != 1)
361                 ereport(ERROR,
362                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
363                                  errmsg("array of weight must be one-dimensional")));
364
365         if (ARRNELEMS(win) < lengthof(weights))
366                 ereport(ERROR,
367                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
368                                  errmsg("array of weight is too short")));
369
370         if (ARR_HASNULL(win))
371                 ereport(ERROR,
372                                 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
373                                  errmsg("array of weight must not contain nulls")));
374
375         arrdata = (float4 *) ARR_DATA_PTR(win);
376         for (i = 0; i < lengthof(weights); i++)
377         {
378                 ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i];
379                 if (ws[i] > 1.0)
380                         ereport(ERROR,
381                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
382                                          errmsg("weight out of range")));
383         }
384
385         if (PG_NARGS() == 4)
386                 method = PG_GETARG_INT32(3);
387
388         res = calc_rank(ws, txt, query, method);
389
390         PG_FREE_IF_COPY(win, 0);
391         PG_FREE_IF_COPY(txt, 1);
392         PG_FREE_IF_COPY(query, 2);
393         PG_RETURN_FLOAT4(res);
394 }
395
396 Datum
397 rank_def(PG_FUNCTION_ARGS)
398 {
399         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
400         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
401         float           res = 0.0;
402         int                     method = DEF_NORM_METHOD;
403
404         if (PG_NARGS() == 3)
405                 method = PG_GETARG_INT32(2);
406
407         res = calc_rank(weights, txt, query, method);
408
409         PG_FREE_IF_COPY(txt, 0);
410         PG_FREE_IF_COPY(query, 1);
411         PG_RETURN_FLOAT4(res);
412 }
413
414
415 typedef struct
416 {
417         ITEM       **item;
418         int16           nitem;
419         bool            needfree;
420         int32           pos;
421 }       DocRepresentation;
422
423 static int
424 compareDocR(const void *a, const void *b)
425 {
426         if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos)
427                 return 0;
428         return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
429 }
430
431 static bool
432 checkcondition_ITEM(void *checkval, ITEM * val) {
433         return (bool)(val->istrue);
434 }
435
436 static void
437 reset_istrue_flag(QUERYTYPE *query) {
438         ITEM       *item = GETQUERY(query);
439         int i;
440
441         /* reset istrue flag */
442         for(i = 0; i < query->size; i++) { 
443                 if ( item->type == VAL ) 
444                         item->istrue = 0;
445                 item++;
446         }
447 }
448                 
449 static bool
450 Cover(DocRepresentation * doc, int len, QUERYTYPE * query, int *pos, int *p, int *q)
451 {
452         DocRepresentation *ptr;
453         int                     lastpos = *pos;
454         int i;
455         bool    found=false;
456
457         reset_istrue_flag(query);
458         
459         *p = 0x7fffffff;
460         *q = 0;
461         ptr = doc + *pos;
462
463         /* find upper bound of cover from current position, move up */
464         while (ptr - doc < len) {
465                 for(i=0;i<ptr->nitem;i++)
466                         ptr->item[i]->istrue = 1;
467                 if ( TS_execute(GETQUERY(query), NULL, false, checkcondition_ITEM) ) {
468                         if (ptr->pos > *q) {
469                                 *q = ptr->pos;
470                                 lastpos = ptr - doc;
471                                 found = true;
472                         } 
473                         break;
474                 }
475                 ptr++;
476         }
477
478         if (!found) 
479                 return false;
480
481         reset_istrue_flag(query);
482
483         ptr = doc + lastpos;
484
485         /* find lower bound of cover from founded upper bound, move down */
486         while (ptr >= doc ) {
487                 for(i=0;i<ptr->nitem;i++)
488                         ptr->item[i]->istrue = 1;
489                 if ( TS_execute(GETQUERY(query), NULL, true, checkcondition_ITEM) ) {
490                         if (ptr->pos < *p) 
491                                 *p = ptr->pos;
492                         break;
493                 }
494                 ptr--;
495         }
496
497         if ( *p <= *q ) {
498                 /* set position for next try to next lexeme after begining of founded cover */
499                 *pos= (ptr-doc) + 1;
500                 return true;
501         }
502
503         (*pos)++;
504         return Cover( doc, len, query, pos, p, q );
505 }
506
507 static DocRepresentation *
508 get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen)
509 {
510         ITEM       *item = GETQUERY(query);
511         WordEntry  *entry;
512         WordEntryPos *post;
513         int4            dimt,
514                                 j,
515                                 i;
516         int                     len = query->size * 4,
517                                 cur = 0;
518         DocRepresentation *doc;
519
520         *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
521         doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
522         SortAndUniqOperand = GETOPERAND(query);
523         reset_istrue_flag(query);
524
525         for (i = 0; i < query->size; i++)
526         {
527                 if (item[i].type != VAL || item[i].istrue)
528                         continue;
529
530                 entry = find_wordentry(txt, query, &(item[i]));
531                 if (!entry)
532                         continue;
533
534                 if (entry->haspos)
535                 {
536                         dimt = POSDATALEN(txt, entry);
537                         post = POSDATAPTR(txt, entry);
538                 }
539                 else
540                 {
541                         dimt = *(uint16 *) POSNULL;
542                         post = POSNULL + 1;
543                 }
544
545                 while (cur + dimt >= len)
546                 {
547                         len *= 2;
548                         doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
549                 }
550
551                 for (j = 0; j < dimt; j++)
552                 {
553                         if ( j == 0 ) {
554                                 ITEM *kptr, *iptr = item+i;
555                                 int k;
556  
557                                 doc[cur].needfree = false;
558                                 doc[cur].nitem = 0;
559                                 doc[cur].item = (ITEM**)palloc( sizeof(ITEM*) * query->size );
560
561                                 for(k=0; k < query->size; k++) {
562                                         kptr = item+k;
563                                         if ( k==i || ( item[k].type == VAL && compareITEM( &kptr, &iptr ) == 0 ) ) {
564                                                 doc[cur].item[ doc[cur].nitem ] = item+k;
565                                                 doc[cur].nitem++;
566                                                 kptr->istrue = 1;
567                                         }
568                                 } 
569                         } else {
570                                 doc[cur].needfree = false;
571                                 doc[cur].nitem = doc[cur-1].nitem;
572                                 doc[cur].item  = doc[cur-1].item;
573                         }
574                         doc[cur].pos = WEP_GETPOS(post[j]);
575                         cur++;
576                 }
577         }
578
579         *doclen = cur;
580
581         if (cur > 0)
582         {
583                 if (cur > 1)
584                         qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
585                 return doc;
586         }
587
588         pfree(doc);
589         return NULL;
590 }
591
592
593 Datum
594 rank_cd(PG_FUNCTION_ARGS)
595 {
596         int                     K = PG_GETARG_INT32(0);
597         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
598         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(2));
599         int                     method = DEF_NORM_METHOD;
600         DocRepresentation *doc;
601         float           res = 0.0;
602         int                     p = 0,
603                                 q = 0,
604                                 len,
605                                 cur,
606                                 i,
607                                 doclen=0;
608
609         doc = get_docrep(txt, query, &doclen);
610         if (!doc)
611         {
612                 PG_FREE_IF_COPY(txt, 1);
613                 PG_FREE_IF_COPY(query, 2);
614                 PG_RETURN_FLOAT4(0.0);
615         }
616
617         cur = 0;
618         if (K <= 0)
619                 K = 4;
620         while (Cover(doc, doclen, query, &cur, &p, &q))
621                 res += (q - p + 1 > K) ? ((float) K) / ((float) (q - p + 1)) : 1.0;
622
623         if (PG_NARGS() == 4)
624                 method = PG_GETARG_INT32(3);
625
626         switch (method)
627         {
628                 case 0:
629                         break;
630                 case 1:
631                         res /= log((float) (cnt_length(txt) + 1));
632                         break;
633                 case 2:
634                         len = cnt_length(txt);
635                         if (len > 0)
636                                 res /= (float) len;
637                         break;
638                 default:
639                         /* internal error */
640                         elog(ERROR, "unrecognized normalization method: %d", method);
641         }
642
643         for(i=0;i<doclen;i++)
644                 if ( doc[i].needfree )
645                         pfree( doc[i].item );
646         pfree(doc);
647         PG_FREE_IF_COPY(txt, 1);
648         PG_FREE_IF_COPY(query, 2);
649
650         PG_RETURN_FLOAT4(res);
651 }
652
653
654 Datum
655 rank_cd_def(PG_FUNCTION_ARGS)
656 {
657         PG_RETURN_DATUM(DirectFunctionCall4(
658                                                                                 rank_cd,
659                                                                                 Int32GetDatum(-1),
660                                                                                 PG_GETARG_DATUM(0),
661                                                                                 PG_GETARG_DATUM(1),
662           (PG_NARGS() == 3) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD)
663                                                                                 ));
664 }
665
666 /**************debug*************/
667
668 typedef struct
669 {
670         char       *w;
671         int2            len;
672         int2            pos;
673         int2            start;
674         int2            finish;
675 }       DocWord;
676
677 static int
678 compareDocWord(const void *a, const void *b)
679 {
680         if (((DocWord *) a)->pos == ((DocWord *) b)->pos)
681                 return 0;
682         return (((DocWord *) a)->pos > ((DocWord *) b)->pos) ? 1 : -1;
683 }
684
685
686 Datum
687 get_covers(PG_FUNCTION_ARGS)
688 {
689         tsvector   *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
690         QUERYTYPE  *query = (QUERYTYPE *) PG_DETOAST_DATUM_COPY(PG_GETARG_DATUM(1));
691         WordEntry  *pptr = ARRPTR(txt);
692         int                     i,
693                                 dlen = 0,
694                                 j,
695                                 cur = 0,
696                                 len = 0,
697                                 rlen;
698         DocWord    *dw,
699                            *dwptr;
700         text       *out;
701         char       *cptr;
702         DocRepresentation *doc;
703         int                     pos = 0,
704                                 p,
705                                 q,
706                                 olddwpos = 0;
707         int                     ncover = 1;
708
709         doc = get_docrep(txt, query, &rlen);
710
711         if (!doc)
712         {
713                 out = palloc(VARHDRSZ);
714                 VARATT_SIZEP(out) = VARHDRSZ;
715                 PG_FREE_IF_COPY(txt, 0);
716                 PG_FREE_IF_COPY(query, 1);
717                 PG_RETURN_POINTER(out);
718         }
719
720         for (i = 0; i < txt->size; i++)
721         {
722                 if (!pptr[i].haspos)
723                         ereport(ERROR,
724                                         (errcode(ERRCODE_SYNTAX_ERROR),
725                                          errmsg("no pos info")));
726                 dlen += POSDATALEN(txt, &(pptr[i]));
727         }
728
729         dwptr = dw = palloc(sizeof(DocWord) * dlen);
730         memset(dw, 0, sizeof(DocWord) * dlen);
731
732         for (i = 0; i < txt->size; i++)
733         {
734                 WordEntryPos *posdata = POSDATAPTR(txt, &(pptr[i]));
735
736                 for (j = 0; j < POSDATALEN(txt, &(pptr[i])); j++)
737                 {
738                         dw[cur].w = STRPTR(txt) + pptr[i].pos;
739                         dw[cur].len = pptr[i].len;
740                         dw[cur].pos = WEP_GETPOS(posdata[j]);
741                         cur++;
742                 }
743                 len += (pptr[i].len + 1) * (int) POSDATALEN(txt, &(pptr[i]));
744         }
745         qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
746
747         while (Cover(doc, rlen, query, &pos, &p, &q))
748         {
749                 dwptr = dw + olddwpos;
750                 while (dwptr->pos < p && dwptr - dw < dlen)
751                         dwptr++;
752                 olddwpos = dwptr - dw;
753                 dwptr->start = ncover;
754                 while (dwptr->pos < q + 1 && dwptr - dw < dlen)
755                         dwptr++;
756                 (dwptr - 1)->finish = ncover;
757                 len += 4 /* {}+two spaces */ + 2 * 16 /* numbers */ ;
758                 ncover++;
759         }
760
761         out = palloc(VARHDRSZ + len);
762         cptr = ((char *) out) + VARHDRSZ;
763         dwptr = dw;
764
765         while (dwptr - dw < dlen)
766         {
767                 if (dwptr->start)
768                 {
769                         sprintf(cptr, "{%d ", dwptr->start);
770                         cptr = strchr(cptr, '\0');
771                 }
772                 memcpy(cptr, dwptr->w, dwptr->len);
773                 cptr += dwptr->len;
774                 *cptr = ' ';
775                 cptr++;
776                 if (dwptr->finish)
777                 {
778                         sprintf(cptr, "}%d ", dwptr->finish);
779                         cptr = strchr(cptr, '\0');
780                 }
781                 dwptr++;
782         }
783
784         VARATT_SIZEP(out) = cptr - ((char *) out);
785
786         pfree(dw);
787         for(i=0;i<rlen;i++)
788                 if ( doc[i].needfree )
789                         pfree( doc[i].item );
790         pfree(doc);
791
792         PG_FREE_IF_COPY(txt, 0);
793         PG_FREE_IF_COPY(query, 1);
794         PG_RETURN_POINTER(out);
795 }