]> granicus.if.org Git - postgresql/blob - contrib/tsearch2/tsvector.c
cc2e96c22572bb7d00136f9eb48a15e8fa680af1
[postgresql] / contrib / tsearch2 / tsvector.c
1 /*
2  * In/Out definitions for tsvector type
3  * Internal structure:
4  * string of values, plus an array giving each lexeme's position in the string and its length
5  * Teodor Sigaev <teodor@sigaev.ru>
6  */
7 #include "postgres.h"
8
9 #include "access/gist.h"
10 #include "access/itup.h"
11 #include "utils/builtins.h"
12 #include "storage/bufpage.h"
13 #include "executor/spi.h"
14 #include "commands/trigger.h"
15 #include "nodes/pg_list.h"
16 #include "catalog/namespace.h"
17
18 #include "utils/pg_locale.h"
19
20 #include <ctype.h>                              /* tolower */
21 #include "tsvector.h"
22 #include "query.h"
23 #include "ts_cfg.h"
24 #include "common.h"
25
26 PG_FUNCTION_INFO_V1(tsvector_in);
27 Datum           tsvector_in(PG_FUNCTION_ARGS);
28
29 PG_FUNCTION_INFO_V1(tsvector_out);
30 Datum           tsvector_out(PG_FUNCTION_ARGS);
31
32 PG_FUNCTION_INFO_V1(to_tsvector);
33 Datum           to_tsvector(PG_FUNCTION_ARGS);
34
35 PG_FUNCTION_INFO_V1(to_tsvector_current);
36 Datum           to_tsvector_current(PG_FUNCTION_ARGS);
37
38 PG_FUNCTION_INFO_V1(to_tsvector_name);
39 Datum           to_tsvector_name(PG_FUNCTION_ARGS);
40
41 PG_FUNCTION_INFO_V1(tsearch2);
42 Datum           tsearch2(PG_FUNCTION_ARGS);
43
44 PG_FUNCTION_INFO_V1(tsvector_length);
45 Datum           tsvector_length(PG_FUNCTION_ARGS);
46
47 /*
48  * in/out text index type
49  */
50 static int
51 comparePos(const void *a, const void *b)
52 {
53         if (((WordEntryPos *) a)->pos == ((WordEntryPos *) b)->pos)
54                 return 1;
55         return (((WordEntryPos *) a)->pos > ((WordEntryPos *) b)->pos) ? 1 : -1;
56 }
57
58 static int
59 uniquePos(WordEntryPos * a, int4 l)
60 {
61         WordEntryPos *ptr,
62                            *res;
63
64         res = a;
65         if (l == 1)
66                 return l;
67
68         qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
69
70         ptr = a + 1;
71         while (ptr - a < l)
72         {
73                 if (ptr->pos != res->pos)
74                 {
75                         res++;
76                         res->pos = ptr->pos;
77                         res->weight = ptr->weight;
78                         if (res - a >= MAXNUMPOS - 1 || res->pos == MAXENTRYPOS - 1)
79                                 break;
80                 }
81                 else if (ptr->weight > res->weight)
82                         res->weight = ptr->weight;
83                 ptr++;
84         }
85         return res + 1 - a;
86 }
87
88 static char *BufferStr;
89 static int
90 compareentry(const void *a, const void *b)
91 {
92         if (((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len)
93         {
94                 return strncmp(
95                                            &BufferStr[((WordEntryIN *) a)->entry.pos],
96                                            &BufferStr[((WordEntryIN *) b)->entry.pos],
97                                            ((WordEntryIN *) a)->entry.len);
98         }
99         return (((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len) ? 1 : -1;
100 }
101
102 static int
103 uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
104 {
105         WordEntryIN *ptr,
106                            *res;
107
108         res = a;
109         if (l == 1)
110         {
111                 if (a->entry.haspos)
112                 {
113                         *(uint16 *) (a->pos) = uniquePos(&(a->pos[1]), *(uint16 *) (a->pos));
114                         *outbuflen = SHORTALIGN(res->entry.len) + (*(uint16 *) (a->pos) + 1) * sizeof(WordEntryPos);
115                 }
116                 return l;
117         }
118
119         ptr = a + 1;
120         BufferStr = buf;
121         qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
122
123         while (ptr - a < l)
124         {
125                 if (!(ptr->entry.len == res->entry.len &&
126                           strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
127                 {
128                         if (res->entry.haspos)
129                         {
130                                 *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
131                                 *outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
132                         }
133                         *outbuflen += SHORTALIGN(res->entry.len);
134                         res++;
135                         memcpy(res, ptr, sizeof(WordEntryIN));
136                 }
137                 else if (ptr->entry.haspos)
138                 {
139                         if (res->entry.haspos)
140                         {
141                                 int4            len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos);
142
143                                 res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos));
144                                 memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]),
145                                            &(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos));
146                                 *(uint16 *) (res->pos) += *(uint16 *) (ptr->pos);
147                                 pfree(ptr->pos);
148                         }
149                         else
150                         {
151                                 res->entry.haspos = 1;
152                                 res->pos = ptr->pos;
153                         }
154                 }
155                 ptr++;
156         }
157         if (res->entry.haspos)
158         {
159                 *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
160                 *outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
161         }
162         *outbuflen += SHORTALIGN(res->entry.len);
163
164         return res + 1 - a;
165 }
166
167 #define WAITWORD        1
168 #define WAITENDWORD 2
169 #define WAITNEXTCHAR    3
170 #define WAITENDCMPLX    4
171 #define WAITPOSINFO 5
172 #define INPOSINFO       6
173 #define WAITPOSDELIM    7
174
175 #define RESIZEPRSBUF \
176 do { \
177         if ( state->curpos - state->word + 1 >= state->len ) \
178         { \
179                 int4 clen = state->curpos - state->word; \
180                 state->len *= 2; \
181                 state->word = (char*)repalloc( (void*)state->word, state->len ); \
182                 state->curpos = state->word + clen; \
183         } \
184 } while (0)
185
186 int4
187 gettoken_tsvector(TI_IN_STATE * state)
188 {
189         int4            oldstate = 0;
190
191         state->curpos = state->word;
192         state->state = WAITWORD;
193         state->alen = 0;
194
195         while (1)
196         {
197                 if (state->state == WAITWORD)
198                 {
199                         if (*(state->prsbuf) == '\0')
200                                 return 0;
201                         else if (*(state->prsbuf) == '\'')
202                                 state->state = WAITENDCMPLX;
203                         else if (*(state->prsbuf) == '\\')
204                         {
205                                 state->state = WAITNEXTCHAR;
206                                 oldstate = WAITENDWORD;
207                         }
208                         else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
209                                 ereport(ERROR,
210                                                 (errcode(ERRCODE_SYNTAX_ERROR),
211                                                  errmsg("syntax error")));
212                         else if (*(state->prsbuf) != ' ')
213                         {
214                                 *(state->curpos) = *(state->prsbuf);
215                                 state->curpos++;
216                                 state->state = WAITENDWORD;
217                         }
218                 }
219                 else if (state->state == WAITNEXTCHAR)
220                 {
221                         if (*(state->prsbuf) == '\0')
222                                 ereport(ERROR,
223                                                 (errcode(ERRCODE_SYNTAX_ERROR),
224                                                  errmsg("there is no escaped character")));
225                         else
226                         {
227                                 RESIZEPRSBUF;
228                                 *(state->curpos) = *(state->prsbuf);
229                                 state->curpos++;
230                                 state->state = oldstate;
231                         }
232                 }
233                 else if (state->state == WAITENDWORD)
234                 {
235                         if (*(state->prsbuf) == '\\')
236                         {
237                                 state->state = WAITNEXTCHAR;
238                                 oldstate = WAITENDWORD;
239                         }
240                         else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
241                                          (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
242                         {
243                                 RESIZEPRSBUF;
244                                 if (state->curpos == state->word)
245                                         ereport(ERROR,
246                                                         (errcode(ERRCODE_SYNTAX_ERROR),
247                                                          errmsg("syntax error")));
248                                 *(state->curpos) = '\0';
249                                 return 1;
250                         }
251                         else if (*(state->prsbuf) == ':')
252                         {
253                                 if (state->curpos == state->word)
254                                         ereport(ERROR,
255                                                         (errcode(ERRCODE_SYNTAX_ERROR),
256                                                          errmsg("syntax error")));
257                                 *(state->curpos) = '\0';
258                                 if (state->oprisdelim)
259                                         return 1;
260                                 else
261                                         state->state = INPOSINFO;
262                         }
263                         else
264                         {
265                                 RESIZEPRSBUF;
266                                 *(state->curpos) = *(state->prsbuf);
267                                 state->curpos++;
268                         }
269                 }
270                 else if (state->state == WAITENDCMPLX)
271                 {
272                         if (*(state->prsbuf) == '\'')
273                         {
274                                 RESIZEPRSBUF;
275                                 *(state->curpos) = '\0';
276                                 if (state->curpos == state->word)
277                                         ereport(ERROR,
278                                                         (errcode(ERRCODE_SYNTAX_ERROR),
279                                                          errmsg("syntax error")));
280                                 if (state->oprisdelim)
281                                 {
282                                         state->prsbuf++;
283                                         return 1;
284                                 }
285                                 else
286                                         state->state = WAITPOSINFO;
287                         }
288                         else if (*(state->prsbuf) == '\\')
289                         {
290                                 state->state = WAITNEXTCHAR;
291                                 oldstate = WAITENDCMPLX;
292                         }
293                         else if (*(state->prsbuf) == '\0')
294                                 ereport(ERROR,
295                                                 (errcode(ERRCODE_SYNTAX_ERROR),
296                                                  errmsg("syntax error")));
297                         else
298                         {
299                                 RESIZEPRSBUF;
300                                 *(state->curpos) = *(state->prsbuf);
301                                 state->curpos++;
302                         }
303                 }
304                 else if (state->state == WAITPOSINFO)
305                 {
306                         if (*(state->prsbuf) == ':')
307                                 state->state = INPOSINFO;
308                         else
309                                 return 1;
310                 }
311                 else if (state->state == INPOSINFO)
312                 {
313                         if (isdigit(*(state->prsbuf)))
314                         {
315                                 if (state->alen == 0)
316                                 {
317                                         state->alen = 4;
318                                         state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
319                                         *(uint16 *) (state->pos) = 0;
320                                 }
321                                 else if (*(uint16 *) (state->pos) + 1 >= state->alen)
322                                 {
323                                         state->alen *= 2;
324                                         state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
325                                 }
326                                 (*(uint16 *) (state->pos))++;
327                                 state->pos[*(uint16 *) (state->pos)].pos = LIMITPOS(atoi(state->prsbuf));
328                                 if (state->pos[*(uint16 *) (state->pos)].pos == 0)
329                                         ereport(ERROR,
330                                                         (errcode(ERRCODE_SYNTAX_ERROR),
331                                                          errmsg("wrong position info")));
332                                 state->pos[*(uint16 *) (state->pos)].weight = 0;
333                                 state->state = WAITPOSDELIM;
334                         }
335                         else
336                                 ereport(ERROR,
337                                                 (errcode(ERRCODE_SYNTAX_ERROR),
338                                                  errmsg("syntax error")));
339                 }
340                 else if (state->state == WAITPOSDELIM)
341                 {
342                         if (*(state->prsbuf) == ',')
343                                 state->state = INPOSINFO;
344                         else if (tolower(*(state->prsbuf)) == 'a' || *(state->prsbuf) == '*')
345                         {
346                                 if (state->pos[*(uint16 *) (state->pos)].weight)
347                                         ereport(ERROR,
348                                                         (errcode(ERRCODE_SYNTAX_ERROR),
349                                                          errmsg("syntax error")));
350                                 state->pos[*(uint16 *) (state->pos)].weight = 3;
351                         }
352                         else if (tolower(*(state->prsbuf)) == 'b')
353                         {
354                                 if (state->pos[*(uint16 *) (state->pos)].weight)
355                                         ereport(ERROR,
356                                                         (errcode(ERRCODE_SYNTAX_ERROR),
357                                                          errmsg("syntax error")));
358                                 state->pos[*(uint16 *) (state->pos)].weight = 2;
359                         }
360                         else if (tolower(*(state->prsbuf)) == 'c')
361                         {
362                                 if (state->pos[*(uint16 *) (state->pos)].weight)
363                                         ereport(ERROR,
364                                                         (errcode(ERRCODE_SYNTAX_ERROR),
365                                                          errmsg("syntax error")));
366                                 state->pos[*(uint16 *) (state->pos)].weight = 1;
367                         }
368                         else if (tolower(*(state->prsbuf)) == 'd')
369                         {
370                                 if (state->pos[*(uint16 *) (state->pos)].weight)
371                                         ereport(ERROR,
372                                                         (errcode(ERRCODE_SYNTAX_ERROR),
373                                                          errmsg("syntax error")));
374                                 state->pos[*(uint16 *) (state->pos)].weight = 0;
375                         }
376                         else if (isspace(*(state->prsbuf)) || *(state->prsbuf) == '\0')
377                                 return 1;
378                         else if (!isdigit(*(state->prsbuf)))
379                                 ereport(ERROR,
380                                                 (errcode(ERRCODE_SYNTAX_ERROR),
381                                                  errmsg("syntax error")));
382                 }
383                 else
384                         /* internal error */
385                         elog(ERROR, "internal error");
386                 state->prsbuf++;
387         }
388
389         return 0;
390 }
391
392 Datum
393 tsvector_in(PG_FUNCTION_ARGS)
394 {
395         char       *buf = PG_GETARG_CSTRING(0);
396         TI_IN_STATE state;
397         WordEntryIN *arr;
398         WordEntry  *inarr;
399         int4            len = 0,
400                                 totallen = 64;
401         tsvector   *in;
402         char       *tmpbuf,
403                            *cur;
404         int4            i,
405                                 buflen = 256;
406
407         state.prsbuf = buf;
408         state.len = 32;
409         state.word = (char *) palloc(state.len);
410         state.oprisdelim = false;
411
412         arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
413         cur = tmpbuf = (char *) palloc(buflen);
414         while (gettoken_tsvector(&state))
415         {
416                 if (len >= totallen)
417                 {
418                         totallen *= 2;
419                         arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
420                 }
421                 while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
422                 {
423                         int4            dist = cur - tmpbuf;
424
425                         buflen *= 2;
426                         tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
427                         cur = tmpbuf + dist;
428                 }
429                 if (state.curpos - state.word >= MAXSTRLEN)
430                         ereport(ERROR,
431                                         (errcode(ERRCODE_SYNTAX_ERROR),
432                                          errmsg("word is too long")));
433                 arr[len].entry.len = state.curpos - state.word;
434                 if (cur - tmpbuf > MAXSTRPOS)
435                         ereport(ERROR,
436                                         (errcode(ERRCODE_SYNTAX_ERROR),
437                                          errmsg("too long value")));
438                 arr[len].entry.pos = cur - tmpbuf;
439                 memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
440                 cur += arr[len].entry.len;
441                 if (state.alen)
442                 {
443                         arr[len].entry.haspos = 1;
444                         arr[len].pos = state.pos;
445                 }
446                 else
447                         arr[len].entry.haspos = 0;
448                 len++;
449         }
450         pfree(state.word);
451
452         if (len > 0)
453                 len = uniqueentry(arr, len, tmpbuf, &buflen);
454         else
455                 buflen=0;
456         totallen = CALCDATASIZE(len, buflen);
457         in = (tsvector *) palloc(totallen);
458         memset(in, 0, totallen);
459         in->len = totallen;
460         in->size = len;
461         cur = STRPTR(in);
462         inarr = ARRPTR(in);
463         for (i = 0; i < len; i++)
464         {
465                 memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
466                 arr[i].entry.pos = cur - STRPTR(in);
467                 cur += SHORTALIGN(arr[i].entry.len);
468                 if (arr[i].entry.haspos)
469                 {
470                         memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos));
471                         cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
472                         pfree(arr[i].pos);
473                 }
474                 memcpy(&(inarr[i]), &(arr[i].entry), sizeof(WordEntry));
475         }
476         pfree(tmpbuf);
477         pfree(arr);
478         PG_RETURN_POINTER(in);
479 }
480
481 Datum
482 tsvector_length(PG_FUNCTION_ARGS)
483 {
484         tsvector   *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
485         int4            ret = in->size;
486
487         PG_FREE_IF_COPY(in, 0);
488         PG_RETURN_INT32(ret);
489 }
490
491 Datum
492 tsvector_out(PG_FUNCTION_ARGS)
493 {
494         tsvector   *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
495         char       *outbuf;
496         int4            i,
497                                 j,
498                                 lenbuf = 0,
499                                 pp;
500         WordEntry  *ptr = ARRPTR(out);
501         char       *curin,
502                            *curout;
503
504         lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
505         for (i = 0; i < out->size; i++)
506         {
507                 lenbuf += ptr[i].len * 2 /* for escape */ ;
508                 if (ptr[i].haspos)
509                         lenbuf += 7 * POSDATALEN(out, &(ptr[i]));
510         }
511
512         curout = outbuf = (char *) palloc(lenbuf);
513         for (i = 0; i < out->size; i++)
514         {
515                 curin = STRPTR(out) + ptr->pos;
516                 if (i != 0)
517                         *curout++ = ' ';
518                 *curout++ = '\'';
519                 j = ptr->len;
520                 while (j--)
521                 {
522                         if (*curin == '\'')
523                         {
524                                 int4            pos = curout - outbuf;
525
526                                 outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
527                                 curout = outbuf + pos;
528                                 *curout++ = '\\';
529                         }
530                         *curout++ = *curin++;
531                 }
532                 *curout++ = '\'';
533                 if ((pp = POSDATALEN(out, ptr)) != 0)
534                 {
535                         WordEntryPos *wptr;
536
537                         *curout++ = ':';
538                         wptr = POSDATAPTR(out, ptr);
539                         while (pp)
540                         {
541                                 sprintf(curout, "%d", wptr->pos);
542                                 curout = strchr(curout, '\0');
543                                 switch (wptr->weight)
544                                 {
545                                         case 3:
546                                                 *curout++ = 'A';
547                                                 break;
548                                         case 2:
549                                                 *curout++ = 'B';
550                                                 break;
551                                         case 1:
552                                                 *curout++ = 'C';
553                                                 break;
554                                         case 0:
555                                         default:
556                                                 break;
557                                 }
558                                 if (pp > 1)
559                                         *curout++ = ',';
560                                 pp--;
561                                 wptr++;
562                         }
563                 }
564                 ptr++;
565         }
566         *curout = '\0';
567         outbuf[lenbuf - 1] = '\0';
568         PG_FREE_IF_COPY(out, 0);
569         PG_RETURN_POINTER(outbuf);
570 }
571
572 static int
573 compareWORD(const void *a, const void *b)
574 {
575         if (((WORD *) a)->len == ((WORD *) b)->len)
576         {
577                 int                     res = strncmp(
578                                                                   ((WORD *) a)->word,
579                                                                   ((WORD *) b)->word,
580                                                                   ((WORD *) b)->len);
581
582                 if (res == 0)
583                         return (((WORD *) a)->pos.pos > ((WORD *) b)->pos.pos) ? 1 : -1;
584                 return res;
585         }
586         return (((WORD *) a)->len > ((WORD *) b)->len) ? 1 : -1;
587 }
588
589 static int
590 uniqueWORD(WORD * a, int4 l)
591 {
592         WORD       *ptr,
593                            *res;
594         int                     tmppos;
595
596         if (l == 1)
597         {
598                 tmppos = LIMITPOS(a->pos.pos);
599                 a->alen = 2;
600                 a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
601                 a->pos.apos[0] = 1;
602                 a->pos.apos[1] = tmppos;
603                 return l;
604         }
605
606         res = a;
607         ptr = a + 1;
608
609         qsort((void *) a, l, sizeof(WORD), compareWORD);
610         tmppos = LIMITPOS(a->pos.pos);
611         a->alen = 2;
612         a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
613         a->pos.apos[0] = 1;
614         a->pos.apos[1] = tmppos;
615
616         while (ptr - a < l)
617         {
618                 if (!(ptr->len == res->len &&
619                           strncmp(ptr->word, res->word, res->len) == 0))
620                 {
621                         res++;
622                         res->len = ptr->len;
623                         res->word = ptr->word;
624                         tmppos = LIMITPOS(ptr->pos.pos);
625                         res->alen = 2;
626                         res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen);
627                         res->pos.apos[0] = 1;
628                         res->pos.apos[1] = tmppos;
629                 }
630                 else
631                 {
632                         pfree(ptr->word);
633                         if (res->pos.apos[0] < MAXNUMPOS - 1 && res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1)
634                         {
635                                 if (res->pos.apos[0] + 1 >= res->alen)
636                                 {
637                                         res->alen *= 2;
638                                         res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen);
639                                 }
640                                 if ( res->pos.apos[0]==0 || res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos) ) { 
641                                         res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos);
642                                         res->pos.apos[0]++;
643                                 }
644                         }
645                 }
646                 ptr++;
647         }
648
649         return res + 1 - a;
650 }
651
652 /*
653  * make value of tsvector
654  */
655 static tsvector *
656 makevalue(PRSTEXT * prs)
657 {
658         int4            i,
659                                 j,
660                                 lenstr = 0,
661                                 totallen;
662         tsvector   *in;
663         WordEntry  *ptr;
664         char       *str,
665                            *cur;
666
667         prs->curwords = uniqueWORD(prs->words, prs->curwords);
668         for (i = 0; i < prs->curwords; i++)
669         {
670                 lenstr += SHORTALIGN(prs->words[i].len);
671
672                 if (prs->words[i].alen)
673                         lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
674         }
675
676         totallen = CALCDATASIZE(prs->curwords, lenstr);
677         in = (tsvector *) palloc(totallen);
678         memset(in, 0, totallen);
679         in->len = totallen;
680         in->size = prs->curwords;
681
682         ptr = ARRPTR(in);
683         cur = str = STRPTR(in);
684         for (i = 0; i < prs->curwords; i++)
685         {
686                 ptr->len = prs->words[i].len;
687                 if (cur - str > MAXSTRPOS)
688                         ereport(ERROR,
689                                         (errcode(ERRCODE_SYNTAX_ERROR),
690                                          errmsg("value is too big")));
691                 ptr->pos = cur - str;
692                 memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
693                 pfree(prs->words[i].word);
694                 cur += SHORTALIGN(prs->words[i].len);
695                 if (prs->words[i].alen)
696                 {
697                         WordEntryPos *wptr;
698
699                         ptr->haspos = 1;
700                         *(uint16 *) cur = prs->words[i].pos.apos[0];
701                         wptr = POSDATAPTR(in, ptr);
702                         for (j = 0; j < *(uint16 *) cur; j++)
703                         {
704                                 wptr[j].weight = 0;
705                                 wptr[j].pos = prs->words[i].pos.apos[j + 1];
706                         }
707                         cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
708                         pfree(prs->words[i].pos.apos);
709                 }
710                 else
711                         ptr->haspos = 0;
712                 ptr++;
713         }
714         pfree(prs->words);
715         return in;
716 }
717
718
719 Datum
720 to_tsvector(PG_FUNCTION_ARGS)
721 {
722         text       *in = PG_GETARG_TEXT_P(1);
723         PRSTEXT         prs;
724         tsvector   *out = NULL;
725         TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));
726
727         prs.lenwords = 32;
728         prs.curwords = 0;
729         prs.pos = 0;
730         prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
731
732         parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
733         PG_FREE_IF_COPY(in, 1);
734
735         if (prs.curwords)
736                 out = makevalue(&prs);
737         else
738         {
739                 pfree(prs.words);
740                 out = palloc(CALCDATASIZE(0, 0));
741                 out->len = CALCDATASIZE(0, 0);
742                 out->size = 0;
743         }
744         PG_RETURN_POINTER(out);
745 }
746
747 Datum
748 to_tsvector_name(PG_FUNCTION_ARGS)
749 {
750         text       *cfg = PG_GETARG_TEXT_P(0);
751         Datum           res = DirectFunctionCall3(
752                                                                                   to_tsvector,
753                                                                                   Int32GetDatum(name2id_cfg(cfg)),
754                                                                                   PG_GETARG_DATUM(1),
755                                                                                   (Datum) 0
756         );
757
758         PG_FREE_IF_COPY(cfg, 0);
759         PG_RETURN_DATUM(res);
760 }
761
762 Datum
763 to_tsvector_current(PG_FUNCTION_ARGS)
764 {
765         Datum           res = DirectFunctionCall3(
766                                                                                   to_tsvector,
767                                                                                   Int32GetDatum(get_currcfg()),
768                                                                                   PG_GETARG_DATUM(0),
769                                                                                   (Datum) 0
770         );
771
772         PG_RETURN_DATUM(res);
773 }
774
775 static Oid
776 findFunc(char *fname)
777 {
778         FuncCandidateList clist,
779                                 ptr;
780         Oid                     funcid = InvalidOid;
781         List       *names = makeList1(makeString(fname));
782
783         ptr = clist = FuncnameGetCandidates(names, 1);
784         freeList(names);
785
786         if (!ptr)
787                 return funcid;
788
789         while (ptr)
790         {
791                 if (ptr->args[0] == TEXTOID && funcid == InvalidOid)
792                         funcid = ptr->oid;
793                 clist = ptr->next;
794                 pfree(ptr);
795                 ptr = clist;
796         }
797
798         return funcid;
799 }
800
801 /*
802  * Trigger
803  */
804 Datum
805 tsearch2(PG_FUNCTION_ARGS)
806 {
807         TriggerData *trigdata;
808         Trigger    *trigger;
809         Relation        rel;
810         HeapTuple       rettuple = NULL;
811         TSCfgInfo  *cfg = findcfg(get_currcfg());
812         int                     numidxattr,
813                                 i;
814         PRSTEXT         prs;
815         Datum           datum = (Datum) 0;
816         Oid                     funcoid = InvalidOid;
817
818         if (!CALLED_AS_TRIGGER(fcinfo))
819                 /* internal error */
820                 elog(ERROR, "TSearch: Not fired by trigger manager");
821
822         trigdata = (TriggerData *) fcinfo->context;
823         if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
824                 /* internal error */
825                 elog(ERROR, "TSearch: Can't process STATEMENT events");
826         if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
827                 /* internal error */
828                 elog(ERROR, "TSearch: Must be fired BEFORE event");
829
830         if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
831                 rettuple = trigdata->tg_trigtuple;
832         else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
833                 rettuple = trigdata->tg_newtuple;
834         else
835                 /* internal error */
836                 elog(ERROR, "TSearch: Unknown event");
837
838         trigger = trigdata->tg_trigger;
839         rel = trigdata->tg_relation;
840
841         if (trigger->tgnargs < 2)
842                 /* internal error */
843                 elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
844
845         numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
846         if (numidxattr == SPI_ERROR_NOATTRIBUTE)
847                 ereport(ERROR,
848                                 (errcode(ERRCODE_UNDEFINED_COLUMN),
849                                  errmsg("tsvector column \"%s\" does not exist",
850                                                 trigger->tgargs[0])));
851
852         prs.lenwords = 32;
853         prs.curwords = 0;
854         prs.pos = 0;
855         prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
856
857         /* find all words in indexable column */
858         for (i = 1; i < trigger->tgnargs; i++)
859         {
860                 int                     numattr;
861                 Oid                     oidtype;
862                 Datum           txt_toasted;
863                 bool            isnull;
864                 text       *txt;
865
866                 numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
867                 if (numattr == SPI_ERROR_NOATTRIBUTE)
868                 {
869                         funcoid = findFunc(trigger->tgargs[i]);
870                         if (funcoid == InvalidOid)
871                                 ereport(ERROR,
872                                                 (errcode(ERRCODE_UNDEFINED_COLUMN),
873                                                  errmsg("could not find function or field \"%s\"",
874                                                                 trigger->tgargs[i])));
875
876                         continue;
877                 }
878                 oidtype = SPI_gettypeid(rel->rd_att, numattr);
879                 /* We assume char() and varchar() are binary-equivalent to text */
880                 if (!(oidtype == TEXTOID ||
881                           oidtype == VARCHAROID ||
882                           oidtype == BPCHAROID))
883                 {
884                         elog(WARNING, "TSearch: '%s' is not of character type",
885                                  trigger->tgargs[i]);
886                         continue;
887                 }
888                 txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
889                 if (isnull)
890                         continue;
891
892                 if (funcoid != InvalidOid)
893                 {
894                         text       *txttmp = (text *) DatumGetPointer(OidFunctionCall1(
895                                                                                                                                  funcoid,
896                                                                                          PointerGetDatum(txt_toasted)
897                                                                                                                                           ));
898
899                         txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
900                         if (txt == txttmp)
901                                 txt_toasted = PointerGetDatum(txt);
902                 }
903                 else
904                         txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
905
906                 parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
907                 if (txt != (text *) DatumGetPointer(txt_toasted))
908                         pfree(txt);
909         }
910
911         /* make tsvector value */
912         if (prs.curwords)
913         {
914                 datum = PointerGetDatum(makevalue(&prs));
915                 rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
916                                                                    &datum, NULL);
917                 pfree(DatumGetPointer(datum));
918         }
919         else
920         {
921                 tsvector   *out = palloc(CALCDATASIZE(0, 0));
922
923                 out->len = CALCDATASIZE(0, 0);
924                 out->size = 0;
925                 datum = PointerGetDatum(out);
926                 pfree(prs.words);
927                 rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
928                                                                    &datum, NULL);
929         }
930
931         if (rettuple == NULL)
932                 /* internal error */
933                 elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
934
935         return PointerGetDatum(rettuple);
936 }
937
938 static int
939 silly_cmp_tsvector(const tsvector *a, const tsvector *b) {
940         if ( a->len < b->len )  
941                 return -1;
942         else if ( a->len > b->len )
943                 return 1;
944         else if ( a->size < b->size ) 
945                 return -1;
946         else if ( a->size > b->size )
947                 return 1;
948         else {
949                 unsigned char *aptr=(unsigned char *)(a->data) + DATAHDRSIZE;
950                 unsigned char *bptr=(unsigned char *)(b->data) + DATAHDRSIZE;
951                 
952                 while( aptr - ( (unsigned char *)(a->data) ) < a->len ) {
953                         if ( *aptr != *bptr )
954                                 return ( *aptr < *bptr ) ? -1 : 1;
955                         aptr++; bptr++;
956                 } 
957         }
958         return 0;       
959 }
960
961 PG_FUNCTION_INFO_V1(tsvector_cmp);
962 PG_FUNCTION_INFO_V1(tsvector_lt);
963 PG_FUNCTION_INFO_V1(tsvector_le);
964 PG_FUNCTION_INFO_V1(tsvector_eq);
965 PG_FUNCTION_INFO_V1(tsvector_ne);
966 PG_FUNCTION_INFO_V1(tsvector_ge);
967 PG_FUNCTION_INFO_V1(tsvector_gt);
968 Datum           tsvector_cmp(PG_FUNCTION_ARGS);
969 Datum           tsvector_lt(PG_FUNCTION_ARGS);
970 Datum           tsvector_le(PG_FUNCTION_ARGS);
971 Datum           tsvector_eq(PG_FUNCTION_ARGS);
972 Datum           tsvector_ne(PG_FUNCTION_ARGS);
973 Datum           tsvector_ge(PG_FUNCTION_ARGS);
974 Datum           tsvector_gt(PG_FUNCTION_ARGS);
975
/*
 * RUNCMP: shared prologue of the comparison functions below.
 *
 * Must be the first thing in the function body, because it declares
 * locals: "a" and "b" are the detoasted arguments 0 and 1, and "res" is
 * the result of silly_cmp_tsvector(a, b).  Detoasted copies are released
 * again before the macro ends, so only "res" may be used afterwards.
 */
#define RUNCMP \
	tsvector   *a = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); \
	tsvector   *b = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1))); \
	int			res = silly_cmp_tsvector(a, b); \
	PG_FREE_IF_COPY(a, 0); \
	PG_FREE_IF_COPY(b, 1);
982
983 Datum
984 tsvector_cmp(PG_FUNCTION_ARGS)   {
985         RUNCMP
986         PG_RETURN_INT32(res);
987 }
988
989 Datum
990 tsvector_lt(PG_FUNCTION_ARGS) {
991         RUNCMP
992         PG_RETURN_BOOL((res < 0) ? true : false);
993 }
994
995 Datum
996 tsvector_le(PG_FUNCTION_ARGS) {
997         RUNCMP
998         PG_RETURN_BOOL((res <= 0) ? true : false);
999 }
1000
1001 Datum
1002 tsvector_eq(PG_FUNCTION_ARGS) {
1003         RUNCMP
1004         PG_RETURN_BOOL((res == 0) ? true : false);
1005 }
1006
1007 Datum
1008 tsvector_ge(PG_FUNCTION_ARGS) {
1009         RUNCMP
1010         PG_RETURN_BOOL((res >= 0) ? true : false);
1011 }
1012  
1013 Datum
1014 tsvector_gt(PG_FUNCTION_ARGS) {
1015         RUNCMP
1016         PG_RETURN_BOOL((res > 0) ? true : false);
1017 }               
1018  
1019 Datum
1020 tsvector_ne(PG_FUNCTION_ARGS) {   
1021         RUNCMP      
1022         PG_RETURN_BOOL((res != 0) ? true : false);
1023 }
1024