]> granicus.if.org Git - postgresql/blob - src/backend/utils/adt/tsquery.c
9a890d2ae687e8bee4916e90f3e73262a32678b5
[postgresql] / src / backend / utils / adt / tsquery.c
1 /*-------------------------------------------------------------------------
2  *
3  * tsquery.c
4  *        I/O functions for tsquery
5  *
6  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7  *
8  *
9  * IDENTIFICATION
10  *        $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.18 2008/05/16 16:31:01 tgl Exp $
11  *
12  *-------------------------------------------------------------------------
13  */
14
15 #include "postgres.h"
16
17 #include "libpq/pqformat.h"
18 #include "miscadmin.h"
19 #include "tsearch/ts_locale.h"
20 #include "tsearch/ts_type.h"
21 #include "tsearch/ts_utils.h"
22 #include "utils/builtins.h"
23 #include "utils/memutils.h"
24 #include "utils/pg_crc.h"
25
26
27 struct TSQueryParserStateData
28 {
29         /* State for gettoken_query */
30         char       *buffer;                     /* entire string we are scanning */
31         char       *buf;                        /* current scan point */
32         int                     state;
33         int                     count;                  /* nesting count, incremented by (,
34                                                                  * decremented by ) */
35
36         /* polish (prefix) notation in list, filled in by push* functions */
37         List       *polstr;
38
39         /*
40          * Strings from operands are collected in op. curop is a pointer to the
41          * end of used space of op.
42          */
43         char       *op;
44         char       *curop;
45         int                     lenop;                  /* allocated size of op */
46         int                     sumlen;                 /* used size of op */
47
48         /* state for value's parser */
49         TSVectorParseState valstate;
50 };
51
/*
 * Tokenizer states, kept in TSQueryParserStateData.state
 */
#define WAITOPERAND			1	/* an operand, '!' or '(' must come next */
#define WAITOPERATOR		2	/* an operator, ')' or end of input comes next */
#define WAITFIRSTOPERAND	3	/* like WAITOPERAND, but nothing was read yet */
#define WAITSINGLEOPERAND	4	/* whole input is taken as one operand */
57
58 /*
59  * subroutine to parse the modifiers (weight and prefix flag currently) 
60  * part, like ':1AB' of a query.
61  */
62 static char *
63 get_modifiers(char *buf, int16 *weight, bool *prefix)
64 {
65         *weight = 0;
66         *prefix = false;
67
68         if (!t_iseq(buf, ':'))
69                 return buf;
70
71         buf++;
72         while (*buf && pg_mblen(buf) == 1)
73         {
74                 switch (*buf)
75                 {
76                         case 'a':
77                         case 'A':
78                                 *weight |= 1 << 3;
79                                 break;
80                         case 'b':
81                         case 'B':
82                                 *weight |= 1 << 2;
83                                 break;
84                         case 'c':
85                         case 'C':
86                                 *weight |= 1 << 1;
87                                 break;
88                         case 'd':
89                         case 'D':
90                                 *weight |= 1;
91                                 break;
92                         case '*':
93                                 *prefix = true;
94                                 break;
95                         default:
96                                 return buf;
97                 }
98                 buf++;
99         }
100
101         return buf;
102 }
103
/*
 * Token kinds returned by gettoken_query
 */
typedef enum
{
	PT_END = 0,					/* end of input */
	PT_ERR = 1,					/* syntax error */
	PT_VAL = 2,					/* operand (value) */
	PT_OPR = 3,					/* operator */
	PT_OPEN = 4,				/* opening parenthesis */
	PT_CLOSE = 5				/* closing parenthesis */
} ts_tokentype;
116
117 /*
118  * get token from query string
119  *
120  * *operator is filled in with OP_* when return values is PT_OPR
121  * *strval, *lenval and *weight are filled in when return value is PT_VAL
122  */
123 static ts_tokentype
124 gettoken_query(TSQueryParserState state,
125                            int8 *operator,
126                            int *lenval, char **strval, int16 *weight, bool *prefix)
127 {
128         *weight = 0;
129         *prefix = false;
130
131         while (1)
132         {
133                 switch (state->state)
134                 {
135                         case WAITFIRSTOPERAND:
136                         case WAITOPERAND:
137                                 if (t_iseq(state->buf, '!'))
138                                 {
139                                         (state->buf)++;         /* can safely ++, t_iseq guarantee
140                                                                                  * that pg_mblen()==1 */
141                                         *operator = OP_NOT;
142                                         state->state = WAITOPERAND;
143                                         return PT_OPR;
144                                 }
145                                 else if (t_iseq(state->buf, '('))
146                                 {
147                                         state->count++;
148                                         (state->buf)++;
149                                         state->state = WAITOPERAND;
150                                         return PT_OPEN;
151                                 }
152                                 else if (t_iseq(state->buf, ':'))
153                                 {
154                                         ereport(ERROR,
155                                                         (errcode(ERRCODE_SYNTAX_ERROR),
156                                                          errmsg("syntax error in tsquery: \"%s\"",
157                                                                         state->buffer)));
158                                 }
159                                 else if (!t_isspace(state->buf))
160                                 {
161                                         /*
162                                          * We rely on the tsvector parser to parse the value for
163                                          * us
164                                          */
165                                         reset_tsvector_parser(state->valstate, state->buf);
166                                         if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
167                                         {
168                                                 state->buf = get_modifiers(state->buf, weight, prefix);
169                                                 state->state = WAITOPERATOR;
170                                                 return PT_VAL;
171                                         }
172                                         else if (state->state == WAITFIRSTOPERAND)
173                                                 return PT_END;
174                                         else
175                                                 ereport(ERROR,
176                                                                 (errcode(ERRCODE_SYNTAX_ERROR),
177                                                                  errmsg("no operand in tsquery: \"%s\"",
178                                                                                 state->buffer)));
179                                 }
180                                 break;
181                         case WAITOPERATOR:
182                                 if (t_iseq(state->buf, '&'))
183                                 {
184                                         state->state = WAITOPERAND;
185                                         *operator = OP_AND;
186                                         (state->buf)++;
187                                         return PT_OPR;
188                                 }
189                                 if (t_iseq(state->buf, '|'))
190                                 {
191                                         state->state = WAITOPERAND;
192                                         *operator = OP_OR;
193                                         (state->buf)++;
194                                         return PT_OPR;
195                                 }
196                                 else if (t_iseq(state->buf, ')'))
197                                 {
198                                         (state->buf)++;
199                                         state->count--;
200                                         return (state->count < 0) ? PT_ERR : PT_CLOSE;
201                                 }
202                                 else if (*(state->buf) == '\0')
203                                         return (state->count) ? PT_ERR : PT_END;
204                                 else if (!t_isspace(state->buf))
205                                         return PT_ERR;
206                                 break;
207                         case WAITSINGLEOPERAND:
208                                 if (*(state->buf) == '\0')
209                                         return PT_END;
210                                 *strval = state->buf;
211                                 *lenval = strlen(state->buf);
212                                 state->buf += strlen(state->buf);
213                                 state->count++;
214                                 return PT_VAL;
215                         default:
216                                 return PT_ERR;
217                                 break;
218                 }
219                 state->buf += pg_mblen(state->buf);
220         }
221         return PT_END;
222 }
223
224 /*
225  * Push an operator to state->polstr
226  */
227 void
228 pushOperator(TSQueryParserState state, int8 oper)
229 {
230         QueryOperator *tmp;
231
232         Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR);
233
234         tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));
235         tmp->type = QI_OPR;
236         tmp->oper = oper;
237         /* left is filled in later with findoprnd */
238
239         state->polstr = lcons(tmp, state->polstr);
240 }
241
242 static void
243 pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
244 {
245         QueryOperand *tmp;
246
247         if (distance >= MAXSTRPOS)
248                 ereport(ERROR,
249                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
250                                  errmsg("value is too big in tsquery: \"%s\"",
251                                                 state->buffer)));
252         if (lenval >= MAXSTRLEN)
253                 ereport(ERROR,
254                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
255                                  errmsg("operand is too long in tsquery: \"%s\"",
256                                                 state->buffer)));
257
258         tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
259         tmp->type = QI_VAL;
260         tmp->weight = weight;
261         tmp->prefix = prefix;
262         tmp->valcrc = (int32) valcrc;
263         tmp->length = lenval;
264         tmp->distance = distance;
265
266         state->polstr = lcons(tmp, state->polstr);
267 }
268
269 /*
270  * Push an operand to state->polstr.
271  *
272  * strval must point to a string equal to state->curop. lenval is the length
273  * of the string.
274  */
275 void
276 pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight, bool prefix)
277 {
278         pg_crc32        valcrc;
279
280         if (lenval >= MAXSTRLEN)
281                 ereport(ERROR,
282                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
283                                  errmsg("word is too long in tsquery: \"%s\"",
284                                                 state->buffer)));
285
286         INIT_CRC32(valcrc);
287         COMP_CRC32(valcrc, strval, lenval);
288         FIN_CRC32(valcrc);
289         pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
290
291         /* append the value string to state.op, enlarging buffer if needed first */
292         while (state->curop - state->op + lenval + 1 >= state->lenop)
293         {
294                 int                     used = state->curop - state->op;
295
296                 state->lenop *= 2;
297                 state->op = (char *) repalloc((void *) state->op, state->lenop);
298                 state->curop = state->op + used;
299         }
300         memcpy((void *) state->curop, (void *) strval, lenval);
301         state->curop += lenval;
302         *(state->curop) = '\0';
303         state->curop++;
304         state->sumlen += lenval + 1 /* \0 */ ;
305 }
306
307
308 /*
309  * Push a stopword placeholder to state->polstr
310  */
311 void
312 pushStop(TSQueryParserState state)
313 {
314         QueryOperand *tmp;
315
316         tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
317         tmp->type = QI_VALSTOP;
318
319         state->polstr = lcons(tmp, state->polstr);
320 }
321
322
323 #define STACKDEPTH      32
324
325 /*
326  * Make polish (prefix) notation of query.
327  *
328  * See parse_tsquery for explanation of pushval.
329  */
330 static void
331 makepol(TSQueryParserState state,
332                 PushFunction pushval,
333                 Datum opaque)
334 {
335         int8            operator = 0;
336         ts_tokentype type;
337         int                     lenval = 0;
338         char       *strval = NULL;
339         int8            opstack[STACKDEPTH];
340         int                     lenstack = 0;
341         int16           weight = 0;
342         bool            prefix;
343
344         /* since this function recurses, it could be driven to stack overflow */
345         check_stack_depth();
346
347         while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END)
348         {
349                 switch (type)
350                 {
351                         case PT_VAL:
352                                 pushval(opaque, state, strval, lenval, weight, prefix);
353                                 while (lenstack && (opstack[lenstack - 1] == OP_AND ||
354                                                                         opstack[lenstack - 1] == OP_NOT))
355                                 {
356                                         lenstack--;
357                                         pushOperator(state, opstack[lenstack]);
358                                 }
359                                 break;
360                         case PT_OPR:
361                                 if (lenstack && operator == OP_OR)
362                                         pushOperator(state, OP_OR);
363                                 else
364                                 {
365                                         if (lenstack == STACKDEPTH) /* internal error */
366                                                 elog(ERROR, "tsquery stack too small");
367                                         opstack[lenstack] = operator;
368                                         lenstack++;
369                                 }
370                                 break;
371                         case PT_OPEN:
372                                 makepol(state, pushval, opaque);
373
374                                 if (lenstack && (opstack[lenstack - 1] == OP_AND ||
375                                                                  opstack[lenstack - 1] == OP_NOT))
376                                 {
377                                         lenstack--;
378                                         pushOperator(state, opstack[lenstack]);
379                                 }
380                                 break;
381                         case PT_CLOSE:
382                                 while (lenstack)
383                                 {
384                                         lenstack--;
385                                         pushOperator(state, opstack[lenstack]);
386                                 };
387                                 return;
388                         case PT_ERR:
389                         default:
390                                 ereport(ERROR,
391                                                 (errcode(ERRCODE_SYNTAX_ERROR),
392                                                  errmsg("syntax error in tsquery: \"%s\"",
393                                                                 state->buffer)));
394                 }
395         }
396         while (lenstack)
397         {
398                 lenstack--;
399                 pushOperator(state, opstack[lenstack]);
400         }
401 }
402
403 static void
404 findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes)
405 {
406         /* since this function recurses, it could be driven to stack overflow. */
407         check_stack_depth();
408
409         if (*pos >= nnodes)
410                 elog(ERROR, "malformed tsquery: operand not found");
411
412         if (ptr[*pos].type == QI_VAL ||
413                 ptr[*pos].type == QI_VALSTOP)   /* need to handle VALSTOP here, they
414                                                                                  * haven't been cleaned away yet. */
415         {
416                 (*pos)++;
417         }
418         else
419         {
420                 Assert(ptr[*pos].type == QI_OPR);
421
422                 if (ptr[*pos].operator.oper == OP_NOT)
423                 {
424                         ptr[*pos].operator.left = 1;
425                         (*pos)++;
426                         findoprnd_recurse(ptr, pos, nnodes);
427                 }
428                 else
429                 {
430                         QueryOperator *curitem = &ptr[*pos].operator;
431                         int                     tmp = *pos;
432
433                         Assert(curitem->oper == OP_AND || curitem->oper == OP_OR);
434
435                         (*pos)++;
436                         findoprnd_recurse(ptr, pos, nnodes);
437                         curitem->left = *pos - tmp;
438                         findoprnd_recurse(ptr, pos, nnodes);
439                 }
440         }
441 }
442
443
444 /*
445  * Fills in the left-fields previously left unfilled. The input
446  * QueryItems must be in polish (prefix) notation.
447  */
448 static void
449 findoprnd(QueryItem *ptr, int size)
450 {
451         uint32          pos;
452
453         pos = 0;
454         findoprnd_recurse(ptr, &pos, size);
455
456         if (pos != size)
457                 elog(ERROR, "malformed tsquery: extra nodes");
458 }
459
460
461 /*
462  * Each value (operand) in the query is be passed to pushval. pushval can
463  * transform the simple value to an arbitrarily complex expression using
464  * pushValue and pushOperator. It must push a single value with pushValue,
465  * a complete expression with all operands, or a a stopword placeholder
466  * with pushStop, otherwise the prefix notation representation will be broken,
467  * having an operator with no operand.
468  *
469  * opaque is passed on to pushval as is, pushval can use it to store its
470  * private state.
471  *
472  * The returned query might contain QI_STOPVAL nodes. The caller is responsible
473  * for cleaning them up (with clean_fakeval)
474  */
475 TSQuery
476 parse_tsquery(char *buf,
477                           PushFunction pushval,
478                           Datum opaque,
479                           bool isplain)
480 {
481         struct TSQueryParserStateData state;
482         int                     i;
483         TSQuery         query;
484         int                     commonlen;
485         QueryItem  *ptr;
486         ListCell   *cell;
487
488         /* init state */
489         state.buffer = buf;
490         state.buf = buf;
491         state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
492         state.count = 0;
493         state.polstr = NIL;
494
495         /* init value parser's state */
496         state.valstate = init_tsvector_parser(state.buffer, true, true);
497
498         /* init list of operand */
499         state.sumlen = 0;
500         state.lenop = 64;
501         state.curop = state.op = (char *) palloc(state.lenop);
502         *(state.curop) = '\0';
503
504         /* parse query & make polish notation (postfix, but in reverse order) */
505         makepol(&state, pushval, opaque);
506
507         close_tsvector_parser(state.valstate);
508
509         if (list_length(state.polstr) == 0)
510         {
511                 ereport(NOTICE,
512                                 (errmsg("text-search query doesn't contain lexemes: \"%s\"",
513                                                 state.buffer)));
514                 query = (TSQuery) palloc(HDRSIZETQ);
515                 SET_VARSIZE(query, HDRSIZETQ);
516                 query->size = 0;
517                 return query;
518         }
519
520         /* Pack the QueryItems in the final TSQuery struct to return to caller */
521         commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
522         query = (TSQuery) palloc0(commonlen);
523         SET_VARSIZE(query, commonlen);
524         query->size = list_length(state.polstr);
525         ptr = GETQUERY(query);
526
527         /* Copy QueryItems to TSQuery */
528         i = 0;
529         foreach(cell, state.polstr)
530         {
531                 QueryItem  *item = (QueryItem *) lfirst(cell);
532
533                 switch (item->type)
534                 {
535                         case QI_VAL:
536                                 memcpy(&ptr[i], item, sizeof(QueryOperand));
537                                 break;
538                         case QI_VALSTOP:
539                                 ptr[i].type = QI_VALSTOP;
540                                 break;
541                         case QI_OPR:
542                                 memcpy(&ptr[i], item, sizeof(QueryOperator));
543                                 break;
544                         default:
545                                 elog(ERROR, "unrecognized QueryItem type: %d", item->type);
546                 }
547                 i++;
548         }
549
550         /* Copy all the operand strings to TSQuery */
551         memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
552         pfree(state.op);
553
554         /* Set left operand pointers for every operator. */
555         findoprnd(ptr, query->size);
556
557         return query;
558 }
559
560 static void
561 pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
562                          int16 weight, bool prefix)
563 {
564         pushValue(state, strval, lenval, weight, prefix);
565 }
566
567 /*
568  * in without morphology
569  */
570 Datum
571 tsqueryin(PG_FUNCTION_ARGS)
572 {
573         char       *in = PG_GETARG_CSTRING(0);
574
575         pg_verifymbstr(in, strlen(in), false);
576
577         PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false));
578 }
579
580 /*
581  * out function
582  */
583 typedef struct
584 {
585         QueryItem  *curpol;
586         char       *buf;
587         char       *cur;
588         char       *op;
589         int                     buflen;
590 } INFIX;
591
/*
 * Makes sure inf->buf is large enough for adding 'addsize' bytes (plus a
 * terminating NUL) at inf->cur.  The buffer is doubled until it fits, and
 * inf->cur is re-pointed into the reallocated buffer.
 *
 * Wrapped in do { ... } while (0) so that the macro behaves like a single
 * statement, e.g. in an unbraced if/else.  Note that 'inf' and 'addsize'
 * are evaluated more than once.
 */
#define RESIZEBUF(inf, addsize) \
do { \
	while (((inf)->cur - (inf)->buf) + (addsize) + 1 >= (inf)->buflen) \
	{ \
		int			resizebuf_used = (inf)->cur - (inf)->buf; \
		(inf)->buflen *= 2; \
		(inf)->buf = (char *) repalloc((void *) (inf)->buf, (inf)->buflen); \
		(inf)->cur = (inf)->buf + resizebuf_used; \
	} \
} while (0)
601
602 /*
603  * recursive walk on tree and print it in
604  * infix (human-readable) view
605  */
606 static void
607 infix(INFIX *in, bool first)
608 {
609         /* since this function recurses, it could be driven to stack overflow. */
610         check_stack_depth();
611
612         if (in->curpol->type == QI_VAL)
613         {
614                 QueryOperand *curpol = &in->curpol->operand;
615                 char       *op = in->op + curpol->distance;
616                 int                     clen;
617
618                 RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
619                 *(in->cur) = '\'';
620                 in->cur++;
621                 while (*op)
622                 {
623                         if (t_iseq(op, '\''))
624                         {
625                                 *(in->cur) = '\'';
626                                 in->cur++;
627                         }
628                         else if (t_iseq(op, '\\'))
629                         {
630                                 *(in->cur) = '\\';
631                                 in->cur++;
632                         }
633                         COPYCHAR(in->cur, op);
634
635                         clen = pg_mblen(op);
636                         op += clen;
637                         in->cur += clen;
638                 }
639                 *(in->cur) = '\'';
640                 in->cur++;
641                 if (curpol->weight || curpol->prefix)
642                 {
643                         *(in->cur) = ':';
644                         in->cur++;
645                         if ( curpol->prefix )
646                         {
647                                 *(in->cur) = '*';
648                                 in->cur++;
649                         }
650                         if (curpol->weight & (1 << 3))
651                         {
652                                 *(in->cur) = 'A';
653                                 in->cur++;
654                         }
655                         if (curpol->weight & (1 << 2))
656                         {
657                                 *(in->cur) = 'B';
658                                 in->cur++;
659                         }
660                         if (curpol->weight & (1 << 1))
661                         {
662                                 *(in->cur) = 'C';
663                                 in->cur++;
664                         }
665                         if (curpol->weight & 1)
666                         {
667                                 *(in->cur) = 'D';
668                                 in->cur++;
669                         }
670                 }
671                 *(in->cur) = '\0';
672                 in->curpol++;
673         }
674         else if (in->curpol->operator.oper == OP_NOT)
675         {
676                 bool            isopr = false;
677
678                 RESIZEBUF(in, 1);
679                 *(in->cur) = '!';
680                 in->cur++;
681                 *(in->cur) = '\0';
682                 in->curpol++;
683
684                 if (in->curpol->type == QI_OPR)
685                 {
686                         isopr = true;
687                         RESIZEBUF(in, 2);
688                         sprintf(in->cur, "( ");
689                         in->cur = strchr(in->cur, '\0');
690                 }
691
692                 infix(in, isopr);
693                 if (isopr)
694                 {
695                         RESIZEBUF(in, 2);
696                         sprintf(in->cur, " )");
697                         in->cur = strchr(in->cur, '\0');
698                 }
699         }
700         else
701         {
702                 int8            op = in->curpol->operator.oper;
703                 INFIX           nrm;
704
705                 in->curpol++;
706                 if (op == OP_OR && !first)
707                 {
708                         RESIZEBUF(in, 2);
709                         sprintf(in->cur, "( ");
710                         in->cur = strchr(in->cur, '\0');
711                 }
712
713                 nrm.curpol = in->curpol;
714                 nrm.op = in->op;
715                 nrm.buflen = 16;
716                 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
717
718                 /* get right operand */
719                 infix(&nrm, false);
720
721                 /* get & print left operand */
722                 in->curpol = nrm.curpol;
723                 infix(in, false);
724
725                 /* print operator & right operand */
726                 RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
727                 switch (op)
728                 {
729                         case OP_OR:
730                                 sprintf(in->cur, " | %s", nrm.buf);
731                                 break;
732                         case OP_AND:
733                                 sprintf(in->cur, " & %s", nrm.buf);
734                                 break;
735                         default:
736                                 /* OP_NOT is handled in above if-branch */
737                                 elog(ERROR, "unrecognized operator type: %d", op);
738                 }
739                 in->cur = strchr(in->cur, '\0');
740                 pfree(nrm.buf);
741
742                 if (op == OP_OR && !first)
743                 {
744                         RESIZEBUF(in, 2);
745                         sprintf(in->cur, " )");
746                         in->cur = strchr(in->cur, '\0');
747                 }
748         }
749 }
750
751
752 Datum
753 tsqueryout(PG_FUNCTION_ARGS)
754 {
755         TSQuery         query = PG_GETARG_TSQUERY(0);
756         INFIX           nrm;
757
758         if (query->size == 0)
759         {
760                 char       *b = palloc(1);
761
762                 *b = '\0';
763                 PG_RETURN_POINTER(b);
764         }
765         nrm.curpol = GETQUERY(query);
766         nrm.buflen = 32;
767         nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
768         *(nrm.cur) = '\0';
769         nrm.op = GETOPERAND(query);
770         infix(&nrm, true);
771
772         PG_FREE_IF_COPY(query, 0);
773         PG_RETURN_CSTRING(nrm.buf);
774 }
775
776 /*
777  * Binary Input / Output functions. The binary format is as follows:
778  *
779  * uint32        number of operators/operands in the query
780  *
781  * Followed by the operators and operands, in prefix notation. For each
782  * operand:
783  *
784  * uint8        type, QI_VAL
785  * uint8        weight
786  *                      operand text in client encoding, null-terminated
787  * uint8        prefix
788  *
789  * For each operator:
790  * uint8        type, QI_OPR
791  * uint8        operator, one of OP_AND, OP_OR, OP_NOT.
792  */
793 Datum
794 tsquerysend(PG_FUNCTION_ARGS)
795 {
796         TSQuery         query = PG_GETARG_TSQUERY(0);
797         StringInfoData buf;
798         int                     i;
799         QueryItem  *item = GETQUERY(query);
800
801         pq_begintypsend(&buf);
802
803         pq_sendint(&buf, query->size, sizeof(uint32));
804         for (i = 0; i < query->size; i++)
805         {
806                 pq_sendint(&buf, item->type, sizeof(item->type));
807
808                 switch (item->type)
809                 {
810                         case QI_VAL:
811                                 pq_sendint(&buf, item->operand.weight, sizeof(uint8));
812                                 pq_sendint(&buf, item->operand.prefix, sizeof(uint8));
813                                 pq_sendstring(&buf, GETOPERAND(query) + item->operand.distance);
814                                 break;
815                         case QI_OPR:
816                                 pq_sendint(&buf, item->operator.oper, sizeof(item->operator.oper));
817                                 break;
818                         default:
819                                 elog(ERROR, "unrecognized tsquery node type: %d", item->type);
820                 }
821                 item++;
822         }
823
824         PG_FREE_IF_COPY(query, 0);
825
826         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
827 }
828
829 Datum
830 tsqueryrecv(PG_FUNCTION_ARGS)
831 {
832         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
833         TSQuery         query;
834         int                     i,
835                                 len;
836         QueryItem  *item;
837         int                     datalen;
838         char       *ptr;
839         uint32          size;
840         const char **operands;
841
842         size = pq_getmsgint(buf, sizeof(uint32));
843         if (size > (MaxAllocSize / sizeof(QueryItem)))
844                 elog(ERROR, "invalid size of tsquery");
845
846         /* Allocate space to temporarily hold operand strings */
847         operands = palloc(size * sizeof(char *));
848
849         /* Allocate space for all the QueryItems. */
850         len = HDRSIZETQ + sizeof(QueryItem) * size;
851         query = (TSQuery) palloc0(len);
852         query->size = size;
853         item = GETQUERY(query);
854
855         datalen = 0;
856         for (i = 0; i < size; i++)
857         {
858                 item->type = (int8) pq_getmsgint(buf, sizeof(int8));
859
860                 if (item->type == QI_VAL)
861                 {
862                         size_t          val_len;        /* length after recoding to server encoding */
863                         uint8           weight;
864                         uint8           prefix;
865                         const char *val;
866                         pg_crc32        valcrc;
867
868                         weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
869                         prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
870                         val = pq_getmsgstring(buf);
871                         val_len = strlen(val);
872
873                         /* Sanity checks */
874
875                         if (weight > 0xF)
876                                 elog(ERROR, "invalid tsquery: invalid weight bitmap");
877
878                         if (val_len > MAXSTRLEN)
879                                 elog(ERROR, "invalid tsquery: operand too long");
880
881                         if (datalen > MAXSTRPOS)
882                                 elog(ERROR, "invalid tsquery: total operand length exceeded");
883
884                         /* Looks valid. */
885
886                         INIT_CRC32(valcrc);
887                         COMP_CRC32(valcrc, val, val_len);
888                         FIN_CRC32(valcrc);
889
890                         item->operand.weight = weight;
891                         item->operand.prefix = (prefix) ? true : false;
892                         item->operand.valcrc = (int32) valcrc;
893                         item->operand.length = val_len;
894                         item->operand.distance = datalen;
895
896                         /*
897                          * Operand strings are copied to the final struct after this loop;
898                          * here we just collect them to an array
899                          */
900                         operands[i] = val;
901
902                         datalen += val_len + 1;         /* + 1 for the '\0' terminator */
903                 }
904                 else if (item->type == QI_OPR)
905                 {
906                         int8            oper;
907
908                         oper = (int8) pq_getmsgint(buf, sizeof(int8));
909                         if (oper != OP_NOT && oper != OP_OR && oper != OP_AND)
910                                 elog(ERROR, "invalid tsquery: unrecognized operator type %d",
911                                          (int) oper);
912                         if (i == size - 1)
913                                 elog(ERROR, "invalid pointer to right operand");
914
915                         item->operator.oper = oper;
916                 }
917                 else
918                         elog(ERROR, "unrecognized tsquery node type: %d", item->type);
919
920                 item++;
921         }
922
923         /* Enlarge buffer to make room for the operand values. */
924         query = (TSQuery) repalloc(query, len + datalen);
925         item = GETQUERY(query);
926         ptr = GETOPERAND(query);
927
928         /*
929          * Fill in the left-pointers. Checks that the tree is well-formed as a
930          * side-effect.
931          */
932         findoprnd(item, size);
933
934         /* Copy operands to output struct */
935         for (i = 0; i < size; i++)
936         {
937                 if (item->type == QI_VAL)
938                 {
939                         memcpy(ptr, operands[i], item->operand.length + 1);
940                         ptr += item->operand.length + 1;
941                 }
942                 item++;
943         }
944
945         pfree(operands);
946
947         Assert(ptr - GETOPERAND(query) == datalen);
948
949         SET_VARSIZE(query, len + datalen);
950
951         PG_RETURN_TSVECTOR(query);
952 }
953
954 /*
955  * debug function, used only for view query
956  * which will be executed in non-leaf pages in index
957  */
958 Datum
959 tsquerytree(PG_FUNCTION_ARGS)
960 {
961         TSQuery         query = PG_GETARG_TSQUERY(0);
962         INFIX           nrm;
963         text       *res;
964         QueryItem  *q;
965         int                     len;
966
967         if (query->size == 0)
968         {
969                 res = (text *) palloc(VARHDRSZ);
970                 SET_VARSIZE(res, VARHDRSZ);
971                 PG_RETURN_POINTER(res);
972         }
973
974         q = clean_NOT(GETQUERY(query), &len);
975
976         if (!q)
977         {
978                 res = cstring_to_text("T");
979         }
980         else
981         {
982                 nrm.curpol = q;
983                 nrm.buflen = 32;
984                 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
985                 *(nrm.cur) = '\0';
986                 nrm.op = GETOPERAND(query);
987                 infix(&nrm, true);
988                 res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
989                 pfree(q);
990         }
991
992         PG_FREE_IF_COPY(query, 0);
993
994         PG_RETURN_TEXT_P(res);
995 }