Refactoring by Heikki Linnakangas <heikki@enterprisedb.com> with

author Teodor Sigaev <teodor@sigaev.ru>

Fri, 7 Sep 2007 15:09:56 +0000 (15:09 +0000)

committer Teodor Sigaev <teodor@sigaev.ru>

Fri, 7 Sep 2007 15:09:56 +0000 (15:09 +0000)
author Teodor Sigaev <teodor@sigaev.ru>
Fri, 7 Sep 2007 15:09:56 +0000 (15:09 +0000)
committer Teodor Sigaev <teodor@sigaev.ru>
Fri, 7 Sep 2007 15:09:56 +0000 (15:09 +0000)
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c

index ee4b61d44bfe12fa3ab4830a0b80231337a9411a..d0b1bcc19def7f546644da8c2571c8467eb91ab1 100644 (file)
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.1 2007/08/21 01:11:18 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.2 2007/09/07 15:09:55 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -225,10 +225,17 @@ to_tsvector(PG_FUNCTION_ARGS)
  
  
  /*
- * This function is used for morph parsing
+ * This function is used for morph parsing.
+ *
+ * The value is passed to parsetext which will call the right dictionary to
+ * lexize the word. If it turns out to be a stopword, we push a QI_VALSTOP
+ * to the stack.
+ *
+ * All words belonging to the same variant are pushed as an ANDed list,
+ * and different variants are ORed together.
   */
  static void
-pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval, int2 weight)
+pushval_morph(void *opaque, TSQueryParserState state, char *strval, int lenval, int2 weight)
  {
         int4            count = 0;
         ParsedText      prs;
@@ -237,13 +244,14 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
                                 cntvar = 0,
                                 cntpos = 0,
                                 cnt = 0;
+       Oid cfg_id = (Oid) opaque; /* the input is actually an Oid, not a pointer */
  
         prs.lenwords = 4;
         prs.curwords = 0;
         prs.pos = 0;
         prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
  
-       parsetext(state->cfg_id, &prs, strval, lenval);
+       parsetext(cfg_id, &prs, strval, lenval);
  
         if (prs.curwords > 0)
         {
@@ -260,21 +268,21 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
                                 while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
                                 {
  
-                                       pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
+                                       pushValue(state, prs.words[count].word, prs.words[count].len, weight);
                                         pfree(prs.words[count].word);
                                         if (cnt)
-                                               pushquery(state, OPR, (int4) '&', 0, 0, 0);
+                                               pushOperator(state, OP_AND);
                                         cnt++;
                                         count++;
                                 }
  
                                 if (cntvar)
-                                       pushquery(state, OPR, (int4) '|', 0, 0, 0);
+                                       pushOperator(state, OP_OR);
                                 cntvar++;
                         }
  
                         if (cntpos)
-                               pushquery(state, OPR, (int4) '&', 0, 0, 0);
+                               pushOperator(state, OP_AND);
  
                         cntpos++;
                 }
@@ -283,7 +291,7 @@ pushval_morph(TSQueryParserState * state, int typeval, char *strval, int lenval,
  
         }
         else
-               pushval_asis(state, VALSTOP, NULL, 0, 0);
+               pushStop(state);
  }
  
  Datum
@@ -295,7 +303,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
         QueryItem  *res;
         int4            len;
  
-       query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, false);
+       query = parse_tsquery(TextPGetCString(in), pushval_morph, (void *) cfgid, false);
  
         if (query->size == 0)
                 PG_RETURN_TSQUERY(query);
@@ -333,7 +341,7 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
         QueryItem  *res;
         int4            len;
  
-       query = parse_tsquery(TextPGetCString(in), pushval_morph, cfgid, true);
+       query = parse_tsquery(TextPGetCString(in), pushval_morph, (void *)cfgid, true);
  
         if (query->size == 0)
                 PG_RETURN_TSQUERY(query);
diff --git a/src/backend/tsearch/ts_parse.c b/src/backend/tsearch/ts_parse.c

index 47e18fc1ac5b3c75137208621c40676ae401d321..22c5f2b86eaf3b8dd37ea69ef078433bd92a7ea3 100644 (file)
--- a/src/backend/tsearch/ts_parse.c
+++ b/src/backend/tsearch/ts_parse.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.2 2007/08/25 00:03:59 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.3 2007/09/07 15:09:55 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -344,10 +344,12 @@ LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem)
  }
  
  /*
- * Parse string and lexize words
+ * Parse string and lexize words.
+ *
+ * prs will be filled in.
   */
  void
-parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen)
+parsetext(Oid cfgId, ParsedText * prs, char *buf, int buflen)
  {
         int                     type,
                                 lenlemm;
@@ -427,7 +429,7 @@ parsetext(Oid cfgId, ParsedText * prs, char *buf, int4 buflen)
   * Headline framework
   */
  static void
-hladdword(HeadlineParsedText * prs, char *buf, int4 buflen, int type)
+hladdword(HeadlineParsedText * prs, char *buf, int buflen, int type)
  {
         while (prs->curwords >= prs->lenwords)
         {
@@ -458,17 +460,19 @@ hlfinditem(HeadlineParsedText * prs, TSQuery query, char *buf, int buflen)
         word = &(prs->words[prs->curwords - 1]);
         for (i = 0; i < query->size; i++)
         {
-               if (item->type == VAL && item->length == buflen && strncmp(GETOPERAND(query) + item->distance, buf, buflen) == 0)
+               if (item->type == QI_VAL &&
+                       item->operand.length == buflen &&
+                       strncmp(GETOPERAND(query) + item->operand.distance, buf, buflen) == 0)
                 {
                         if (word->item)
                         {
                                 memcpy(&(prs->words[prs->curwords]), word, sizeof(HeadlineWordEntry));
-                               prs->words[prs->curwords].item = item;
+                               prs->words[prs->curwords].item = &item->operand;
                                 prs->words[prs->curwords].repeated = 1;
                                 prs->curwords++;
                         }
                         else
-                               word->item = item;
+                               word->item = &item->operand;
                 }
                 item++;
         }
@@ -511,7 +515,7 @@ addHLParsedLex(HeadlineParsedText * prs, TSQuery query, ParsedLex * lexs, TSLexe
  }
  
  void
-hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, char *buf, int4 buflen)
+hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query, char *buf, int buflen)
  {
         int                     type,
                                 lenlemm;
diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c

index 5b47f66d07fdf77c93e21cf4bdbed3eeb0f8d3d6..5f65cbc9fb239a07aa15fb28fa8356c5f0553bb1 100644 (file)
--- a/src/backend/tsearch/wparser_def.c
+++ b/src/backend/tsearch/wparser_def.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.2 2007/08/22 01:39:45 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.3 2007/09/07 15:09:55 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1575,7 +1575,7 @@ typedef struct
  } hlCheck;
  
  static bool
-checkcondition_HL(void *checkval, QueryItem * val)
+checkcondition_HL(void *checkval, QueryOperand * val)
  {
         int                     i;
  
@@ -1601,14 +1601,14 @@ hlCover(HeadlineParsedText * prs, TSQuery query, int *p, int *q)
  
         for (j = 0; j < query->size; j++)
         {
-               if (item->type != VAL)
+               if (item->type != QI_VAL)
                 {
                         item++;
                         continue;
                 }
                 for (i = pos; i < prs->curwords; i++)
                 {
-                       if (prs->words[i].item == item)
+                       if (prs->words[i].item == &item->operand)
                         {
                                 if (i > *q)
                                         *q = i;
@@ -1624,14 +1624,14 @@ hlCover(HeadlineParsedText * prs, TSQuery query, int *p, int *q)
         item = GETQUERY(query);
         for (j = 0; j < query->size; j++)
         {
-               if (item->type != VAL)
+               if (item->type != QI_VAL)
                 {
                         item++;
                         continue;
                 }
                 for (i = *q; i >= pos; i--)
                 {
-                       if (prs->words[i].item == item)
+                       if (prs->words[i].item == &item->operand)
                         {
                                 if (i < *p)
                                         *p = i;
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile

index a1f233dca8279ce0efa422ce0bfe9e301ee812ee..9a75c736df650dbf215a6c9f1bfcb35e70a4d1a8 100644 (file)
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -1,7 +1,7 @@
  #
  # Makefile for utils/adt
  #
-# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.66 2007/08/27 01:39:24 tgl Exp $
+# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.67 2007/09/07 15:09:56 teodor Exp $
  #
  
  subdir = src/backend/utils/adt
@@ -28,7 +28,7 @@ OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o bool.o \
         ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o \
         tsginidx.o tsgistidx.o tsquery.o tsquery_cleanup.o tsquery_gist.o \
         tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \
-       tsvector.o tsvector_op.o \
+       tsvector.o tsvector_op.o tsvector_parser.o\
         uuid.o xml.o
  
  like.o: like.c like_match.c
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c

index 491dd21aa81b40fae9c88bf0f762e9e71cfbf49c..10b80dc9566d304ed5ab768f6bde609edf2b1523 100644 (file)
--- a/src/backend/utils/adt/tsginidx.c
+++ b/src/backend/utils/adt/tsginidx.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -77,24 +77,25 @@ gin_extract_query(PG_FUNCTION_ARGS)
                 item = GETQUERY(query);
  
                 for (i = 0; i < query->size; i++)
-                       if (item[i].type == VAL)
+                       if (item[i].type == QI_VAL)
                                 (*nentries)++;
  
                 entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
  
                 for (i = 0; i < query->size; i++)
-                       if (item[i].type == VAL)
+                       if (item[i].type == QI_VAL)
                         {
                                 text       *txt;
+                               QueryOperand *val = &item[i].operand;
  
-                               txt = (text *) palloc(VARHDRSZ + item[i].length);
+                               txt = (text *) palloc(VARHDRSZ + val->length);
  
-                               SET_VARSIZE(txt, VARHDRSZ + item[i].length);
-                               memcpy(VARDATA(txt), GETOPERAND(query) + item[i].distance, item[i].length);
+                               SET_VARSIZE(txt, VARHDRSZ + val->length);
+                               memcpy(VARDATA(txt), GETOPERAND(query) + val->distance, val->length);
  
                                 entries[j++] = PointerGetDatum(txt);
  
-                               if (strategy != TSearchWithClassStrategyNumber && item[i].weight != 0)
+                               if (strategy != TSearchWithClassStrategyNumber && val->weight != 0)
                                         ereport(ERROR,
                                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                                                          errmsg("@@ operator does not support lexeme class restrictions"),
@@ -116,11 +117,11 @@ typedef struct
  } GinChkVal;
  
  static bool
-checkcondition_gin(void *checkval, QueryItem * val)
+checkcondition_gin(void *checkval, QueryOperand * val)
  {
         GinChkVal  *gcv = (GinChkVal *) checkval;
  
-       return gcv->mapped_check[val - gcv->frst];
+       return gcv->mapped_check[((QueryItem *) val) - gcv->frst];
  }
  
  Datum
@@ -142,7 +143,7 @@ gin_ts_consistent(PG_FUNCTION_ARGS)
                 gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size);
  
                 for (i = 0; i < query->size; i++)
-                       if (item[i].type == VAL)
+                       if (item[i].type == QI_VAL)
                                 gcv.mapped_check[i] = check[j++];
  
                 res = TS_execute(
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c

index 6c262521ef49b56c2be2ee4d5c9e7aa740b0d65d..4fc51378b4bf5c70cbfc5e3e7d16195e7597f79c 100644 (file)
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.2 2007/08/21 06:34:42 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -293,7 +293,7 @@ typedef struct
   * is there value 'val' in array or not ?
   */
  static bool
-checkcondition_arr(void *checkval, QueryItem * val)
+checkcondition_arr(void *checkval, QueryOperand * val)
  {
         int4       *StopLow = ((CHKVAL *) checkval)->arrb;
         int4       *StopHigh = ((CHKVAL *) checkval)->arre;
@@ -304,9 +304,9 @@ checkcondition_arr(void *checkval, QueryItem * val)
         while (StopLow < StopHigh)
         {
                 StopMiddle = StopLow + (StopHigh - StopLow) / 2;
-               if (*StopMiddle == val->val)
+               if (*StopMiddle == val->valcrc)
                         return (true);
-               else if (*StopMiddle < val->val)
+               else if (*StopMiddle < val->valcrc)
                         StopLow = StopMiddle + 1;
                 else
                         StopHigh = StopMiddle;
@@ -316,9 +316,9 @@ checkcondition_arr(void *checkval, QueryItem * val)
  }
  
  static bool
-checkcondition_bit(void *checkval, QueryItem * val)
+checkcondition_bit(void *checkval, QueryOperand * val)
  {
-       return GETBIT(checkval, HASHVAL(val->val));
+       return GETBIT(checkval, HASHVAL(val->valcrc));
  }
  
  Datum
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c

index 83759728ff96c97e3134e9da1a624a09ca9823c9..27b93eb64d7725fa0ec9795a31fd576e188ae13d 100644 (file)
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.2 2007/08/31 02:26:29 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -23,6 +23,29 @@
  #include "utils/pg_crc.h"
  
  
+struct TSQueryParserStateData
+{
+       /* State for gettoken_query */
+       char       *buffer;                     /* entire string we are scanning */
+       char       *buf;                        /* current scan point */
+       int                     state;
+       int                     count;                  /* nesting count, incremented by (, 
+                                                                  decremented by ) */
+
+       /* polish (prefix) notation in list, filled in by push* functions */
+       List       *polstr;
+
+       /* Strings from operands are collected in op. curop is a pointer to
+        * the end of used space of op. */
+       char       *op;
+       char       *curop;
+       int                     lenop; /* allocated size of op */
+       int                     sumlen; /* used size of op */
+
+       /* state for value's parser */
+       TSVectorParseState valstate;
+};
+
  /* parser's states */
  #define WAITOPERAND 1
  #define WAITOPERATOR   2
@@ -30,21 +53,10 @@
  #define WAITSINGLEOPERAND 4
  
  /*
- * node of query tree, also used
- * for storing polish notation in parser
+ * subroutine to parse the weight part, like ':1AB' of a query.
   */
-typedef struct ParseQueryNode
-{
-       int2            weight;
-       int2            type;
-       int4            val;
-       int2            distance;
-       int2            length;
-       struct ParseQueryNode *next;
-} ParseQueryNode;
-
  static char *
-get_weight(char *buf, int2 *weight)
+get_weight(char *buf, int16 *weight)
  {
         *weight = 0;
  
@@ -81,11 +93,28 @@ get_weight(char *buf, int2 *weight)
         return buf;
  }
  
+/*
+ * token types for parsing
+ */
+typedef enum {
+       PT_END = 0,
+       PT_ERR = 1,
+       PT_VAL = 2,
+       PT_OPR = 3,
+       PT_OPEN = 4,
+       PT_CLOSE = 5,
+} ts_tokentype;
+
  /*
   * get token from query string
+ *
+ * *operator is filled in with OP_* when return value is PT_OPR
+ * *strval, *lenval and *weight are filled in when return value is PT_VAL
   */
-static int4
-gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
+static ts_tokentype
+gettoken_query(TSQueryParserState state, 
+                          int8 *operator,
+                          int *lenval, char **strval, int16 *weight)
  {
         while (1)
         {
@@ -97,16 +126,16 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva
                                 {
                                         (state->buf)++;         /* can safely ++, t_iseq guarantee
                                                                                  * that pg_mblen()==1 */
-                                       *val = (int4) '!';
+                                       *operator = OP_NOT;
                                         state->state = WAITOPERAND;
-                                       return OPR;
+                                       return PT_OPR;
                                 }
                                 else if (t_iseq(state->buf, '('))
                                 {
                                         state->count++;
                                         (state->buf)++;
                                         state->state = WAITOPERAND;
-                                       return OPEN;
+                                       return PT_OPEN;
                                 }
                                 else if (t_iseq(state->buf, ':'))
                                 {
@@ -117,17 +146,16 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva
                                 }
                                 else if (!t_isspace(state->buf))
                                 {
-                                       state->valstate.prsbuf = state->buf;
-                                       if (gettoken_tsvector(&(state->valstate)))
+                                       /* We rely on the tsvector parser to parse the value for us */
+                                       reset_tsvector_parser(state->valstate, state->buf);
+                                       if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
                                         {
-                                               *strval = state->valstate.word;
-                                               *lenval = state->valstate.curpos - state->valstate.word;
-                                               state->buf = get_weight(state->valstate.prsbuf, weight);
+                                               state->buf = get_weight(state->buf, weight);
                                                 state->state = WAITOPERATOR;
-                                               return VAL;
+                                               return PT_VAL;
                                         }
                                         else if (state->state == WAITFIRSTOPERAND)
-                                               return END;
+                                               return PT_END;
                                         else
                                                 ereport(ERROR,
                                                                 (errcode(ERRCODE_SYNTAX_ERROR),
@@ -136,52 +164,71 @@ gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strva
                                 }
                                 break;
                         case WAITOPERATOR:
-                               if (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))
+                               if (t_iseq(state->buf, '&'))
+                               {
+                                       state->state = WAITOPERAND;
+                                       *operator = OP_AND;
+                                       (state->buf)++;
+                                       return PT_OPR;
+                               }
+                               if (t_iseq(state->buf, '|'))
                                 {
                                         state->state = WAITOPERAND;
-                                       *val = (int4) *(state->buf);
+                                       *operator = OP_OR;
                                         (state->buf)++;
-                                       return OPR;
+                                       return PT_OPR;
                                 }
                                 else if (t_iseq(state->buf, ')'))
                                 {
                                         (state->buf)++;
                                         state->count--;
-                                       return (state->count < 0) ? ERR : CLOSE;
+                                       return (state->count < 0) ? PT_ERR : PT_CLOSE;
                                 }
                                 else if (*(state->buf) == '\0')
-                                       return (state->count) ? ERR : END;
+                                       return (state->count) ? PT_ERR : PT_END;
                                 else if (!t_isspace(state->buf))
-                                       return ERR;
+                                       return PT_ERR;
                                 break;
                         case WAITSINGLEOPERAND:
                                 if (*(state->buf) == '\0')
-                                       return END;
+                                       return PT_END;
                                 *strval = state->buf;
                                 *lenval = strlen(state->buf);
                                 state->buf += strlen(state->buf);
                                 state->count++;
-                               return VAL;
+                               return PT_VAL;
                         default:
-                               return ERR;
+                               return PT_ERR;
                                 break;
                 }
                 state->buf += pg_mblen(state->buf);
         }
-       return END;
+       return PT_END;
  }
  
  /*
- * push new one in polish notation reverse view
+ * Push an operator to state->polstr
   */
  void
-pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
+pushOperator(TSQueryParserState state, int8 oper)
  {
-       ParseQueryNode *tmp = (ParseQueryNode *) palloc(sizeof(ParseQueryNode));
+       QueryOperator *tmp;
+
+       Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR);
+       
+       tmp = (QueryOperator *) palloc(sizeof(QueryOperator));
+       tmp->type = QI_OPR;
+       tmp->oper = oper;
+       /* left is filled in later with findoprnd */
+
+       state->polstr = lcons(tmp, state->polstr);
+}
+
+static void
+pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight)
+{
+       QueryOperand *tmp;
  
-       tmp->weight = weight;
-       tmp->type = type;
-       tmp->val = val;
         if (distance >= MAXSTRPOS)
                 ereport(ERROR,
                                 (errcode(ERRCODE_SYNTAX_ERROR),
@@ -192,20 +239,27 @@ pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 l
                                 (errcode(ERRCODE_SYNTAX_ERROR),
                                  errmsg("operand is too long in tsearch query: \"%s\"",
                                                 state->buffer)));
-       tmp->distance = distance;
+
+       tmp = (QueryOperand *) palloc(sizeof(QueryOperand));
+       tmp->type = QI_VAL;
+       tmp->weight = weight;
+       tmp->valcrc = (int32) valcrc;
         tmp->length = lenval;
-       tmp->next = state->str;
-       state->str = tmp;
-       state->num++;
+       tmp->distance = distance;
+
+       state->polstr = lcons(tmp, state->polstr);
  }
  
  /*
- * This function is used for tsquery parsing
+ * Push an operand to state->polstr.
+ *
+ * strval must point to a string equal to state->curop. lenval is the length
+ * of the string.
   */
  void
-pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int2 weight)
+pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
  {
-       pg_crc32        c;
+       pg_crc32        valcrc;
  
         if (lenval >= MAXSTRLEN)
                 ereport(ERROR,
@@ -213,162 +267,202 @@ pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int
                                  errmsg("word is too long in tsearch query: \"%s\"",
                                                 state->buffer)));
  
-       INIT_CRC32(c);
-       COMP_CRC32(c, strval, lenval);
-       FIN_CRC32(c);
-       pushquery(state, type, *(int4 *) &c,
-                         state->curop - state->op, lenval, weight);
+       INIT_CRC32(valcrc);
+       COMP_CRC32(valcrc, strval, lenval);
+       FIN_CRC32(valcrc);
+       pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight);
  
+       /* append the value string to state->op, enlarging buffer if needed first */
         while (state->curop - state->op + lenval + 1 >= state->lenop)
         {
-               int4            tmp = state->curop - state->op;
+               int     used = state->curop - state->op;
  
                 state->lenop *= 2;
                 state->op = (char *) repalloc((void *) state->op, state->lenop);
-               state->curop = state->op + tmp;
+               state->curop = state->op + used;
         }
         memcpy((void *) state->curop, (void *) strval, lenval);
         state->curop += lenval;
         *(state->curop) = '\0';
         state->curop++;
         state->sumlen += lenval + 1 /* \0 */ ;
-       return;
  }
  
+
+/*
+ * Push a stopword placeholder to state->polstr
+ */
+void
+pushStop(TSQueryParserState state)
+{
+       QueryOperand *tmp;
+
+       tmp = (QueryOperand *) palloc(sizeof(QueryOperand));
+       tmp->type = QI_VALSTOP;
+
+       state->polstr = lcons(tmp, state->polstr);
+}
+
+
  #define STACKDEPTH     32
  
  /*
- * make polish notation of query
+ * Make polish (prefix) notation of query.
+ *
+ * See parse_tsquery for explanation of pushval.
   */
-static int4
-makepol(TSQueryParserState * state,
-               void (*pushval) (TSQueryParserState *, int, char *, int, int2))
+static void
+makepol(TSQueryParserState state, 
+               PushFunction pushval,
+               void *opaque)
  {
-       int4            val = 0,
-                               type;
-       int4            lenval = 0;
+       int8            operator = 0;
+       ts_tokentype type;
+       int                     lenval = 0;
         char       *strval = NULL;
-       int4            stack[STACKDEPTH];
-       int4            lenstack = 0;
-       int2            weight = 0;
+       int8            opstack[STACKDEPTH];
+       int                     lenstack = 0;
+       int16           weight = 0;
  
         /* since this function recurses, it could be driven to stack overflow */
         check_stack_depth();
  
-       while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
+       while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight)) != PT_END)
         {
                 switch (type)
                 {
-                       case VAL:
-                               pushval(state, VAL, strval, lenval, weight);
-                               while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
-                                                                       stack[lenstack - 1] == (int4) '!'))
+                       case PT_VAL:
+                               pushval(opaque, state, strval, lenval, weight);
+                               while (lenstack && (opstack[lenstack - 1] == OP_AND ||
+                                                                       opstack[lenstack - 1] == OP_NOT))
                                 {
                                         lenstack--;
-                                       pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+                                       pushOperator(state, opstack[lenstack]);
                                 }
                                 break;
-                       case OPR:
-                               if (lenstack && val == (int4) '|')
-                                       pushquery(state, OPR, val, 0, 0, 0);
+                       case PT_OPR:
+                               if (lenstack && operator == OP_OR)
+                                       pushOperator(state, OP_OR);
                                 else
                                 {
                                         if (lenstack == STACKDEPTH)                     /* internal error */
                                                 elog(ERROR, "tsquery stack too small");
-                                       stack[lenstack] = val;
+                                       opstack[lenstack] = operator;
                                         lenstack++;
                                 }
                                 break;
-                       case OPEN:
-                               if (makepol(state, pushval) == ERR)
-                                       return ERR;
-                               if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
-                                                                stack[lenstack - 1] == (int4) '!'))
+                       case PT_OPEN:
+                               makepol(state, pushval, opaque);
+
+                               if (lenstack && (opstack[lenstack - 1] == OP_AND ||
+                                                                opstack[lenstack - 1] == OP_NOT))
                                 {
                                         lenstack--;
-                                       pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+                                       pushOperator(state, opstack[lenstack]);
                                 }
                                 break;
-                       case CLOSE:
+                       case PT_CLOSE:
                                 while (lenstack)
                                 {
                                         lenstack--;
-                                       pushquery(state, OPR, stack[lenstack], 0, 0, 0);
+                                       pushOperator(state, opstack[lenstack]);
                                 };
-                               return END;
-                               break;
-                       case ERR:
+                               return;
+                       case PT_ERR:
                         default:
                                 ereport(ERROR,
                                                 (errcode(ERRCODE_SYNTAX_ERROR),
                                                  errmsg("syntax error in tsearch query: \"%s\"",
                                                                 state->buffer)));
-                               return ERR;
-
                 }
         }
         while (lenstack)
         {
                 lenstack--;
-               pushquery(state, OPR, stack[lenstack], 0, 0, 0);
-       };
-       return END;
+               pushOperator(state, opstack[lenstack]);
+       }
  }
  
+/*
+ * Fills in the left-fields previously left unfilled. The input
+ * QueryItems must be in polish (prefix) notation. 
+ */
  static void
-findoprnd(QueryItem * ptr, int4 *pos)
+findoprnd(QueryItem *ptr, int *pos)
  {
-       if (ptr[*pos].type == VAL || ptr[*pos].type == VALSTOP)
-       {
-               ptr[*pos].left = 0;
-               (*pos)++;
-       }
-       else if (ptr[*pos].val == (int4) '!')
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
+       if (ptr[*pos].type == QI_VAL ||
+               ptr[*pos].type == QI_VALSTOP) /* need to handle VALSTOP here,
+                                                                          * they haven't been cleansed
+                                                                          * away yet.
+                                                                          */
         {
-               ptr[*pos].left = 1;
                 (*pos)++;
-               findoprnd(ptr, pos);
         }
-       else
+       else 
         {
-               QueryItem  *curitem = &ptr[*pos];
-               int4            tmp = *pos;
+               Assert(ptr[*pos].type == QI_OPR);
  
-               (*pos)++;
-               findoprnd(ptr, pos);
-               curitem->left = *pos - tmp;
-               findoprnd(ptr, pos);
+               if (ptr[*pos].operator.oper == OP_NOT)
+               {
+                       ptr[*pos].operator.left = 1;
+                       (*pos)++;
+                       findoprnd(ptr, pos);
+               }
+               else
+               {
+                       QueryOperator  *curitem = &ptr[*pos].operator;
+                       int     tmp = *pos;
+
+                       Assert(curitem->oper == OP_AND || curitem->oper == OP_OR);
+
+                       (*pos)++;
+                       findoprnd(ptr, pos);
+                       curitem->left = *pos - tmp;
+                       findoprnd(ptr, pos);
+               }
         }
  }
  
-
  /*
- * input
+ * Each value (operand) in the query is passed to pushval. pushval can
+ * transform the simple value to an arbitrarily complex expression using
+ * pushValue and pushOperator. It must push a single value with pushValue,
+ * a complete expression with all operands, or a stopword placeholder
+ * with pushStop, otherwise the prefix notation representation will be broken,
+ * having an operator with no operand.
+ *
+ * opaque is passed on to pushval as is, pushval can use it to store its 
+ * private state.
+ *
+ * The returned query might contain QI_VALSTOP nodes. The caller is responsible
+ * for cleaning them up (with clean_fakeval).
   */
  TSQuery
-parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int, int2), Oid cfg_id, bool isplain)
+parse_tsquery(char *buf, 
+                         PushFunction pushval,
+                         void *opaque,
+                         bool isplain)
  {
-       TSQueryParserState state;
-       int4            i;
+       struct TSQueryParserStateData state;
+       int                     i;
         TSQuery         query;
-       int4            commonlen;
+       int                     commonlen;
         QueryItem  *ptr;
-       ParseQueryNode *tmp;
-       int4            pos = 0;
+       int                     pos = 0;
+       ListCell   *cell;
  
         /* init state */
         state.buffer = buf;
         state.buf = buf;
         state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
         state.count = 0;
-       state.num = 0;
-       state.str = NULL;
-       state.cfg_id = cfg_id;
+       state.polstr = NIL;
  
         /* init value parser's state */
-       state.valstate.oprisdelim = true;
-       state.valstate.len = 32;
-       state.valstate.word = (char *) palloc(state.valstate.len);
+       state.valstate = init_tsvector_parser(NULL, true);
  
         /* init list of operand */
         state.sumlen = 0;
@@ -377,9 +471,11 @@ parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int
         *(state.curop) = '\0';
  
         /* parse query & make polish notation (postfix, but in reverse order) */
-       makepol(&state, pushval);
-       pfree(state.valstate.word);
-       if (!state.num)
+       makepol(&state, pushval, opaque);
+
+       close_tsvector_parser(state.valstate);
+
+       if (list_length(state.polstr) == 0)
         {
                 ereport(NOTICE,
                                 (errmsg("tsearch query doesn't contain lexeme(s): \"%s\"",
@@ -390,37 +486,54 @@ parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int
                 return query;
         }
  
-       /* make finish struct */
-       commonlen = COMPUTESIZE(state.num, state.sumlen);
-       query = (TSQuery) palloc(commonlen);
+       /* Pack the QueryItems in the final TSQuery struct to return to caller */
+       commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
+       query = (TSQuery) palloc0(commonlen);
         SET_VARSIZE(query, commonlen);
-       query->size = state.num;
+       query->size = list_length(state.polstr);
         ptr = GETQUERY(query);
  
-       /* set item in polish notation */
-       for (i = 0; i < state.num; i++)
+       /* Copy QueryItems to TSQuery */
+       i = 0;
+       foreach(cell, state.polstr)
         {
-               ptr[i].weight = state.str->weight;
-               ptr[i].type = state.str->type;
-               ptr[i].val = state.str->val;
-               ptr[i].distance = state.str->distance;
-               ptr[i].length = state.str->length;
-               tmp = state.str->next;
-               pfree(state.str);
-               state.str = tmp;
+               QueryItem *item = (QueryItem *) lfirst(cell);
+
+               switch(item->type)
+               {
+                       case QI_VAL:
+                               memcpy(&ptr[i], item, sizeof(QueryOperand));
+                               break;
+                       case QI_VALSTOP:
+                               ptr[i].type = QI_VALSTOP;
+                               break;
+                       case QI_OPR:
+                               memcpy(&ptr[i], item, sizeof(QueryOperator));
+                               break;
+                       default:
+                               elog(ERROR, "unknown QueryItem type %d", item->type);
+               }
+               i++;
         }
  
-       /* set user friendly-operand view */
+       /* Copy all the operand strings to TSQuery */
         memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
         pfree(state.op);
  
-       /* set left operand's position for every operator */
+       /* Set left operand pointers for every operator. */
         pos = 0;
         findoprnd(ptr, &pos);
  
         return query;
  }
  
+static void
+pushval_asis(void *opaque, TSQueryParserState state, char *strval, int lenval,
+                        int16 weight)
+{
+       pushValue(state, strval, lenval, weight);
+}
+
  /*
   * in without morphology
   */
@@ -431,7 +544,7 @@ tsqueryin(PG_FUNCTION_ARGS)
  
         pg_verifymbstr(in, strlen(in), false);
  
-       PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, InvalidOid, false));
+       PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, NULL, false));
  }
  
  /*
@@ -443,13 +556,14 @@ typedef struct
         char       *buf;
         char       *cur;
         char       *op;
-       int4            buflen;
+       int                     buflen;
  } INFIX;
  
-#define RESIZEBUF(inf,addsize) \
+/* Makes sure inf->buf is large enough for adding 'addsize' bytes */
+#define RESIZEBUF(inf, addsize) \
  while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
  { \
-       int4 len = (inf)->cur - (inf)->buf; \
+       int len = (inf)->cur - (inf)->buf; \
         (inf)->buflen *= 2; \
         (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
         (inf)->cur = (inf)->buf + len; \
@@ -462,12 +576,16 @@ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
  static void
  infix(INFIX * in, bool first)
  {
-       if (in->curpol->type == VAL)
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
+       if (in->curpol->type == QI_VAL)
         {
-               char       *op = in->op + in->curpol->distance;
+               QueryOperand *curpol = &in->curpol->operand;
+               char       *op = in->op + curpol->distance;
                 int                     clen;
  
-               RESIZEBUF(in, in->curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
+               RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
                 *(in->cur) = '\'';
                 in->cur++;
                 while (*op)
@@ -485,26 +603,26 @@ infix(INFIX * in, bool first)
                 }
                 *(in->cur) = '\'';
                 in->cur++;
-               if (in->curpol->weight)
+               if (curpol->weight)
                 {
                         *(in->cur) = ':';
                         in->cur++;
-                       if (in->curpol->weight & (1 << 3))
+                       if (curpol->weight & (1 << 3))
                         {
                                 *(in->cur) = 'A';
                                 in->cur++;
                         }
-                       if (in->curpol->weight & (1 << 2))
+                       if (curpol->weight & (1 << 2))
                         {
                                 *(in->cur) = 'B';
                                 in->cur++;
                         }
-                       if (in->curpol->weight & (1 << 1))
+                       if (curpol->weight & (1 << 1))
                         {
                                 *(in->cur) = 'C';
                                 in->cur++;
                         }
-                       if (in->curpol->weight & 1)
+                       if (curpol->weight & 1)
                         {
                                 *(in->cur) = 'D';
                                 in->cur++;
@@ -513,7 +631,7 @@ infix(INFIX * in, bool first)
                 *(in->cur) = '\0';
                 in->curpol++;
         }
-       else if (in->curpol->val == (int4) '!')
+       else if (in->curpol->operator.oper == OP_NOT)
         {
                 bool            isopr = false;
  
@@ -522,13 +640,15 @@ infix(INFIX * in, bool first)
                 in->cur++;
                 *(in->cur) = '\0';
                 in->curpol++;
-               if (in->curpol->type == OPR)
+
+               if (in->curpol->type == QI_OPR)
                 {
                         isopr = true;
                         RESIZEBUF(in, 2);
                         sprintf(in->cur, "( ");
                         in->cur = strchr(in->cur, '\0');
                 }
+
                 infix(in, isopr);
                 if (isopr)
                 {
@@ -539,11 +659,11 @@ infix(INFIX * in, bool first)
         }
         else
         {
-               int4            op = in->curpol->val;
+               int8            op = in->curpol->operator.oper;
                 INFIX           nrm;
  
                 in->curpol++;
-               if (op == (int4) '|' && !first)
+               if (op == OP_OR && !first)
                 {
                         RESIZEBUF(in, 2);
                         sprintf(in->cur, "( ");
@@ -564,11 +684,22 @@ infix(INFIX * in, bool first)
  
                 /* print operator & right operand */
                 RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
-               sprintf(in->cur, " %c %s", op, nrm.buf);
+               switch(op)
+               {
+                       case OP_OR:
+                               sprintf(in->cur, " | %s", nrm.buf);
+                               break;
+                       case OP_AND:
+                               sprintf(in->cur, " & %s", nrm.buf);
+                               break;
+                       default:
+                               /* OP_NOT is handled in the if-branch above */
+                               elog(ERROR, "unexpected operator type %d", op);
+               }
                 in->cur = strchr(in->cur, '\0');
                 pfree(nrm.buf);
  
-               if (op == (int4) '|' && !first)
+               if (op == OP_OR && !first)
                 {
                         RESIZEBUF(in, 2);
                         sprintf(in->cur, " )");
@@ -615,28 +746,33 @@ tsquerysend(PG_FUNCTION_ARGS)
         pq_sendint(&buf, query->size, sizeof(int32));
         for (i = 0; i < query->size; i++)
         {
-               int                     tmp;
-
                 pq_sendint(&buf, item->type, sizeof(item->type));
-               pq_sendint(&buf, item->weight, sizeof(item->weight));
-               pq_sendint(&buf, item->left, sizeof(item->left));
-               pq_sendint(&buf, item->val, sizeof(item->val));
-
-               /*
-                * We are sure that sizeof(WordEntry) == sizeof(int32), and about
-                * layout of QueryItem
-                */
-               tmp = *(int32 *) (((char *) item) + HDRSIZEQI);
-               pq_sendint(&buf, tmp, sizeof(tmp));
  
+               switch(item->type)
+               {
+                       case QI_VAL:
+                               pq_sendint(&buf, item->operand.weight, sizeof(item->operand.weight));
+                               pq_sendint(&buf, item->operand.valcrc, sizeof(item->operand.valcrc));
+                               pq_sendint(&buf, item->operand.length, sizeof(int16));
+                               /* istrue flag is just for temporary use in tsrank.c/Cover,
+                                * so we don't need to transfer that */
+                               break;
+                       case QI_OPR:
+                               pq_sendint(&buf, item->operator.oper, sizeof(item->operator.oper));
+                               if (item->operator.oper != OP_NOT)
+                                       pq_sendint(&buf, item->operator.left, sizeof(item->operator.left));
+                               break;
+                       default:
+                               elog(ERROR, "unknown tsquery node type %d", item->type);
+               }
                 item++;
         }
  
         item = GETQUERY(query);
         for (i = 0; i < query->size; i++)
         {
-               if (item->type == VAL)
-                       pq_sendbytes(&buf, GETOPERAND(query) + item->distance, item->length);
+               if (item->type == QI_VAL)
+                       pq_sendbytes(&buf, GETOPERAND(query) + item->operand.distance, item->operand.length);
                 item++;
         }
  
@@ -652,8 +788,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
         TSQuery         query;
         int                     i,
                                 size,
-                               tmp,
-                               len = HDRSIZETQ;
+                               len;
         QueryItem  *item;
         int                     datalen = 0;
         char       *ptr;
@@ -661,7 +796,8 @@ tsqueryrecv(PG_FUNCTION_ARGS)
         size = pq_getmsgint(buf, sizeof(uint32));
         if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem)))
                 elog(ERROR, "invalid size of tsquery");
-       len += sizeof(QueryItem) * size;
+
+       len = HDRSIZETQ + sizeof(QueryItem) * size;
  
         query = (TSQuery) palloc(len);
         query->size = size;
@@ -670,32 +806,67 @@ tsqueryrecv(PG_FUNCTION_ARGS)
         for (i = 0; i < size; i++)
         {
                 item->type = (int8) pq_getmsgint(buf, sizeof(int8));
-               item->weight = (int8) pq_getmsgint(buf, sizeof(int8));
-               item->left = (int16) pq_getmsgint(buf, sizeof(int16));
-               item->val = (int32) pq_getmsgint(buf, sizeof(int32));
-               tmp = pq_getmsgint(buf, sizeof(int32));
-               memcpy((((char *) item) + HDRSIZEQI), &tmp, sizeof(int32));
-
-               /*
-                * Sanity checks
-                */
-               if (item->type == VAL)
-               {
-                       datalen += item->length + 1;            /* \0 */
-               }
-               else if (item->type == OPR)
+
+               switch(item->type)
                 {
-                       if (item->val == '|' || item->val == '&')
-                       {
-                               if (item->left <= 0 || i + item->left >= size)
-                                       elog(ERROR, "invalid pointer to left operand");
-                       }
+                       case QI_VAL:
+                               item->operand.weight = (int8) pq_getmsgint(buf, sizeof(int8));
+                               item->operand.valcrc = (int32) pq_getmsgint(buf, sizeof(int32));
+                               item->operand.length = pq_getmsgint(buf, sizeof(int16));
+
+                               /*
+                                * Check that datalen doesn't grow too large. Without the
+                                * check, a malicious client could induce a buffer overflow
+                                * by sending a tsquery whose size exceeds 2GB. datalen
+                                * would overflow, we would allocate a too small buffer below,
+                                * and overflow the buffer. Because operand.length is a 20-bit
+                                * field, adding one such value to datalen must exceed
+                                * MaxAllocSize before wrapping over the 32-bit datalen field,
+                                * so this check will protect from it.
+                                */
+                               if (datalen > MAXSTRLEN)
+                                       elog(ERROR, "invalid tsquery; total operand length exceeded");
+
+                               /* We can calculate distance from datalen, no need to send it
+                                * through the wire. If we did, we would have to check that
+                                * it's valid anyway.
+                                */
+                               item->operand.distance = datalen;
+
+                               datalen += item->operand.length + 1;            /* \0 */
  
-                       if (i == size - 1)
-                               elog(ERROR, "invalid pointer to right operand");
+                               break;
+                       case QI_OPR:
+                               item->operator.oper = (int8) pq_getmsgint(buf, sizeof(int8));
+                               if (item->operator.oper != OP_NOT &&
+                                       item->operator.oper != OP_OR &&
+                                       item->operator.oper != OP_AND)
+                                       elog(ERROR, "unknown operator type %d", (int) item->operator.oper);
+                               if(item->operator.oper != OP_NOT)
+                               {
+                                       item->operator.left = (int16) pq_getmsgint(buf, sizeof(int16));
+                                       /*
+                                        * Sanity checks
+                                        */
+                                       if (item->operator.left <= 0 || i + item->operator.left >= size)
+                                               elog(ERROR, "invalid pointer to left operand");
+
+                                       /* XXX: Though there's no way to construct a TSQuery that's
+                                        * not in polish notation, we don't enforce that for
+                                        * queries received from client in binary mode. Is there
+                                        * anything that relies on it?
+                                        *
+                                        * XXX: The tree could be malformed in other ways too,
+                                        * a node could have two parents, for example.
+                                        */
+                               }
+
+                               if (i == size - 1)
+                                       elog(ERROR, "invalid pointer to right operand");
+                               break;
+                       default:
+                               elog(ERROR, "unknown tsquery node type %d", item->type);
                 }
-               else
-                       elog(ERROR, "unknown tsquery node type");
  
                 item++;
         }
@@ -706,13 +877,12 @@ tsqueryrecv(PG_FUNCTION_ARGS)
         ptr = GETOPERAND(query);
         for (i = 0; i < size; i++)
         {
-               if (item->type == VAL)
+               if (item->type == QI_VAL)
                 {
-                       item->distance = ptr - GETOPERAND(query);
                         memcpy(ptr,
-                                  pq_getmsgbytes(buf, item->length),
-                                  item->length);
-                       ptr += item->length;
+                                  pq_getmsgbytes(buf, item->operand.length),
+                                  item->operand.length);
+                       ptr += item->operand.length;
                         *ptr++ = '\0';
                 }
                 item++;
@@ -736,7 +906,7 @@ tsquerytree(PG_FUNCTION_ARGS)
         INFIX           nrm;
         text       *res;
         QueryItem  *q;
-       int4            len;
+       int                     len;
  
         if (query->size == 0)
         {
diff --git a/src/backend/utils/adt/tsquery_cleanup.c b/src/backend/utils/adt/tsquery_cleanup.c

index 7991a4ad198c2d838aef08f5e641360d531df848..22e6f7c8198918aac94ef73273fc9f90ef2bea77 100644 (file)
--- a/src/backend/utils/adt/tsquery_cleanup.c
+++ b/src/backend/utils/adt/tsquery_cleanup.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -35,20 +35,23 @@ maketree(QueryItem * in)
  
         node->valnode = in;
         node->right = node->left = NULL;
-       if (in->type == OPR)
+       if (in->type == QI_OPR)
         {
                 node->right = maketree(in + 1);
-               if (in->val != (int4) '!')
-                       node->left = maketree(in + in->left);
+               if (in->operator.oper != OP_NOT)
+                       node->left = maketree(in + in->operator.left);
         }
         return node;
  }
  
+/*
+ * Internal state for plaintree and plainnode
+ */
  typedef struct
  {
         QueryItem  *ptr;
-       int4            len;
-       int4            cur;
+       int             len; /* allocated size of ptr */
+       int             cur; /* number of elements in ptr */
  } PLAINTREE;
  
  static void
@@ -60,37 +63,37 @@ plainnode(PLAINTREE * state, NODE * node)
                 state->ptr = (QueryItem *) repalloc((void *) state->ptr, state->len * sizeof(QueryItem));
         }
         memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(QueryItem));
-       if (node->valnode->type == VAL)
+       if (node->valnode->type == QI_VAL)
                 state->cur++;
-       else if (node->valnode->val == (int4) '!')
+       else if (node->valnode->operator.oper == OP_NOT)
         {
-               state->ptr[state->cur].left = 1;
+               state->ptr[state->cur].operator.left = 1;
                 state->cur++;
                 plainnode(state, node->right);
         }
         else
         {
-               int4            cur = state->cur;
+               int     cur = state->cur;
  
                 state->cur++;
                 plainnode(state, node->right);
-               state->ptr[cur].left = state->cur - cur;
+               state->ptr[cur].operator.left = state->cur - cur;
                 plainnode(state, node->left);
         }
         pfree(node);
  }
  
  /*
- * make plain view of tree from 'normal' view of tree
+ * make plain view of tree from a NODE-tree representation
   */
  static QueryItem *
-plaintree(NODE * root, int4 *len)
+plaintree(NODE * root, int *len)
  {
         PLAINTREE       pl;
  
         pl.cur = 0;
         pl.len = 16;
-       if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
+       if (root && (root->valnode->type == QI_VAL || root->valnode->type == QI_OPR))
         {
                 pl.ptr = (QueryItem *) palloc(pl.len * sizeof(QueryItem));
                 plainnode(&pl, root);
@@ -122,17 +125,17 @@ freetree(NODE * node)
  static NODE *
  clean_NOT_intree(NODE * node)
  {
-       if (node->valnode->type == VAL)
+       if (node->valnode->type == QI_VAL)
                 return node;
  
-       if (node->valnode->val == (int4) '!')
+       if (node->valnode->operator.oper == OP_NOT)
         {
                 freetree(node);
                 return NULL;
         }
  
         /* operator & or | */
-       if (node->valnode->val == (int4) '|')
+       if (node->valnode->operator.oper == OP_OR)
         {
                 if ((node->left = clean_NOT_intree(node->left)) == NULL ||
                         (node->right = clean_NOT_intree(node->right)) == NULL)
@@ -144,6 +147,8 @@ clean_NOT_intree(NODE * node)
         else
         {
                 NODE       *res = node;
+               
+               Assert(node->valnode->operator.oper == OP_AND);
  
                 node->left = clean_NOT_intree(node->left);
                 node->right = clean_NOT_intree(node->right);
@@ -168,7 +173,7 @@ clean_NOT_intree(NODE * node)
  }
  
  QueryItem *
-clean_NOT(QueryItem * ptr, int4 *len)
+clean_NOT(QueryItem * ptr, int *len)
  {
         NODE       *root = maketree(ptr);
  
@@ -180,10 +185,13 @@ clean_NOT(QueryItem * ptr, int4 *len)
  #undef V_UNKNOWN
  #endif
  
-#define V_UNKNOWN      0
-#define V_TRUE         1
-#define V_FALSE                2
-#define V_STOP         3
+/*
+ * output values for result output parameter of clean_fakeval_intree
+ */
+#define V_UNKNOWN      0 /* the expression can't be evaluated statically */
+#define V_TRUE         1 /* the expression is always true (not implemented) */
+#define V_FALSE                2 /* the expression is always false (not implemented) */
+#define V_STOP         3 /* the expression is a stop word */
  
  /*
   * Clean query tree from values which is always in
@@ -195,17 +203,19 @@ clean_fakeval_intree(NODE * node, char *result)
         char            lresult = V_UNKNOWN,
                                 rresult = V_UNKNOWN;
  
-       if (node->valnode->type == VAL)
+       if (node->valnode->type == QI_VAL)
                 return node;
-       else if (node->valnode->type == VALSTOP)
+       else 
+       if (node->valnode->type == QI_VALSTOP)
         {
                 pfree(node);
                 *result = V_STOP;
                 return NULL;
         }
  
+       Assert(node->valnode->type == QI_OPR);
  
-       if (node->valnode->val == (int4) '!')
+       if (node->valnode->operator.oper == OP_NOT)
         {
                 node->right = clean_fakeval_intree(node->right, &rresult);
                 if (!node->right)
@@ -221,6 +231,7 @@ clean_fakeval_intree(NODE * node, char *result)
  
                 node->left = clean_fakeval_intree(node->left, &lresult);
                 node->right = clean_fakeval_intree(node->right, &rresult);
+
                 if (lresult == V_STOP && rresult == V_STOP)
                 {
                         freetree(node);
@@ -243,7 +254,7 @@ clean_fakeval_intree(NODE * node, char *result)
  }
  
  QueryItem *
-clean_fakeval(QueryItem * ptr, int4 *len)
+clean_fakeval(QueryItem * ptr, int *len)
  {
         NODE       *root = maketree(ptr);
         char            result = V_UNKNOWN;
diff --git a/src/backend/utils/adt/tsquery_op.c b/src/backend/utils/adt/tsquery_op.c

index fd97c2796df771580739328c910f3b53cb2342a1..cbf06f7adeb8cc8419a907c9bb642556fa2e4b8a 100644 (file)
--- a/src/backend/utils/adt/tsquery_op.c
+++ b/src/backend/utils/adt/tsquery_op.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -30,14 +30,15 @@ tsquery_numnode(PG_FUNCTION_ARGS)
  }
  
  static QTNode *
-join_tsqueries(TSQuery a, TSQuery b)
+join_tsqueries(TSQuery a, TSQuery b, int8 operator)
  {
         QTNode     *res = (QTNode *) palloc0(sizeof(QTNode));
  
         res->flags |= QTN_NEEDFREE;
  
         res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
-       res->valnode->type = OPR;
+       res->valnode->type = QI_OPR;
+       res->valnode->operator.oper = operator;
  
         res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
         res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b));
@@ -66,9 +67,7 @@ tsquery_and(PG_FUNCTION_ARGS)
                 PG_RETURN_POINTER(a);
         }
  
-       res = join_tsqueries(a, b);
-
-       res->valnode->val = '&';
+       res = join_tsqueries(a, b, OP_AND);
  
         query = QTN2QT(res);
  
@@ -98,9 +97,7 @@ tsquery_or(PG_FUNCTION_ARGS)
                 PG_RETURN_POINTER(a);
         }
  
-       res = join_tsqueries(a, b);
-
-       res->valnode->val = '|';
+       res = join_tsqueries(a, b, OP_OR);
  
         query = QTN2QT(res);
  
@@ -126,8 +123,8 @@ tsquery_not(PG_FUNCTION_ARGS)
         res->flags |= QTN_NEEDFREE;
  
         res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
-       res->valnode->type = OPR;
-       res->valnode->val = '!';
+       res->valnode->type = QI_OPR;
+       res->valnode->operator.oper = OP_NOT;
  
         res->child = (QTNode **) palloc0(sizeof(QTNode *));
         res->child[0] = QT2QTN(GETQUERY(a), GETOPERAND(a));
@@ -209,8 +206,8 @@ makeTSQuerySign(TSQuery a)
  
         for (i = 0; i < a->size; i++)
         {
-               if (ptr->type == VAL)
-                       sign |= ((TSQuerySign) 1) << (ptr->val % TSQS_SIGLEN);
+               if (ptr->type == QI_VAL)
+                       sign |= ((TSQuerySign) 1) << (ptr->operand.valcrc % TSQS_SIGLEN);
                 ptr++;
         }
  
@@ -253,10 +250,10 @@ tsq_mcontains(PG_FUNCTION_ARGS)
         for (i = 0; i < ex->size; i++)
         {
                 iq = GETQUERY(query);
-               if (ie[i].type != VAL)
+               if (ie[i].type != QI_VAL)
                         continue;
                 for (j = 0; j < query->size; j++)
-                       if (iq[j].type == VAL && ie[i].val == iq[j].val)
+                       if (iq[j].type == QI_VAL && ie[i].operand.valcrc == iq[j].operand.valcrc)
                         {
                                 j = query->size + 1;
                                 break;
diff --git a/src/backend/utils/adt/tsquery_rewrite.c b/src/backend/utils/adt/tsquery_rewrite.c

index f0d22c644ae702b59a0df740631631aa40307e61..db2fe6c53ef91681bc9d39b98c1249d80ef3e103 100644 (file)
--- a/src/backend/utils/adt/tsquery_rewrite.c
+++ b/src/backend/utils/adt/tsquery_rewrite.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -34,18 +34,26 @@ addone(int *counters, int last, int total)
         return 1;
  }
  
+/*
+ * If node is equal to ex, replace it with subs. Replacement is actually done
+ * by returning either node or a copy of subs.
+ */
  static QTNode *
  findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
  {
  
-       if ((node->sign & ex->sign) != ex->sign || node->valnode->type != ex->valnode->type || node->valnode->val != ex->valnode->val)
+       if ((node->sign & ex->sign) != ex->sign || 
+               node->valnode->type != ex->valnode->type)
                 return node;
  
         if (node->flags & QTN_NOCHANGE)
                 return node;
-
-       if (node->valnode->type == OPR)
+       
+       if (node->valnode->type == QI_OPR)
         {
+               if (node->valnode->operator.oper != ex->valnode->operator.oper)
+                       return node;
+
                 if (node->nchild == ex->nchild)
                 {
                         if (QTNEq(node, ex))
@@ -63,6 +71,12 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
                 }
                 else if (node->nchild > ex->nchild)
                 {
+                       /*
+                        * AND and OR are commutative, so we check if a subset of the
+                        * children match. For example, if tnode is A | B | C, and 
+                        * ex is B | C, we have a match after we convert tnode to
+                        * A | (B | C).
+                        */
                         int                *counters = (int *) palloc(sizeof(int) * node->nchild);
                         int                     i;
                         QTNode     *tnode = (QTNode *) palloc(sizeof(QTNode));
@@ -131,19 +145,26 @@ findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
                         pfree(counters);
                 }
         }
-       else if (QTNEq(node, ex))
+       else 
         {
-               QTNFree(node);
-               if (subs)
-               {
-                       node = QTNCopy(subs);
-                       node->flags |= QTN_NOCHANGE;
-               }
-               else
+               Assert(node->valnode->type == QI_VAL);
+
+               if (node->valnode->operand.valcrc != ex->valnode->operand.valcrc)
+                       return node;
+               else if (QTNEq(node, ex))
                 {
-                       node = NULL;
+                       QTNFree(node);
+                       if (subs)
+                       {
+                               node = QTNCopy(subs);
+                               node->flags |= QTN_NOCHANGE;
+                       }
+                       else
+                       {
+                               node = NULL;
+                       }
+                       *isfind = true;
                 }
-               *isfind = true;
         }
  
         return node;
@@ -154,7 +175,7 @@ dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
  {
         root = findeq(root, ex, subs, isfind);
  
-       if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == OPR)
+       if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == QI_OPR)
         {
                 int                     i;
  
@@ -172,7 +193,7 @@ dropvoidsubtree(QTNode * root)
         if (!root)
                 return NULL;
  
-       if (root->valnode->type == OPR)
+       if (root->valnode->type == QI_OPR)
         {
                 int                     i,
                                         j = 0;
@@ -188,7 +209,7 @@ dropvoidsubtree(QTNode * root)
  
                 root->nchild = j;
  
-               if (root->valnode->val == (int4) '!' && root->nchild == 0)
+               if (root->valnode->operator.oper == OP_NOT && root->nchild == 0)
                 {
                         QTNFree(root);
                         root = NULL;
@@ -256,9 +277,9 @@ ts_rewrite_accum(PG_FUNCTION_ARGS)
                 elog(ERROR, "array must be one-dimensional, not %d dimensions",
                          ARR_NDIM(qa));
         if (ArrayGetNItems(ARR_NDIM(qa), ARR_DIMS(qa)) != 3)
-               elog(ERROR, "array should have only three elements");
+               elog(ERROR, "array must have three elements");
         if (ARR_ELEMTYPE(qa) != TSQUERYOID)
-               elog(ERROR, "array should contain tsquery type");
+               elog(ERROR, "array must contain tsquery elements");
  
         deconstruct_array(qa, TSQUERYOID, -1, false, 'i', &elemsp, NULL, &nelemsp);
  
@@ -499,6 +520,7 @@ tsquery_rewrite_query(PG_FUNCTION_ARGS)
                 subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst));
  
         tree = findsubquery(tree, qex, subs, NULL);
+
         QTNFree(qex);
         QTNFree(subs);
  
diff --git a/src/backend/utils/adt/tsquery_util.c b/src/backend/utils/adt/tsquery_util.c

index ae8cc318da93b340c16fa5ec441ade75b5e44c9d..e378661488bd8604958c9802b18b4c51333ad6f5 100644 (file)
--- a/src/backend/utils/adt/tsquery_util.c
+++ b/src/backend/utils/adt/tsquery_util.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -17,7 +17,6 @@
  #include "tsearch/ts_type.h"
  #include "tsearch/ts_utils.h"
  
-
  QTNode *
  QT2QTN(QueryItem * in, char *operand)
  {
@@ -25,24 +24,24 @@ QT2QTN(QueryItem * in, char *operand)
  
         node->valnode = in;
  
-       if (in->type == OPR)
+       if (in->type == QI_OPR)
         {
                 node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
                 node->child[0] = QT2QTN(in + 1, operand);
                 node->sign = node->child[0]->sign;
-               if (in->val == (int4) '!')
+               if (in->operator.oper == OP_NOT)
                         node->nchild = 1;
                 else
                 {
                         node->nchild = 2;
-                       node->child[1] = QT2QTN(in + in->left, operand);
+                       node->child[1] = QT2QTN(in + in->operator.left, operand);
                         node->sign |= node->child[1]->sign;
                 }
         }
         else if (operand)
         {
-               node->word = operand + in->distance;
-               node->sign = 1 << (in->val % 32);
+               node->word = operand + in->operand.distance;
+               node->sign = 1 << (in->operand.valcrc % 32);
         }
  
         return node;
@@ -54,14 +53,14 @@ QTNFree(QTNode * in)
         if (!in)
                 return;
  
-       if (in->valnode->type == VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
+       if (in->valnode->type == QI_VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
                 pfree(in->word);
  
         if (in->child)
         {
                 if (in->valnode)
                 {
-                       if (in->valnode->type == OPR && in->nchild > 0)
+                       if (in->valnode->type == QI_OPR && in->nchild > 0)
                         {
                                 int                     i;
  
@@ -82,30 +81,45 @@ QTNodeCompare(QTNode * an, QTNode * bn)
  {
         if (an->valnode->type != bn->valnode->type)
                 return (an->valnode->type > bn->valnode->type) ? -1 : 1;
-       else if (an->valnode->val != bn->valnode->val)
-               return (an->valnode->val > bn->valnode->val) ? -1 : 1;
-       else if (an->valnode->type == VAL)
-       {
-               if (an->valnode->length == bn->valnode->length)
-                       return strncmp(an->word, bn->word, an->valnode->length);
-               else
-                       return (an->valnode->length > bn->valnode->length) ? -1 : 1;
-       }
-       else if (an->nchild != bn->nchild)
+       
+       if (an->valnode->type == QI_OPR)
         {
-               return (an->nchild > bn->nchild) ? -1 : 1;
+               QueryOperator *ao = &an->valnode->operator;
+               QueryOperator *bo = &bn->valnode->operator;
+
+               if(ao->oper != bo->oper)
+                       return (ao->oper > bo->oper) ? -1 : 1;
+
+               if (an->nchild != bn->nchild)
+                       return (an->nchild > bn->nchild) ? -1 : 1;
+
+               {
+                       int                     i,
+                                               res;
+
+                       for (i = 0; i < an->nchild; i++)
+                               if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
+                                       return res;
+               }
+               return 0;
         }
         else
         {
-               int                     i,
-                                       res;
+               QueryOperand *ao = &an->valnode->operand;
+               QueryOperand *bo = &bn->valnode->operand;
  
-               for (i = 0; i < an->nchild; i++)
-                       if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
-                               return res;
-       }
+               Assert(an->valnode->type == QI_VAL);
+
+               if (ao->valcrc != bo->valcrc)
+               {
+                       return (ao->valcrc > bo->valcrc) ? -1 : 1;
+               }
  
-       return 0;
+               if (ao->length == bo->length)
+                       return strncmp(an->word, bn->word, ao->length);
+               else
+                       return (ao->length > bo->length) ? -1 : 1;
+       }
  }
  
  static int
@@ -119,7 +133,7 @@ QTNSort(QTNode * in)
  {
         int                     i;
  
-       if (in->valnode->type != OPR)
+       if (in->valnode->type != QI_OPR)
                 return;
  
         for (i = 0; i < in->nchild; i++)
@@ -139,12 +153,19 @@ QTNEq(QTNode * a, QTNode * b)
         return (QTNodeCompare(a, b) == 0) ? true : false;
  }
  
+/*
+ * Remove unnecessary intermediate nodes. For example:
+ *
+ *  OR          OR
+ * a  OR    -> a b c
+ *   b  c      
+ */
  void
  QTNTernary(QTNode * in)
  {
         int                     i;
  
-       if (in->valnode->type != OPR)
+       if (in->valnode->type != QI_OPR)
                 return;
  
         for (i = 0; i < in->nchild; i++)
@@ -152,9 +173,10 @@ QTNTernary(QTNode * in)
  
         for (i = 0; i < in->nchild; i++)
         {
-               if (in->valnode->type == in->child[i]->valnode->type && in->valnode->val == in->child[i]->valnode->val)
+               QTNode     *cc = in->child[i];
+
+               if (cc->valnode->type == QI_OPR && in->valnode->operator.oper == cc->valnode->operator.oper)
                 {
-                       QTNode     *cc = in->child[i];
                         int                     oldnchild = in->nchild;
  
                         in->nchild += cc->nchild - 1;
@@ -167,17 +189,23 @@ QTNTernary(QTNode * in)
                         memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *));
                         i += cc->nchild - 1;
  
+                       if(cc->flags & QTN_NEEDFREE)
+                               pfree(cc->valnode);
                         pfree(cc);
                 }
         }
  }
  
+/*
+ * Convert a tree to binary tree by inserting intermediate nodes.
+ * (Opposite of QTNTernary)
+ */
  void
  QTNBinary(QTNode * in)
  {
         int                     i;
  
-       if (in->valnode->type != OPR)
+       if (in->valnode->type != QI_OPR)
                 return;
  
         for (i = 0; i < in->nchild; i++)
@@ -201,7 +229,7 @@ QTNBinary(QTNode * in)
                 nn->sign = nn->child[0]->sign | nn->child[1]->sign;
  
                 nn->valnode->type = in->valnode->type;
-               nn->valnode->val = in->valnode->val;
+               nn->valnode->operator.oper = in->valnode->operator.oper;
  
                 in->child[0] = nn;
                 in->child[1] = in->child[in->nchild - 1];
@@ -209,11 +237,15 @@ QTNBinary(QTNode * in)
         }
  }
  
+/*
+ * Count the nodes in the tree and the total length of their operand
+ * strings, including '\0'-terminators.
+ */
  static void
-cntsize(QTNode * in, int4 *sumlen, int4 *nnode)
+cntsize(QTNode * in, int *sumlen, int *nnode)
  {
         *nnode += 1;
-       if (in->valnode->type == OPR)
+       if (in->valnode->type == QI_OPR)
         {
                 int                     i;
  
@@ -222,7 +254,7 @@ cntsize(QTNode * in, int4 *sumlen, int4 *nnode)
         }
         else
         {
-               *sumlen += in->valnode->length + 1;
+               *sumlen += in->valnode->operand.length + 1;
         }
  }
  
@@ -234,22 +266,26 @@ typedef struct
  } QTN2QTState;
  
  static void
-fillQT(QTN2QTState * state, QTNode * in)
+fillQT(QTN2QTState *state, QTNode *in)
  {
-       *(state->curitem) = *(in->valnode);
-
-       if (in->valnode->type == VAL)
+       if (in->valnode->type == QI_VAL)
         {
-               memcpy(state->curoperand, in->word, in->valnode->length);
-               state->curitem->distance = state->curoperand - state->operand;
-               state->curoperand[in->valnode->length] = '\0';
-               state->curoperand += in->valnode->length + 1;
+               memcpy(state->curitem, in->valnode, sizeof(QueryOperand));
+
+               memcpy(state->curoperand, in->word, in->valnode->operand.length);
+               state->curitem->operand.distance = state->curoperand - state->operand;
+               state->curoperand[in->valnode->operand.length] = '\0';
+               state->curoperand += in->valnode->operand.length + 1;
                 state->curitem++;
         }
         else
         {
                 QueryItem  *curitem = state->curitem;
  
+               Assert(in->valnode->type == QI_OPR);
+
+               memcpy(state->curitem, in->valnode, sizeof(QueryOperator));
+
                 Assert(in->nchild <= 2);
                 state->curitem++;
  
@@ -257,7 +293,7 @@ fillQT(QTN2QTState * state, QTNode * in)
  
                 if (in->nchild == 2)
                 {
-                       curitem->left = state->curitem - curitem;
+                       curitem->operator.left = state->curitem - curitem;
                         fillQT(state, in->child[1]);
                 }
         }
@@ -296,11 +332,11 @@ QTNCopy(QTNode *in)
         *(out->valnode) = *(in->valnode);
         out->flags |= QTN_NEEDFREE;
  
-       if (in->valnode->type == VAL)
+       if (in->valnode->type == QI_VAL)
         {
-               out->word = palloc(in->valnode->length + 1);
-               memcpy(out->word, in->word, in->valnode->length);
-               out->word[in->valnode->length] = '\0';
+               out->word = palloc(in->valnode->operand.length + 1);
+               memcpy(out->word, in->word, in->valnode->operand.length);
+               out->word[in->valnode->operand.length] = '\0';
                 out->flags |= QTN_WORDFREE;
         }
         else
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c

index 8b2ab884c8cb6082787e217db6fe587b2d86c05e..d48e9b4a470be27d2843789fe27487c978e4e796 100644 (file)
--- a/src/backend/utils/adt/tsrank.c
+++ b/src/backend/utils/adt/tsrank.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.2 2007/09/07 15:09:56 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -68,7 +68,7 @@ cnt_length(TSVector t)
  }
  
  static int4
-WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item)
+WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item)
  {
         if (ptr->len == item->length)
                 return strncmp(
@@ -80,7 +80,7 @@ WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item)
  }
  
  static WordEntry *
-find_wordentry(TSVector t, TSQuery q, QueryItem * item)
+find_wordentry(TSVector t, TSQuery q, QueryOperand *item)
  {
         WordEntry  *StopLow = ARRPTR(t);
         WordEntry  *StopHigh = (WordEntry *) STRPTR(t);
@@ -105,33 +105,48 @@ find_wordentry(TSVector t, TSQuery q, QueryItem * item)
  }
  
  
+/*
+ * sort QueryOperands by (length, word)
+ */
  static int
-compareQueryItem(const void *a, const void *b, void *arg)
+compareQueryOperand(const void *a, const void *b, void *arg)
  {
         char       *operand = (char *) arg;
+       QueryOperand *qa = (*(QueryOperand **) a);
+       QueryOperand *qb = (*(QueryOperand **) b);
  
-       if ((*(QueryItem **) a)->length == (*(QueryItem **) b)->length)
-               return strncmp(operand + (*(QueryItem **) a)->distance,
-                                          operand + (*(QueryItem **) b)->distance,
-                                          (*(QueryItem **) b)->length);
+       if (qa->length == qb->length)
+               return strncmp(operand + qa->distance,
+                                          operand + qb->distance,
+                                          qb->length);
  
-       return ((*(QueryItem **) a)->length > (*(QueryItem **) b)->length) ? 1 : -1;
+       return (qa->length > qb->length) ? 1 : -1;
  }
  
-static QueryItem **
-SortAndUniqItems(char *operand, QueryItem * item, int *size)
+/*
+ * Returns a sorted, de-duplicated array of QueryOperands in a query.
+ * The returned QueryOperands are pointers to the original QueryOperands
+ * in the query.
+ *
+ * Length of the returned array is stored in *size
+ */
+static QueryOperand **
+SortAndUniqItems(TSQuery q, int *size)
  {
-       QueryItem **res,
+       char *operand = GETOPERAND(q);
+       QueryItem * item = GETQUERY(q);
+       QueryOperand **res,
                           **ptr,
                           **prevptr;
  
-       ptr = res = (QueryItem **) palloc(sizeof(QueryItem *) * *size);
+       ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
  
+       /* Collect all operands from the tree to res */
         while ((*size)--)
         {
-               if (item->type == VAL)
+               if (item->type == QI_VAL)
                 {
-                       *ptr = item;
+                       *ptr = (QueryOperand *) item;
                         ptr++;
                 }
                 item++;
@@ -141,14 +156,15 @@ SortAndUniqItems(char *operand, QueryItem * item, int *size)
         if (*size < 2)
                 return res;
  
-       qsort_arg(res, *size, sizeof(QueryItem **), compareQueryItem, (void *) operand);
+       qsort_arg(res, *size, sizeof(QueryOperand **), compareQueryOperand, (void *) operand);
  
         ptr = res + 1;
         prevptr = res;
  
+       /* remove duplicates */
         while (ptr - res < *size)
         {
-               if (compareQueryItem((void *) ptr, (void *) prevptr, (void *) operand) != 0)
+               if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0)
                 {
                         prevptr++;
                         *prevptr = *ptr;
@@ -180,10 +196,10 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
                                 lenct,
                                 dist;
         float           res = -1.0;
-       QueryItem **item;
+       QueryOperand **item;
         int                     size = q->size;
  
-       item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
+       item = SortAndUniqItems(q, &size);
         if (size < 2)
         {
                 pfree(item);
@@ -246,11 +262,11 @@ calc_rank_or(float *w, TSVector t, TSQuery q)
                                 j,
                                 i;
         float           res = 0.0;
-       QueryItem **item;
+       QueryOperand **item;
         int                     size = q->size;
  
         *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
-       item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
+       item = SortAndUniqItems(q, &size);
  
         for (i = 0; i < size; i++)
         {
@@ -310,7 +326,8 @@ calc_rank(float *w, TSVector t, TSQuery q, int4 method)
         if (!t->size || !q->size)
                 return 0.0;
  
-       res = (item->type != VAL && item->val == (int4) '&') ?
+       /* XXX: What about NOT? */
+       res = (item->type == QI_OPR && item->operator.oper == OP_AND) ?
                 calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
  
         if (res < 0)
@@ -453,7 +470,7 @@ compareDocR(const void *a, const void *b)
  }
  
  static bool
-checkcondition_QueryItem(void *checkval, QueryItem * val)
+checkcondition_QueryOperand(void *checkval, QueryOperand *val)
  {
         return (bool) (val->istrue);
  }
@@ -467,8 +484,8 @@ reset_istrue_flag(TSQuery query)
         /* reset istrue flag */
         for (i = 0; i < query->size; i++)
         {
-               if (item->type == VAL)
-                       item->istrue = 0;
+               if (item->type == QI_VAL)
+                       item->operand.istrue = 0;
                 item++;
         }
  }
@@ -484,7 +501,7 @@ typedef struct
  
  
  static bool
-Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
+Cover(DocRepresentation *doc, int len, TSQuery query, Extention *ext)
  {
         DocRepresentation *ptr;
         int                     lastpos = ext->pos;
@@ -501,8 +518,11 @@ Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
         while (ptr - doc < len)
         {
                 for (i = 0; i < ptr->nitem; i++)
-                       ptr->item[i]->istrue = 1;
-               if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryItem))
+               {
+                       if(ptr->item[i]->type == QI_VAL)
+                               ptr->item[i]->operand.istrue = 1;
+               }
+               if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryOperand))
                 {
                         if (ptr->pos > ext->q)
                         {
@@ -527,8 +547,9 @@ Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
         while (ptr >= doc + ext->pos)
         {
                 for (i = 0; i < ptr->nitem; i++)
-                       ptr->item[i]->istrue = 1;
-               if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryItem))
+                       if(ptr->item[i]->type  == QI_VAL) /* XXX */
+                               ptr->item[i]->operand.istrue = 1;
+               if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryOperand))
                 {
                         if (ptr->pos < ext->p)
                         {
@@ -575,10 +596,17 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
  
         for (i = 0; i < query->size; i++)
         {
-               if (item[i].type != VAL || item[i].istrue)
+               QueryOperand *curoperand;
+
+               if (item[i].type != QI_VAL)
+                       continue;
+               
+               curoperand = &item[i].operand;
+               
+               if(item[i].operand.istrue)
                         continue;
  
-               entry = find_wordentry(txt, query, &(item[i]));
+               entry = find_wordentry(txt, query, curoperand);
                 if (!entry)
                         continue;
  
@@ -603,8 +631,6 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
                 {
                         if (j == 0)
                         {
-                               QueryItem  *kptr,
-                                                  *iptr = item + i;
                                 int                     k;
  
                                 doc[cur].needfree = false;
@@ -613,14 +639,17 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
  
                                 for (k = 0; k < query->size; k++)
                                 {
-                                       kptr = item + k;
+                                       QueryOperand *kptr = &item[k].operand;
+                                       QueryOperand *iptr = &item[i].operand;
+
                                         if (k == i ||
-                                               (item[k].type == VAL &&
-                                                compareQueryItem(&kptr, &iptr, operand) == 0))
+                                               (item[k].type == QI_VAL &&
+                                                compareQueryOperand(&kptr, &iptr, operand) == 0))
                                         {
+                                               /* if k == i, we've already checked above that its type == QI_VAL */
                                                 doc[cur].item[doc[cur].nitem] = item + k;
                                                 doc[cur].nitem++;
-                                               kptr->istrue = 1;
+                                               item[k].operand.istrue = 1;
                                         }
                                 }
                         }
@@ -640,8 +669,7 @@ get_docrep(TSVector txt, TSQuery query, int *doclen)
  
         if (cur > 0)
         {
-               if (cur > 1)
-                       qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+               qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
                 return doc;
         }
  
@@ -746,7 +774,7 @@ ts_rankcd_wttf(PG_FUNCTION_ARGS)
  {
         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
         TSVector        txt = PG_GETARG_TSVECTOR(1);
-       TSQuery         query = PG_GETARG_TSQUERY_COPY(2);
+       TSQuery         query = PG_GETARG_TSQUERY_COPY(2); /* copy because we modify the istrue-flag */
         int                     method = PG_GETARG_INT32(3);
         float           res;
  
@@ -763,7 +791,7 @@ ts_rankcd_wtt(PG_FUNCTION_ARGS)
  {
         ArrayType  *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
         TSVector        txt = PG_GETARG_TSVECTOR(1);
-       TSQuery         query = PG_GETARG_TSQUERY_COPY(2);
+       TSQuery         query = PG_GETARG_TSQUERY_COPY(2); /* copy because we modify the istrue-flag */
         float           res;
  
         res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);
@@ -778,7 +806,7 @@ Datum
  ts_rankcd_ttf(PG_FUNCTION_ARGS)
  {
         TSVector        txt = PG_GETARG_TSVECTOR(0);
-       TSQuery         query = PG_GETARG_TSQUERY_COPY(1);
+       TSQuery         query = PG_GETARG_TSQUERY_COPY(1); /* copy because we modify the istrue-flag */
         int                     method = PG_GETARG_INT32(2);
         float           res;
  
@@ -793,7 +821,7 @@ Datum
  ts_rankcd_tt(PG_FUNCTION_ARGS)
  {
         TSVector        txt = PG_GETARG_TSVECTOR(0);
-       TSQuery         query = PG_GETARG_TSQUERY_COPY(1);
+       TSQuery         query = PG_GETARG_TSQUERY_COPY(1); /* copy because we modify the istrue-flag */
         float           res;
  
         res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);
diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c

index 8ab024650f72a6887bbe4a17453b4decd9115a16..2866e028da02b778c7592d909524b90413f2d612 100644 (file)
--- a/src/backend/utils/adt/tsvector.c
+++ b/src/backend/utils/adt/tsvector.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.2 2007/08/21 01:45:33 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -20,22 +20,37 @@
  #include "tsearch/ts_utils.h"
  #include "utils/memutils.h"
  
+typedef struct
+{
+       WordEntry       entry;                  /* should be first ! */
+       WordEntryPos *pos;
+       int                     poslen;                 /* number of elements in pos */
+} WordEntryIN;
  
  static int
  comparePos(const void *a, const void *b)
  {
-       if (WEP_GETPOS(*(WordEntryPos *) a) == WEP_GETPOS(*(WordEntryPos *) b))
+       int apos = WEP_GETPOS(*(WordEntryPos *) a);
+       int bpos = WEP_GETPOS(*(WordEntryPos *) b);
+
+       if (apos == bpos)
                 return 0;
-       return (WEP_GETPOS(*(WordEntryPos *) a) > WEP_GETPOS(*(WordEntryPos *) b)) ? 1 : -1;
+       return (apos > bpos) ? 1 : -1;
  }
  
+/*
+ * Removes duplicate pos entries. If there are two entries with the same
+ * pos but different weights, the higher weight is retained.
+ *
+ * Returns new length.
+ */
  static int
-uniquePos(WordEntryPos * a, int4 l)
+uniquePos(WordEntryPos * a, int l)
  {
         WordEntryPos *ptr,
                            *res;
  
-       if (l == 1)
+       if (l <= 1)
                 return l;
  
         res = a;
@@ -75,21 +90,23 @@ compareentry(const void *a, const void *b, void *arg)
  }
  
  static int
-uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
+uniqueentry(WordEntryIN * a, int l, char *buf, int *outbuflen)
  {
         WordEntryIN *ptr,
                            *res;
  
-       res = a;
+       Assert(l >= 1);
+
         if (l == 1)
         {
                 if (a->entry.haspos)
                 {
-                       *(uint16 *) (a->pos) = uniquePos(&(a->pos[1]), *(uint16 *) (a->pos));
-                       *outbuflen = SHORTALIGN(res->entry.len) + (*(uint16 *) (a->pos) + 1) * sizeof(WordEntryPos);
+                       a->poslen = uniquePos(a->pos, a->poslen);
+                       *outbuflen = SHORTALIGN(a->entry.len) + (a->poslen + 1) * sizeof(WordEntryPos);
                 }
                 return l;
         }
+       res = a;
  
         ptr = a + 1;
         qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry, (void *) buf);
@@ -101,8 +118,8 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
                 {
                         if (res->entry.haspos)
                         {
-                               *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
-                               *outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
+                               res->poslen = uniquePos(res->pos, res->poslen);
+                               *outbuflen += res->poslen * sizeof(WordEntryPos);
                         }
                         *outbuflen += SHORTALIGN(res->entry.len);
                         res++;
@@ -112,12 +129,14 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
                 {
                         if (res->entry.haspos)
                         {
-                               int4            len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos);
+                               int     newlen = ptr->poslen + res->poslen;
+
+                               /* Append ptr's positions to res's positions */
  
-                               res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos));
-                               memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]),
-                                          &(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos));
-                               *(uint16 *) (res->pos) += *(uint16 *) (ptr->pos);
+                               res->pos = (WordEntryPos *) repalloc(res->pos, newlen * sizeof(WordEntryPos));
+                               memcpy(&res->pos[res->poslen],
+                                          ptr->pos, ptr->poslen * sizeof(WordEntryPos));
+                               res->poslen = newlen;
                                 pfree(ptr->pos);
                         }
                         else
@@ -130,8 +149,8 @@ uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
         }
         if (res->entry.haspos)
         {
-               *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
-               *outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
+               res->poslen = uniquePos(res->pos, res->poslen);
+               *outbuflen += res->poslen * sizeof(WordEntryPos);
         }
         *outbuflen += SHORTALIGN(res->entry.len);
  
@@ -144,248 +163,6 @@ WordEntryCMP(WordEntry * a, WordEntry * b, char *buf)
         return compareentry(a, b, buf);
  }
  
-#define WAITWORD               1
-#define WAITENDWORD            2
-#define WAITNEXTCHAR   3
-#define WAITENDCMPLX   4
-#define WAITPOSINFO            5
-#define INPOSINFO              6
-#define WAITPOSDELIM   7
-#define WAITCHARCMPLX  8
-
-#define RESIZEPRSBUF \
-do { \
-       if ( state->curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
-       { \
-               int4 clen = state->curpos - state->word; \
-               state->len *= 2; \
-               state->word = (char*)repalloc( (void*)state->word, state->len ); \
-               state->curpos = state->word + clen; \
-       } \
-} while (0)
-
-bool
-gettoken_tsvector(TSVectorParseState *state)
-{
-       int4            oldstate = 0;
-
-       state->curpos = state->word;
-       state->state = WAITWORD;
-       state->alen = 0;
-
-       while (1)
-       {
-               if (state->state == WAITWORD)
-               {
-                       if (*(state->prsbuf) == '\0')
-                               return false;
-                       else if (t_iseq(state->prsbuf, '\''))
-                               state->state = WAITENDCMPLX;
-                       else if (t_iseq(state->prsbuf, '\\'))
-                       {
-                               state->state = WAITNEXTCHAR;
-                               oldstate = WAITENDWORD;
-                       }
-                       else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_SYNTAX_ERROR),
-                                                errmsg("syntax error in tsvector")));
-                       else if (!t_isspace(state->prsbuf))
-                       {
-                               COPYCHAR(state->curpos, state->prsbuf);
-                               state->curpos += pg_mblen(state->prsbuf);
-                               state->state = WAITENDWORD;
-                       }
-               }
-               else if (state->state == WAITNEXTCHAR)
-               {
-                       if (*(state->prsbuf) == '\0')
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_SYNTAX_ERROR),
-                                                errmsg("there is no escaped character")));
-                       else
-                       {
-                               RESIZEPRSBUF;
-                               COPYCHAR(state->curpos, state->prsbuf);
-                               state->curpos += pg_mblen(state->prsbuf);
-                               state->state = oldstate;
-                       }
-               }
-               else if (state->state == WAITENDWORD)
-               {
-                       if (t_iseq(state->prsbuf, '\\'))
-                       {
-                               state->state = WAITNEXTCHAR;
-                               oldstate = WAITENDWORD;
-                       }
-                       else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
-                                        (state->oprisdelim && ISOPERATOR(state->prsbuf)))
-                       {
-                               RESIZEPRSBUF;
-                               if (state->curpos == state->word)
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_SYNTAX_ERROR),
-                                                        errmsg("syntax error in tsvector")));
-                               *(state->curpos) = '\0';
-                               return true;
-                       }
-                       else if (t_iseq(state->prsbuf, ':'))
-                       {
-                               if (state->curpos == state->word)
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_SYNTAX_ERROR),
-                                                        errmsg("syntax error in tsvector")));
-                               *(state->curpos) = '\0';
-                               if (state->oprisdelim)
-                                       return true;
-                               else
-                                       state->state = INPOSINFO;
-                       }
-                       else
-                       {
-                               RESIZEPRSBUF;
-                               COPYCHAR(state->curpos, state->prsbuf);
-                               state->curpos += pg_mblen(state->prsbuf);
-                       }
-               }
-               else if (state->state == WAITENDCMPLX)
-               {
-                       if (t_iseq(state->prsbuf, '\''))
-                       {
-                               state->state = WAITCHARCMPLX;
-                       }
-                       else if (t_iseq(state->prsbuf, '\\'))
-                       {
-                               state->state = WAITNEXTCHAR;
-                               oldstate = WAITENDCMPLX;
-                       }
-                       else if (*(state->prsbuf) == '\0')
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_SYNTAX_ERROR),
-                                                errmsg("syntax error in tsvector")));
-                       else
-                       {
-                               RESIZEPRSBUF;
-                               COPYCHAR(state->curpos, state->prsbuf);
-                               state->curpos += pg_mblen(state->prsbuf);
-                       }
-               }
-               else if (state->state == WAITCHARCMPLX)
-               {
-                       if (t_iseq(state->prsbuf, '\''))
-                       {
-                               RESIZEPRSBUF;
-                               COPYCHAR(state->curpos, state->prsbuf);
-                               state->curpos += pg_mblen(state->prsbuf);
-                               state->state = WAITENDCMPLX;
-                       }
-                       else
-                       {
-                               RESIZEPRSBUF;
-                               *(state->curpos) = '\0';
-                               if (state->curpos == state->word)
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_SYNTAX_ERROR),
-                                                        errmsg("syntax error in tsvector")));
-                               if (state->oprisdelim)
-                               {
-                                       /* state->prsbuf+=pg_mblen(state->prsbuf); */
-                                       return true;
-                               }
-                               else
-                                       state->state = WAITPOSINFO;
-                               continue;               /* recheck current character */
-                       }
-               }
-               else if (state->state == WAITPOSINFO)
-               {
-                       if (t_iseq(state->prsbuf, ':'))
-                               state->state = INPOSINFO;
-                       else
-                               return true;
-               }
-               else if (state->state == INPOSINFO)
-               {
-                       if (t_isdigit(state->prsbuf))
-                       {
-                               if (state->alen == 0)
-                               {
-                                       state->alen = 4;
-                                       state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
-                                       *(uint16 *) (state->pos) = 0;
-                               }
-                               else if (*(uint16 *) (state->pos) + 1 >= state->alen)
-                               {
-                                       state->alen *= 2;
-                                       state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
-                               }
-                               (*(uint16 *) (state->pos))++;
-                               WEP_SETPOS(state->pos[*(uint16 *) (state->pos)], LIMITPOS(atoi(state->prsbuf)));
-                               if (WEP_GETPOS(state->pos[*(uint16 *) (state->pos)]) == 0)
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_SYNTAX_ERROR),
-                                                        errmsg("wrong position info in tsvector")));
-                               WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
-                               state->state = WAITPOSDELIM;
-                       }
-                       else
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_SYNTAX_ERROR),
-                                                errmsg("syntax error in tsvector")));
-               }
-               else if (state->state == WAITPOSDELIM)
-               {
-                       if (t_iseq(state->prsbuf, ','))
-                               state->state = INPOSINFO;
-                       else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
-                       {
-                               if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_SYNTAX_ERROR),
-                                                        errmsg("syntax error in tsvector")));
-                               WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 3);
-                       }
-                       else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
-                       {
-                               if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_SYNTAX_ERROR),
-                                                        errmsg("syntax error in tsvector")));
-                               WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 2);
-                       }
-                       else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
-                       {
-                               if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_SYNTAX_ERROR),
-                                                        errmsg("syntax error in tsvector")));
-                               WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 1);
-                       }
-                       else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
-                       {
-                               if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_SYNTAX_ERROR),
-                                                        errmsg("syntax error in tsvector")));
-                               WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
-                       }
-                       else if (t_isspace(state->prsbuf) ||
-                                        *(state->prsbuf) == '\0')
-                               return true;
-                       else if (!t_isdigit(state->prsbuf))
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_SYNTAX_ERROR),
-                                                errmsg("syntax error in tsvector")));
-               }
-               else                                    /* internal error */
-                       elog(ERROR, "internal error in gettoken_tsvector");
-
-               /* get next char */
-               state->prsbuf += pg_mblen(state->prsbuf);
-       }
-
-       return false;
-}
  
  Datum
  tsvectorin(PG_FUNCTION_ARGS)
@@ -393,70 +170,82 @@ tsvectorin(PG_FUNCTION_ARGS)
         char       *buf = PG_GETARG_CSTRING(0);
         TSVectorParseState state;
         WordEntryIN *arr;
+       int                     totallen;
+       int                     arrlen;  /* allocated size of arr */
         WordEntry  *inarr;
-       int4            len = 0,
-                               totallen = 64;
+       int                     len = 0;
         TSVector        in;
-       char       *tmpbuf,
-                          *cur;
-       int4            i,
-                               buflen = 256;
+       int                     i;
+       char       *token;
+       int                     toklen;
+       WordEntryPos *pos;
+       int                     poslen;
+
+       /*
+        * Tokens are appended to tmpbuf; cur points to the end of the
+        * used space in tmpbuf.
+        */
+       char       *tmpbuf;
+       char       *cur;
+       int                     buflen = 256; /* allocated size of tmpbuf */
  
         pg_verifymbstr(buf, strlen(buf), false);
-       state.prsbuf = buf;
-       state.len = 32;
-       state.word = (char *) palloc(state.len);
-       state.oprisdelim = false;
  
-       arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
+       state = init_tsvector_parser(buf, false);
+       
+       arrlen = 64;
+       arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
         cur = tmpbuf = (char *) palloc(buflen);
  
-       while (gettoken_tsvector(&state))
+       while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL))
         {
-               /*
-                * Realloc buffers if it's needed
-                */
-               if (len >= totallen)
-               {
-                       totallen *= 2;
-                       arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
-               }
-
-               while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
-               {
-                       int4            dist = cur - tmpbuf;
-
-                       buflen *= 2;
-                       tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
-                       cur = tmpbuf + dist;
-               }
  
-               if (state.curpos - state.word >= MAXSTRLEN)
+               if (toklen >= MAXSTRLEN)
                         ereport(ERROR,
                                         (errcode(ERRCODE_SYNTAX_ERROR),
                                          errmsg("word is too long (%ld bytes, max %ld bytes)",
-                                                       (long) (state.curpos - state.word),
+                                                       (long) toklen,
                                                         (long) MAXSTRLEN)));
  
-               arr[len].entry.len = state.curpos - state.word;
+
                 if (cur - tmpbuf > MAXSTRPOS)
                         ereport(ERROR,
                                         (errcode(ERRCODE_SYNTAX_ERROR),
                                          errmsg("position value too large")));
+
+               /*
+                * Enlarge buffers if needed
+                */
+               if (len >= arrlen)
+               {
+                       arrlen *= 2;
+                       arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * arrlen);
+               }
+               while ((cur - tmpbuf) + toklen >= buflen)
+               {
+                       int     dist = cur - tmpbuf;
+
+                       buflen *= 2;
+                       tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+                       cur = tmpbuf + dist;
+               }
+               arr[len].entry.len = toklen;
                 arr[len].entry.pos = cur - tmpbuf;
-               memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
-               cur += arr[len].entry.len;
+               memcpy((void *) cur, (void *) token, toklen);
+               cur += toklen;
  
-               if (state.alen)
+               if (poslen != 0)
                 {
                         arr[len].entry.haspos = 1;
-                       arr[len].pos = state.pos;
+                       arr[len].pos = pos;
+                       arr[len].poslen = poslen;
                 }
                 else
                         arr[len].entry.haspos = 0;
                 len++;
         }
-       pfree(state.word);
+
+       close_tsvector_parser(state);
  
         if (len > 0)
                 len = uniqueentry(arr, len, tmpbuf, &buflen);
@@ -476,8 +265,21 @@ tsvectorin(PG_FUNCTION_ARGS)
                 cur += SHORTALIGN(arr[i].entry.len);
                 if (arr[i].entry.haspos)
                 {
-                       memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos));
-                       cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
+                       uint16 tmplen;
+
+                       if(arr[i].poslen > 0xFFFF)
+                               elog(ERROR, "positions array too long");
+
+                       tmplen = (uint16) arr[i].poslen;
+
+                       /* Copy length to output struct */
+                       memcpy(cur, &tmplen, sizeof(uint16));
+                       cur += sizeof(uint16);
+
+                       /* Copy positions */
+                       memcpy(cur, arr[i].pos, (arr[i].poslen) * sizeof(WordEntryPos));
+                       cur += arr[i].poslen * sizeof(WordEntryPos);
+
                         pfree(arr[i].pos);
                 }
                 inarr[i] = arr[i].entry;
@@ -604,26 +406,26 @@ tsvectorrecv(PG_FUNCTION_ARGS)
  {
         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
         TSVector        vec;
-       int                     i,
-                               size,
-                               len = DATAHDRSIZE;
+       int                     i;
+       uint32          size;
         WordEntry  *weptr;
         int                     datalen = 0;
+       Size            len;
  
         size = pq_getmsgint(buf, sizeof(uint32));
         if (size < 0 || size > (MaxAllocSize / sizeof(WordEntry)))
                 elog(ERROR, "invalid size of tsvector");
  
-       len += sizeof(WordEntry) * size;
+       len = DATAHDRSIZE + sizeof(WordEntry) * size;
  
-       len *= 2;
+       len = len * 2; /* times two to make room for lexemes */
         vec = (TSVector) palloc0(len);
         vec->size = size;
  
         weptr = ARRPTR(vec);
         for (i = 0; i < size; i++)
         {
-               int                     tmp;
+               int32 tmp;
  
                 weptr = ARRPTR(vec) + i;
  
@@ -654,7 +456,7 @@ tsvectorrecv(PG_FUNCTION_ARGS)
                                                 npos;
                         WordEntryPos *wepptr;
  
-                       npos = (uint16) pq_getmsgint(buf, sizeof(int16));
+                       npos = (uint16) pq_getmsgint(buf, sizeof(uint16));
                         if (npos > MAXNUMPOS)
                                 elog(ERROR, "unexpected number of positions");
  
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c

index 8567172c64f6f3c7330fff535dd0ba5c1d893eae..d34ab1fcf0bd68872f76fb7043cf8b442f1c4921 100644 (file)
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -7,7 +7,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.2 2007/08/31 02:26:29 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.3 2007/09/07 15:09:56 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -66,6 +66,9 @@ typedef struct
  static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
  
  
+/*
+ * Order: haspos, len, word, for all positions (pos, weight)
+ */
  static int
  silly_cmp_tsvector(const TSVector a, const TSVector b)
  {
@@ -464,7 +467,7 @@ tsvector_concat(PG_FUNCTION_ARGS)
   * compare 2 string values
   */
  static int4
-ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryItem * item)
+ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryOperand * item)
  {
         if (ptr->len == item->length)
                 return strncmp(
@@ -479,7 +482,7 @@ ValCompare(CHKVAL * chkval, WordEntry * ptr, QueryItem * item)
   * check weight info
   */
  static bool
-checkclass_str(CHKVAL * chkval, WordEntry * val, QueryItem * item)
+checkclass_str(CHKVAL * chkval, WordEntry * val, QueryOperand * item)
  {
         WordEntryPos *ptr = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16));
         uint16          len = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len)));
@@ -497,10 +500,11 @@ checkclass_str(CHKVAL * chkval, WordEntry * val, QueryItem * item)
   * is there value 'val' in array or not ?
   */
  static bool
-checkcondition_str(void *checkval, QueryItem * val)
+checkcondition_str(void *checkval, QueryOperand * val)
  {
-       WordEntry  *StopLow = ((CHKVAL *) checkval)->arrb;
-       WordEntry  *StopHigh = ((CHKVAL *) checkval)->arre;
+       CHKVAL *chkval = (CHKVAL *) checkval;
+       WordEntry  *StopLow = chkval->arrb;
+       WordEntry  *StopHigh = chkval->arre;
         WordEntry  *StopMiddle;
         int                     difference;
  
@@ -509,10 +513,10 @@ checkcondition_str(void *checkval, QueryItem * val)
         while (StopLow < StopHigh)
         {
                 StopMiddle = StopLow + (StopHigh - StopLow) / 2;
-               difference = ValCompare((CHKVAL *) checkval, StopMiddle, val);
+               difference = ValCompare(chkval, StopMiddle, val);
                 if (difference == 0)
                         return (val->weight && StopMiddle->haspos) ?
-                               checkclass_str((CHKVAL *) checkval, StopMiddle, val) : true;
+                               checkclass_str(chkval, StopMiddle, val) : true;
                 else if (difference < 0)
                         StopLow = StopMiddle + 1;
                 else
@@ -523,37 +527,48 @@ checkcondition_str(void *checkval, QueryItem * val)
  }
  
  /*
- * check for boolean condition
+ * check for boolean condition.
+ *
+ * If calcnot is false, NOT expressions always evaluate to true. This is used in ranking.
+ * checkval can be used to pass information to the callback. TS_execute doesn't
+ * do anything with it.
+ * chkcond is a callback function used to evaluate each QI_VAL node in the query.
+ *
   */
  bool
  TS_execute(QueryItem * curitem, void *checkval, bool calcnot,
-                  bool (*chkcond) (void *checkval, QueryItem * val))
+                  bool (*chkcond) (void *checkval, QueryOperand * val))
  {
         /* since this function recurses, it could be driven to stack overflow */
         check_stack_depth();
  
-       if (curitem->type == VAL)
-               return chkcond(checkval, curitem);
-       else if (curitem->val == (int4) '!')
-       {
-               return (calcnot) ?
-                       !TS_execute(curitem + 1, checkval, calcnot, chkcond)
-                       : true;
-       }
-       else if (curitem->val == (int4) '&')
+       if (curitem->type == QI_VAL)
+               return chkcond(checkval, (QueryOperand *) curitem);
+
+       switch(curitem->operator.oper)
         {
-               if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
-                       return TS_execute(curitem + 1, checkval, calcnot, chkcond);
-               else
-                       return false;
-       }
-       else
-       {                                                       /* |-operator */
-               if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
-                       return true;
-               else
-                       return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+               case OP_NOT:
+                       if (calcnot)
+                               return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
+                       else
+                               return true;
+               case OP_AND:
+                       if (TS_execute(curitem + curitem->operator.left, checkval, calcnot, chkcond))
+                               return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+                       else
+                               return false;
+
+               case OP_OR:
+                       if (TS_execute(curitem + curitem->operator.left, checkval, calcnot, chkcond))
+                               return true;
+                       else
+                               return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+
+               default:
+                       elog(ERROR, "unknown operator %d", curitem->operator.oper);
         }
+
+       /* not reachable, but keep compiler quiet */
         return false;
  }
  
diff --git a/src/backend/utils/adt/tsvector_parser.c b/src/backend/utils/adt/tsvector_parser.c

new file mode 100644 (file)

index 0000000..26a2716
--- /dev/null
+++ b/src/backend/utils/adt/tsvector_parser.c
@@ -0,0 +1,357 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsvector_parser.c
+ *       Parser for tsvector
+ *
+ * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_parser.c,v 1.1 2007/09/07 15:09:56 teodor Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "libpq/pqformat.h"
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_utils.h"
+#include "utils/memutils.h"
+
+struct TSVectorParseStateData
+{
+       char   *prsbuf;
+       char   *word;           /* buffer to hold the current word */
+       int             len;            /* size in bytes allocated for 'word' */
+       bool    oprisdelim;
+};
+
+/*
+ * Initializes parser for the input string. If oprisdelim is set, the
+ * following characters are treated as delimiters in addition to whitespace:
+ * ! | & ( )
+ */
+TSVectorParseState
+init_tsvector_parser(char *input, bool oprisdelim)
+{
+       TSVectorParseState state;
+
+       state = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData));
+       state->prsbuf = input;
+       state->len = 32;
+       state->word = (char *) palloc(state->len);
+       state->oprisdelim = oprisdelim;
+
+       return state;
+}
+
+/*
+ * Reinitializes parser for parsing 'input', instead of previous input.
+ */
+void
+reset_tsvector_parser(TSVectorParseState state, char *input)
+{
+       state->prsbuf = input;  
+}
+
+/*
+ * Shuts down a tsvector parser.
+ */
+void
+close_tsvector_parser(TSVectorParseState state)
+{
+       pfree(state->word);
+       pfree(state);
+}
+
+#define RESIZEPRSBUF \
+do { \
+       if ( curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
+       { \
+               int clen = curpos - state->word; \
+               state->len *= 2; \
+               state->word = (char*)repalloc( (void*)state->word, state->len ); \
+               curpos = state->word + clen; \
+       } \
+} while (0)
+
+
+#define ISOPERATOR(x)  ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
+
+/* Fills the output parameters, and returns true */
+#define RETURN_TOKEN \
+do { \
+       if (pos_ptr != NULL) \
+       { \
+               *pos_ptr = pos; \
+               *poslen = npos; \
+       } \
+       else if (pos != NULL) \
+               pfree(pos); \
+       \
+       if (strval != NULL) \
+               *strval = state->word; \
+       if (lenval != NULL) \
+               *lenval = curpos - state->word; \
+       if (endptr != NULL) \
+               *endptr = state->prsbuf; \
+       return true; \
+} while(0)
+
+
+/* State codes used in gettoken_tsvector */
+#define WAITWORD               1
+#define WAITENDWORD            2
+#define WAITNEXTCHAR   3
+#define WAITENDCMPLX   4
+#define WAITPOSINFO            5
+#define INPOSINFO              6
+#define WAITPOSDELIM   7
+#define WAITCHARCMPLX  8
+
+/*
+ * Get next token from string being parsed. Returns false if
+ * end of input string is reached, otherwise strval, lenval, pos_ptr
+ * and poslen output parameters are filled in:
+ * 
+ * *strval             token
+ * *lenval             length of *strval
+ * *pos_ptr            pointer to a palloc'd array of positions and weights
+ *                             associated with the token. If the caller is not interested
+ *                             in the information, NULL can be supplied. Otherwise
+ *                             the caller is responsible for pfreeing the array.
+ * *poslen             number of elements in *pos_ptr
+ */
+bool
+gettoken_tsvector(TSVectorParseState state, 
+                                 char **strval, int *lenval,
+                                 WordEntryPos **pos_ptr, int *poslen,
+                                 char **endptr)
+{
+       int     oldstate        = 0;
+       char *curpos    = state->word;
+       int     statecode       = WAITWORD;
+
+       /* pos collects the comma-delimited list of positions (and weights)
+        * that follows the actual token.
+        */
+       WordEntryPos *pos = NULL;
+       int npos                = 0; /* elements of pos used */
+       int posalen             = 0; /* allocated size of pos */
+
+       while (1)
+       {
+               if (statecode == WAITWORD)
+               {
+                       if (*(state->prsbuf) == '\0')
+                               return false;
+                       else if (t_iseq(state->prsbuf, '\''))
+                               statecode = WAITENDCMPLX;
+                       else if (t_iseq(state->prsbuf, '\\'))
+                       {
+                               statecode = WAITNEXTCHAR;
+                               oldstate = WAITENDWORD;
+                       }
+                       else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_SYNTAX_ERROR),
+                                                errmsg("syntax error in tsvector")));
+                       else if (!t_isspace(state->prsbuf))
+                       {
+                               COPYCHAR(curpos, state->prsbuf);
+                               curpos += pg_mblen(state->prsbuf);
+                               statecode = WAITENDWORD;
+                       }
+               }
+               else if (statecode == WAITNEXTCHAR)
+               {
+                       if (*(state->prsbuf) == '\0')
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_SYNTAX_ERROR),
+                                                errmsg("there is no escaped character")));
+                       else
+                       {
+                               RESIZEPRSBUF;
+                               COPYCHAR(curpos, state->prsbuf);
+                               curpos += pg_mblen(state->prsbuf);
+                               Assert(oldstate != 0);
+                               statecode = oldstate;
+                       }
+               }
+               else if (statecode == WAITENDWORD)
+               {
+                       if (t_iseq(state->prsbuf, '\\'))
+                       {
+                               statecode = WAITNEXTCHAR;
+                               oldstate = WAITENDWORD;
+                       }
+                       else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
+                                        (state->oprisdelim && ISOPERATOR(state->prsbuf)))
+                       {
+                               RESIZEPRSBUF;
+                               if (curpos == state->word)
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_SYNTAX_ERROR),
+                                                        errmsg("syntax error in tsvector")));
+                               *(curpos) = '\0';
+                               RETURN_TOKEN;
+                       }
+                       else if (t_iseq(state->prsbuf, ':'))
+                       {
+                               if (curpos == state->word)
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_SYNTAX_ERROR),
+                                                        errmsg("syntax error in tsvector")));
+                               *(curpos) = '\0';
+                               if (state->oprisdelim)
+                                       RETURN_TOKEN;
+                               else
+                                       statecode = INPOSINFO;
+                       }
+                       else
+                       {
+                               RESIZEPRSBUF;
+                               COPYCHAR(curpos, state->prsbuf);
+                               curpos += pg_mblen(state->prsbuf);
+                       }
+               }
+               else if (statecode == WAITENDCMPLX)
+               {
+                       if (t_iseq(state->prsbuf, '\''))
+                       {
+                               statecode = WAITCHARCMPLX;
+                       }
+                       else if (t_iseq(state->prsbuf, '\\'))
+                       {
+                               statecode = WAITNEXTCHAR;
+                               oldstate = WAITENDCMPLX;
+                       }
+                       else if (*(state->prsbuf) == '\0')
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_SYNTAX_ERROR),
+                                                errmsg("syntax error in tsvector")));
+                       else
+                       {
+                               RESIZEPRSBUF;
+                               COPYCHAR(curpos, state->prsbuf);
+                               curpos += pg_mblen(state->prsbuf);
+                       }
+               }
+               else if (statecode == WAITCHARCMPLX)
+               {
+                       if (t_iseq(state->prsbuf, '\''))
+                       {
+                               RESIZEPRSBUF;
+                               COPYCHAR(curpos, state->prsbuf);
+                               curpos += pg_mblen(state->prsbuf);
+                               statecode = WAITENDCMPLX;
+                       }
+                       else
+                       {
+                               RESIZEPRSBUF;
+                               *(curpos) = '\0';
+                               if (curpos == state->word)
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_SYNTAX_ERROR),
+                                                        errmsg("syntax error in tsvector")));
+                               if (state->oprisdelim)
+                               {
+                                       /* state->prsbuf+=pg_mblen(state->prsbuf); */
+                                       RETURN_TOKEN;
+                               }
+                               else
+                                       statecode = WAITPOSINFO;
+                               continue;               /* recheck current character */
+                       }
+               }
+               else if (statecode == WAITPOSINFO)
+               {
+                       if (t_iseq(state->prsbuf, ':'))
+                               statecode = INPOSINFO;
+                       else
+                               RETURN_TOKEN;
+               }
+               else if (statecode == INPOSINFO)
+               {
+                       if (t_isdigit(state->prsbuf))
+                       {
+                               if (posalen == 0)
+                               {
+                                       posalen = 4;
+                                       pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * posalen);
+                                       npos = 0;
+                               }
+                               else if (npos + 1 >= posalen)
+                               {
+                                       posalen *= 2;
+                                       pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * posalen);
+                               }
+                               npos++;
+                               WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
+                               if (WEP_GETPOS(pos[npos - 1]) == 0)
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_SYNTAX_ERROR),
+                                                        errmsg("wrong position info in tsvector")));
+                               WEP_SETWEIGHT(pos[npos - 1], 0);
+                               statecode = WAITPOSDELIM;
+                       }
+                       else
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_SYNTAX_ERROR),
+                                                errmsg("syntax error in tsvector")));
+               }
+               else if (statecode == WAITPOSDELIM)
+               {
+                       if (t_iseq(state->prsbuf, ','))
+                               statecode = INPOSINFO;
+                       else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
+                       {
+                               if (WEP_GETWEIGHT(pos[npos - 1]))
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_SYNTAX_ERROR),
+                                                        errmsg("syntax error in tsvector")));
+                               WEP_SETWEIGHT(pos[npos - 1], 3);
+                       }
+                       else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
+                       {
+                               if (WEP_GETWEIGHT(pos[npos - 1]))
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_SYNTAX_ERROR),
+                                                        errmsg("syntax error in tsvector")));
+                               WEP_SETWEIGHT(pos[npos - 1], 2);
+                       }
+                       else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
+                       {
+                               if (WEP_GETWEIGHT(pos[npos - 1]))
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_SYNTAX_ERROR),
+                                                        errmsg("syntax error in tsvector")));
+                               WEP_SETWEIGHT(pos[npos - 1], 1);
+                       }
+                       else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
+                       {
+                               if (WEP_GETWEIGHT(pos[npos - 1]))
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_SYNTAX_ERROR),
+                                                        errmsg("syntax error in tsvector")));
+                               WEP_SETWEIGHT(pos[npos - 1], 0);
+                       }
+                       else if (t_isspace(state->prsbuf) ||
+                                        *(state->prsbuf) == '\0')
+                               RETURN_TOKEN;
+                       else if (!t_isdigit(state->prsbuf))
+                               ereport(ERROR,
+                                               (errcode(ERRCODE_SYNTAX_ERROR),
+                                                errmsg("syntax error in tsvector")));
+               }
+               else                                    /* internal error */
+                       elog(ERROR, "internal error in gettoken_tsvector");
+
+               /* get next char */
+               state->prsbuf += pg_mblen(state->prsbuf);
+       }
+
+       return false;
+}
diff --git a/src/include/tsearch/ts_public.h b/src/include/tsearch/ts_public.h

index 148129aa8bc5b165959d8223c4b4f0d7d640d179..ab19de7924f05037e9e7a572d067b1070f2dfe9d 100644 (file)
--- a/src/include/tsearch/ts_public.h
+++ b/src/include/tsearch/ts_public.h
@@ -6,7 +6,7 @@
   *
   * Copyright (c) 1998-2007, PostgreSQL Global Development Group
   *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.3 2007/08/25 00:03:59 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.4 2007/09/07 15:09:56 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -42,7 +42,7 @@ typedef struct
                                 type:8,
                                 len:16;
         char       *word;
-       QueryItem  *item;
+       QueryOperand  *item;
  } HeadlineWordEntry;
  
  typedef struct
diff --git a/src/include/tsearch/ts_type.h b/src/include/tsearch/ts_type.h

index ec22f96f59f91393f90aa06c1e986eeb63c80120..91d724ef1c67752e313e24fe22fbbbe16774e407 100644 (file)
--- a/src/include/tsearch/ts_type.h
+++ b/src/include/tsearch/ts_type.h
@@ -5,7 +5,7 @@
   *
   * Copyright (c) 1998-2007, PostgreSQL Global Development Group
   *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.1 2007/08/21 01:11:29 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_type.h,v 1.2 2007/09/07 15:09:56 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -13,6 +13,8 @@
  #define _PG_TSTYPE_H_
  
  #include "fmgr.h"
+#include "utils/pg_crc.h"
+
  
  /*
   * TSVector type.
@@ -27,8 +29,8 @@ typedef struct
                                 pos:20;                 /* MAX 1Mb */
  } WordEntry;
  
-#define MAXSTRLEN ( 1<<11 )
-#define MAXSTRPOS ( 1<<20 )
+#define MAXSTRLEN ( (1<<11) - 1)
+#define MAXSTRPOS ( (1<<20) - 1)
  
  /*
   * Equivalent to
@@ -68,7 +70,7 @@ typedef uint16 WordEntryPos;
  typedef struct
  {
         int32           vl_len_;                /* varlena header (do not touch directly!) */
-       int4            size;
+       uint32          size;
         char            data[1];
  } TSVectorData;
  
@@ -140,36 +142,65 @@ extern Datum ts_rankcd_wttf(PG_FUNCTION_ARGS);
  
  /*
   * TSQuery
+ *
+ *
   */
  
+typedef int8 QueryItemType;
+
+/* Valid values for QueryItemType: */
+#define QI_VAL 1
+#define QI_OPR 2
+#define QI_VALSTOP 3   /* This is only used in an intermediate stack representation in parse_tsquery. It's not a legal type elsewhere. */
+
  /*
   * QueryItem is one node in tsquery - operator or operand.
   */
-
-typedef struct QueryItem
+typedef struct
  {
-       int8            type;                   /* operand or kind of operator */
-       int8            weight;                 /* weights of operand to search */
-       int2            left;                   /* pointer to left operand Right operand is
-                                                                * item + 1, left operand is placed
-                                                                * item+item->left */
-       int4            val;                    /* crc32 value of operand's value */
+       QueryItemType           type;   /* operand or kind of operator (ts_tokentype) */
+       int8            weight;                 /* weights of operand to search. It's a bitmask of allowed weights.
+                                                                * If it is 0, any weight is allowed. */
+       int32   valcrc;                         /* XXX: pg_crc32 would be a more appropriate data type, 
+                                                                * but we use comparisons to signed integers in the code. 
+                                                                * They would need to be changed as well. */
+
         /* pointer to text value of operand, must correlate with WordEntry */
         uint32
                                 istrue:1,               /* use for ranking in Cover */
                                 length:11,
                                 distance:20;
-} QueryItem;
+} QueryOperand;
+
+
+/* Legal values for QueryOperator.oper */
+#define        OP_NOT  1
+#define        OP_AND  2
+#define        OP_OR   3
+
+typedef struct 
+{
+       QueryItemType   type;
+       int8            oper;           /* see above */
+       int16           left;           /* offset to left operand. Right operand is
+                                                        * at item + 1, left operand is at
+                                                        * item + item->left */
+} QueryOperator;
  
  /*
- * It's impossible to use offsetof(QueryItem, istrue)
+ * Note: TSQuery is 4-byte aligned, so make sure there are no fields
+ * inside QueryItem requiring 8-byte alignment, like int64.
   */
-#define HDRSIZEQI      ( sizeof(int8) + sizeof(int8) + sizeof(int2) +  sizeof(int4) )
+typedef union
+{
+       QueryItemType   type;
+       QueryOperator operator;
+       QueryOperand operand;
+} QueryItem;
  
  /*
   * Storage:
- *     (len)(size)(array of ITEM)(array of operand in text form)
- *     operands are always finished by '\0'
+ *     (len)(size)(array of QueryItem)(operands as '\0'-terminated c-strings)
   */
  
  typedef struct
@@ -182,13 +213,17 @@ typedef struct
  typedef TSQueryData *TSQuery;
  
  #define HDRSIZETQ      ( VARHDRSZ + sizeof(int4) )
-#define COMPUTESIZE(size,lenofoperand) ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
-#define GETQUERY(x)  ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
-#define GETOPERAND(x)  ( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
-#define OPERANDSSIZE(x)                ( (x)->len - HDRSIZETQ - (x)->size * sizeof(QueryItem) )
  
-#define ISOPERATOR(x)  ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
+/* Computes the total size of header, all QueryItems and operands. size is the
+ * number of QueryItems, and lenofoperand is the total length of all operands.
+ */
+#define COMPUTESIZE(size, lenofoperand)        ( HDRSIZETQ + (size) * sizeof(QueryItem) + (lenofoperand) )
  
+/* Returns a pointer to the first QueryItem in a TSQuery */
+#define GETQUERY(x)  ((QueryItem*)( (char*)(x)+HDRSIZETQ ))
+
+/* Returns a pointer to the beginning of operands in a TSQuery */
+#define GETOPERAND(x)  ( (char*)GETQUERY(x) + ((TSQuery)(x))->size * sizeof(QueryItem) )
  
  /*
   * fmgr interface macros
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h

index d2e5c8d8e4957d231897fe4e9cad33072ce43e80..31a76e50b6cf23bf8c00ac7a4096f8c1daf91826 100644 (file)
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -5,7 +5,7 @@
   *
   * Copyright (c) 1998-2007, PostgreSQL Global Development Group
   *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.2 2007/08/25 00:03:59 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.3 2007/09/07 15:09:56 teodor Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -14,65 +14,41 @@
  
  #include "tsearch/ts_type.h"
  #include "tsearch/ts_public.h"
+#include "nodes/pg_list.h"
  
  /*
   * Common parse definitions for tsvector and tsquery
   */
  
-typedef struct
-{
-       WordEntry       entry;                  /* should be first ! */
-       WordEntryPos *pos;
-} WordEntryIN;
-
-typedef struct
-{
-       char       *prsbuf;
-       char       *word;
-       char       *curpos;
-       int4            len;
-       int4            state;
-       int4            alen;
-       WordEntryPos *pos;
-       bool            oprisdelim;
-} TSVectorParseState;
-
-extern bool gettoken_tsvector(TSVectorParseState *state);
+/* tsvector parser support. */
  
-struct ParseQueryNode;                 /* private in backend/utils/adt/tsquery.c */
+struct TSVectorParseStateData;
+typedef struct TSVectorParseStateData *TSVectorParseState;
  
-typedef struct
-{
-       char       *buffer;                     /* entire string we are scanning */
-       char       *buf;                        /* current scan point */
-       int4            state;
-       int4            count;
+extern TSVectorParseState init_tsvector_parser(char *input, bool oprisdelim);
+extern void reset_tsvector_parser(TSVectorParseState state, char *input);
+extern bool gettoken_tsvector(TSVectorParseState state, 
+                                                         char **token, int *len,
+                                                         WordEntryPos **pos, int *poslen,
+                                                         char **endptr);
+extern void close_tsvector_parser(TSVectorParseState state);
  
-       /* reverse polish notation in list (for temporary usage) */
-       struct ParseQueryNode *str;
+/* parse_tsquery */
  
-       /* number in str */
-       int4            num;
+struct TSQueryParserStateData; /* private in backend/utils/adt/tsquery.c */
+typedef struct TSQueryParserStateData *TSQueryParserState;
  
-       /* text-form operand */
-       int4            lenop;
-       int4            sumlen;
-       char       *op;
-       char       *curop;
-
-       /* state for value's parser */
-       TSVectorParseState valstate;
-       /* tscfg */
-       Oid                     cfg_id;
-} TSQueryParserState;
+typedef void (*PushFunction)(void *opaque, TSQueryParserState state, char *, int, int2);
  
  extern TSQuery parse_tsquery(char *buf,
-                         void (*pushval) (TSQueryParserState *, int, char *, int, int2),
-                         Oid cfg_id, bool isplain);
-extern void pushval_asis(TSQueryParserState * state,
-                        int type, char *strval, int lenval, int2 weight);
-extern void pushquery(TSQueryParserState * state, int4 type, int4 val,
-                 int4 distance, int4 lenval, int2 weight);
+                         PushFunction pushval,
+                         void *opaque, bool isplain);
+
+/* Functions for use by PushFunction implementations */
+extern void pushValue(TSQueryParserState state,
+                        char *strval, int lenval, int2 weight);
+extern void pushStop(TSQueryParserState state);
+extern void pushOperator(TSQueryParserState state, int8 operator);
  
  /*
   * parse plain text and lexize words
@@ -84,6 +60,11 @@ typedef struct
         union
         {
                 uint16          pos;
+               /*
+                * When apos array is used, apos[0] is the number of elements
+                * in the array (excluding apos[0]), and alen is the allocated
+                * size of the array.
+                */
                 uint16     *apos;
         }                       pos;
         char       *word;
@@ -111,23 +92,12 @@ extern void hlparsetext(Oid cfgId, HeadlineParsedText * prs, TSQuery query,
                         char *buf, int4 buflen);
  extern text *generateHeadline(HeadlineParsedText * prs);
  
-/*
- * token/node types for parsing
- */
-#define END                            0
-#define ERR                            1
-#define VAL                            2
-#define OPR                            3
-#define OPEN                   4
-#define CLOSE                  5
-#define VALSTOP                        6               /* for stop words */
-
  /*
   * Common check function for tsvector @@ tsquery
   */
  
  extern bool TS_execute(QueryItem * curitem, void *checkval, bool calcnot,
-                  bool (*chkcond) (void *checkval, QueryItem * val));
+                  bool (*chkcond) (void *checkval, QueryOperand * val));
  
  /*
   * Useful conversion macros
author	Teodor Sigaev <teodor@sigaev.ru>
	Fri, 7 Sep 2007 15:09:56 +0000 (15:09 +0000)
committer	Teodor Sigaev <teodor@sigaev.ru>
	Fri, 7 Sep 2007 15:09:56 +0000 (15:09 +0000)
src/backend/tsearch/to_tsany.c		patch \| blob \| history
src/backend/tsearch/ts_parse.c		patch \| blob \| history
src/backend/tsearch/wparser_def.c		patch \| blob \| history
src/backend/utils/adt/Makefile		patch \| blob \| history
src/backend/utils/adt/tsginidx.c		patch \| blob \| history
src/backend/utils/adt/tsgistidx.c		patch \| blob \| history
src/backend/utils/adt/tsquery.c		patch \| blob \| history
src/backend/utils/adt/tsquery_cleanup.c		patch \| blob \| history
src/backend/utils/adt/tsquery_op.c		patch \| blob \| history
src/backend/utils/adt/tsquery_rewrite.c		patch \| blob \| history
src/backend/utils/adt/tsquery_util.c		patch \| blob \| history
src/backend/utils/adt/tsrank.c		patch \| blob \| history
src/backend/utils/adt/tsvector.c		patch \| blob \| history
src/backend/utils/adt/tsvector_op.c		patch \| blob \| history
src/backend/utils/adt/tsvector_parser.c	[new file with mode: 0644]	patch \| blob
src/include/tsearch/ts_public.h		patch \| blob \| history
src/include/tsearch/ts_type.h		patch \| blob \| history
src/include/tsearch/ts_utils.h		patch \| blob \| history