granicus.if.org Git - postgresql/commitdiff
Fix shared tsvector/tsquery input code so that we don't say "syntax error in
author: Tom Lane <tgl@sss.pgh.pa.us>
Sun, 21 Oct 2007 22:29:56 +0000 (22:29 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Sun, 21 Oct 2007 22:29:56 +0000 (22:29 +0000)
tsvector" when we are really parsing a tsquery.  Report the bogus input,
too.  Make styles of some related error messages more consistent.

src/backend/utils/adt/tsquery.c
src/backend/utils/adt/tsvector.c
src/backend/utils/adt/tsvector_parser.c
src/include/tsearch/ts_utils.h

index 7fd018915cd544effa1a70d5571d1d686db28807..f8e84cb266874ee00d6235b7fa5162c0d5f1ffe4 100644 (file)
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.7 2007/09/11 16:01:40 teodor Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.8 2007/10/21 22:29:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -141,7 +141,7 @@ gettoken_query(TSQueryParserState state,
                                {
                                        ereport(ERROR,
                                                        (errcode(ERRCODE_SYNTAX_ERROR),
-                                                        errmsg("syntax error at start of operand in tsearch query: \"%s\"",
+                                                        errmsg("syntax error in tsquery: \"%s\"",
                                                                        state->buffer)));
                                }
                                else if (!t_isspace(state->buf))
@@ -159,7 +159,7 @@ gettoken_query(TSQueryParserState state,
                                        else
                                                ereport(ERROR,
                                                                (errcode(ERRCODE_SYNTAX_ERROR),
-                                                                errmsg("no operand in tsearch query: \"%s\"",
+                                                                errmsg("no operand in tsquery: \"%s\"",
                                                                                state->buffer)));
                                }
                                break;
@@ -232,12 +232,12 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
        if (distance >= MAXSTRPOS)
                ereport(ERROR,
                                (errcode(ERRCODE_SYNTAX_ERROR),
-                                errmsg("value is too big in tsearch query: \"%s\"",
+                                errmsg("value is too big in tsquery: \"%s\"",
                                                state->buffer)));
        if (lenval >= MAXSTRLEN)
                ereport(ERROR,
                                (errcode(ERRCODE_SYNTAX_ERROR),
-                                errmsg("operand is too long in tsearch query: \"%s\"",
+                                errmsg("operand is too long in tsquery: \"%s\"",
                                                state->buffer)));
 
        tmp = (QueryOperand *) palloc(sizeof(QueryOperand));
@@ -264,7 +264,7 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
        if (lenval >= MAXSTRLEN)
                ereport(ERROR,
                                (errcode(ERRCODE_SYNTAX_ERROR),
-                                errmsg("word is too long in tsearch query: \"%s\"",
+                                errmsg("word is too long in tsquery: \"%s\"",
                                                state->buffer)));
 
        INIT_CRC32(valcrc);
@@ -372,7 +372,7 @@ makepol(TSQueryParserState state,
                        default:
                                ereport(ERROR,
                                                (errcode(ERRCODE_SYNTAX_ERROR),
-                                                errmsg("syntax error in tsearch query: \"%s\"",
+                                                errmsg("syntax error in tsquery: \"%s\"",
                                                                state->buffer)));
                }
        }
@@ -478,7 +478,7 @@ parse_tsquery(char *buf,
        state.polstr = NIL;
 
        /* init value parser's state */
-       state.valstate = init_tsvector_parser(NULL, true);
+       state.valstate = init_tsvector_parser(state.buffer, true, true);
 
        /* init list of operand */
        state.sumlen = 0;
index 992da4a9b4097a7a6dde10b6dab6c694d0efd382..0d82da1f902438218da0b994235c123a82cd3c3a 100644 (file)
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.4 2007/09/07 16:03:40 teodor Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.5 2007/10/21 22:29:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -204,7 +204,7 @@ tsvectorin(PG_FUNCTION_ARGS)
 
        pg_verifymbstr(buf, strlen(buf), false);
 
-       state = init_tsvector_parser(buf, false);
+       state = init_tsvector_parser(buf, false, false);
        
        arrlen = 64;
        arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
@@ -224,7 +224,7 @@ tsvectorin(PG_FUNCTION_ARGS)
                if (cur - tmpbuf > MAXSTRPOS)
                        ereport(ERROR,
                                        (errcode(ERRCODE_SYNTAX_ERROR),
-                                        errmsg("position value too large")));
+                                        errmsg("position value is too large")));
 
                /*
                 * Enlarge buffers if needed
@@ -496,7 +496,7 @@ tsvectorrecv(PG_FUNCTION_ARGS)
                datalen += lex_len;
 
                if (i > 0 && WordEntryCMP(&vec->entries[i], &vec->entries[i - 1], STRPTR(vec)) <= 0)
-                       elog(ERROR, "lexemes are unordered");
+                       elog(ERROR, "lexemes are misordered");
 
                /* Receive positions */
 
@@ -523,7 +523,7 @@ tsvectorrecv(PG_FUNCTION_ARGS)
                        {
                                wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(WordEntryPos));
                                if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
-                                       elog(ERROR, "position information is unordered");
+                                       elog(ERROR, "position information is misordered");
                        }
 
                        datalen += (npos + 1) * sizeof(WordEntry);
index 26a271679d4cc95addaa9d5a5f75670b127ab790..5ee8bb78422b95717147b3646e3d8afa377ead9d 100644 (file)
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_parser.c,v 1.1 2007/09/07 15:09:56 teodor Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_parser.c,v 1.2 2007/10/21 22:29:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "tsearch/ts_utils.h"
 #include "utils/memutils.h"
 
+
+/*
+ * Private state of tsvector parser.  Note that tsquery also uses this code to
+ * parse its input, hence the boolean flags.  The two flags are both true or
+ * both false in current usage, but we keep them separate for clarity.
+ * is_tsquery affects *only* the content of error messages.
+ */
 struct TSVectorParseStateData
 {
-       char   *prsbuf;
-       char   *word;           /* buffer to hold the current word */
-       int             len;            /* size in bytes allocated for 'word' */
-       bool    oprisdelim;
+       char   *prsbuf;                         /* next input character */
+       char   *bufstart;                       /* whole string (used only for errors) */
+       char   *word;                           /* buffer to hold the current word */
+       int             len;                            /* size in bytes allocated for 'word' */
+       int             eml;                            /* max bytes per character */
+       bool    oprisdelim;                     /* treat ! | & ( ) as delimiters? */
+       bool    is_tsquery;                     /* say "tsquery" not "tsvector" in errors? */
 };
 
+
 /*
  * Initializes parser for the input string. If oprisdelim is set, the
  * following characters are treated as delimiters in addition to whitespace:
  * ! | & ( )
  */
 TSVectorParseState
-init_tsvector_parser(char *input, bool oprisdelim)
+init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery)
 {
        TSVectorParseState state;
 
        state = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData));
        state->prsbuf = input;
+       state->bufstart = input;
        state->len = 32;
        state->word = (char *) palloc(state->len);
+       state->eml = pg_database_encoding_max_length();
        state->oprisdelim = oprisdelim;
+       state->is_tsquery = is_tsquery;
 
        return state;
 }
 
 /*
- * Reinitializes parser for parsing 'input', instead of previous input.
+ * Reinitializes parser to parse 'input', instead of previous input.
  */
 void
 reset_tsvector_parser(TSVectorParseState state, char *input)
@@ -66,21 +80,21 @@ close_tsvector_parser(TSVectorParseState state)
        pfree(state);
 }
 
+/* increase the size of 'word' if needed to hold one more character */
 #define RESIZEPRSBUF \
 do { \
-       if ( curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
+       int clen = curpos - state->word; \
+       if ( clen + state->eml >= state->len ) \
        { \
-               int clen = curpos - state->word; \
                state->len *= 2; \
-               state->word = (char*)repalloc( (void*)state->word, state->len ); \
+               state->word = (char *) repalloc(state->word, state->len); \
                curpos = state->word + clen; \
        } \
 } while (0)
 
-
 #define ISOPERATOR(x)  ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
 
-/* Fills the output parameters, and returns true */
+/* Fills gettoken_tsvector's output parameters, and returns true */
 #define RETURN_TOKEN \
 do { \
        if (pos_ptr != NULL) \
@@ -111,18 +125,34 @@ do { \
 #define WAITPOSDELIM   7
 #define WAITCHARCMPLX  8
 
+#define PRSSYNTAXERROR prssyntaxerror(state)
+
+static void
+prssyntaxerror(TSVectorParseState state)
+{
+       ereport(ERROR,
+                       (errcode(ERRCODE_SYNTAX_ERROR),
+                        state->is_tsquery ?
+                        errmsg("syntax error in tsquery: \"%s\"", state->bufstart) :
+                        errmsg("syntax error in tsvector: \"%s\"", state->bufstart)));
+}
+
+
 /*
- * Get next token from string being parsed. Returns false if
- * end of input string is reached, otherwise strval, lenval, pos_ptr
- * and poslen output parameters are filled in:
+ * Get next token from string being parsed. Returns true if successful,
+ * false if end of input string is reached.  On success, these output
+ * parameters are filled in:
  * 
- * *strval             token
- * *lenval             length of*strval
+ * *strval             pointer to token
+ * *lenval             length of *strval
  * *pos_ptr            pointer to a palloc'd array of positions and weights
  *                             associated with the token. If the caller is not interested
  *                             in the information, NULL can be supplied. Otherwise
  *                             the caller is responsible for pfreeing the array.
  * *poslen             number of elements in *pos_ptr
+ * *endptr             scan resumption point
+ *
+ * Pass NULL for unwanted output parameters.
  */
 bool
 gettoken_tsvector(TSVectorParseState state, 
@@ -155,9 +185,7 @@ gettoken_tsvector(TSVectorParseState state,
                                oldstate = WAITENDWORD;
                        }
                        else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_SYNTAX_ERROR),
-                                                errmsg("syntax error in tsvector")));
+                               PRSSYNTAXERROR;
                        else if (!t_isspace(state->prsbuf))
                        {
                                COPYCHAR(curpos, state->prsbuf);
@@ -170,7 +198,8 @@ gettoken_tsvector(TSVectorParseState state,
                        if (*(state->prsbuf) == '\0')
                                ereport(ERROR,
                                                (errcode(ERRCODE_SYNTAX_ERROR),
-                                                errmsg("there is no escaped character")));
+                                                errmsg("there is no escaped character: \"%s\"",
+                                                               state->bufstart)));
                        else
                        {
                                RESIZEPRSBUF;
@@ -192,18 +221,14 @@ gettoken_tsvector(TSVectorParseState state,
                        {
                                RESIZEPRSBUF;
                                if (curpos == state->word)
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_SYNTAX_ERROR),
-                                                        errmsg("syntax error in tsvector")));
+                                       PRSSYNTAXERROR;
                                *(curpos) = '\0';
                                RETURN_TOKEN;
                        }
                        else if (t_iseq(state->prsbuf, ':'))
                        {
                                if (curpos == state->word)
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_SYNTAX_ERROR),
-                                                        errmsg("syntax error in tsvector")));
+                                       PRSSYNTAXERROR;
                                *(curpos) = '\0';
                                if (state->oprisdelim)
                                        RETURN_TOKEN;
@@ -229,9 +254,7 @@ gettoken_tsvector(TSVectorParseState state,
                                oldstate = WAITENDCMPLX;
                        }
                        else if (*(state->prsbuf) == '\0')
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_SYNTAX_ERROR),
-                                                errmsg("syntax error in tsvector")));
+                               PRSSYNTAXERROR;
                        else
                        {
                                RESIZEPRSBUF;
@@ -253,9 +276,7 @@ gettoken_tsvector(TSVectorParseState state,
                                RESIZEPRSBUF;
                                *(curpos) = '\0';
                                if (curpos == state->word)
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_SYNTAX_ERROR),
-                                                        errmsg("syntax error in tsvector")));
+                                       PRSSYNTAXERROR;
                                if (state->oprisdelim)
                                {
                                        /* state->prsbuf+=pg_mblen(state->prsbuf); */
@@ -290,17 +311,17 @@ gettoken_tsvector(TSVectorParseState state,
                                }
                                npos++;
                                WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
+                               /* we cannot get here in tsquery, so no need for 2 errmsgs */
                                if (WEP_GETPOS(pos[npos - 1]) == 0)
                                        ereport(ERROR,
                                                        (errcode(ERRCODE_SYNTAX_ERROR),
-                                                        errmsg("wrong position info in tsvector")));
+                                                        errmsg("wrong position info in tsvector: \"%s\"",
+                                                                       state->bufstart)));
                                WEP_SETWEIGHT(pos[npos - 1], 0);
                                statecode = WAITPOSDELIM;
                        }
                        else
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_SYNTAX_ERROR),
-                                                errmsg("syntax error in tsvector")));
+                               PRSSYNTAXERROR;
                }
                else if (statecode == WAITPOSDELIM)
                {
@@ -309,42 +330,32 @@ gettoken_tsvector(TSVectorParseState state,
                        else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
                        {
                                if (WEP_GETWEIGHT(pos[npos - 1]))
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_SYNTAX_ERROR),
-                                                        errmsg("syntax error in tsvector")));
+                                       PRSSYNTAXERROR;
                                WEP_SETWEIGHT(pos[npos - 1], 3);
                        }
                        else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
                        {
                                if (WEP_GETWEIGHT(pos[npos - 1]))
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_SYNTAX_ERROR),
-                                                        errmsg("syntax error in tsvector")));
+                                       PRSSYNTAXERROR;
                                WEP_SETWEIGHT(pos[npos - 1], 2);
                        }
                        else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
                        {
                                if (WEP_GETWEIGHT(pos[npos - 1]))
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_SYNTAX_ERROR),
-                                                        errmsg("syntax error in tsvector")));
+                                       PRSSYNTAXERROR;
                                WEP_SETWEIGHT(pos[npos - 1], 1);
                        }
                        else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
                        {
                                if (WEP_GETWEIGHT(pos[npos - 1]))
-                                       ereport(ERROR,
-                                                       (errcode(ERRCODE_SYNTAX_ERROR),
-                                                        errmsg("syntax error in tsvector")));
+                                       PRSSYNTAXERROR;
                                WEP_SETWEIGHT(pos[npos - 1], 0);
                        }
                        else if (t_isspace(state->prsbuf) ||
                                         *(state->prsbuf) == '\0')
                                RETURN_TOKEN;
                        else if (!t_isdigit(state->prsbuf))
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_SYNTAX_ERROR),
-                                                errmsg("syntax error in tsvector")));
+                               PRSSYNTAXERROR;
                }
                else                                    /* internal error */
                        elog(ERROR, "internal error in gettoken_tsvector");
index 60f176054ae55c6d41a7a542044504c6dc59afbf..ed9137b074ddd7c39fba53b64b745f8073d44828 100644 (file)
@@ -5,7 +5,7 @@
  *
  * Copyright (c) 1998-2007, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.5 2007/10/19 22:01:45 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_utils.h,v 1.6 2007/10/21 22:29:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 /* tsvector parser support. */
 
-struct TSVectorParseStateData;
+struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */
 typedef struct TSVectorParseStateData *TSVectorParseState;
 
-extern TSVectorParseState init_tsvector_parser(char *input, bool oprisdelim);
+extern TSVectorParseState init_tsvector_parser(char *input,
+                                                                                          bool oprisdelim,
+                                                                                          bool is_tsquery);
 extern void reset_tsvector_parser(TSVectorParseState state, char *input);
 extern bool gettoken_tsvector(TSVectorParseState state, 
                                                          char **token, int *len,