]> granicus.if.org Git - postgresql/commitdiff
Change predecence of phrase operator.
authorTeodor Sigaev <teodor@sigaev.ru>
Mon, 27 Jun 2016 17:55:24 +0000 (20:55 +0300)
committerTeodor Sigaev <teodor@sigaev.ru>
Mon, 27 Jun 2016 17:55:24 +0000 (20:55 +0300)
<-> operator now have higher predecence than & (AND) operator. This change
was motivated by unexpected difference of similar queries:
'a & b <-> c'::tsquery and 'b <-> c & a'. Before first query means
(a & b) <-> c and second one - '(b <-> c) & a', now phrase operator evaluates
first.

Per suggestion from Tom Lane 32260.1465402409@sss.pgh.pa.us

src/backend/utils/adt/tsquery.c
src/backend/utils/adt/tsquery_cleanup.c
src/include/tsearch/ts_type.h
src/test/regress/expected/tsdicts.out
src/test/regress/expected/tsearch.out
src/test/regress/expected/tstypes.out

index 21a18bfbc4479d77c96b75802a8eb25d9d074aff..72e608eb913738d0d4de9afd264fd2f1d98dcf86 100644 (file)
 /* FTS operator priorities, see ts_type.h */
 const int      tsearch_op_priority[OP_COUNT] =
 {
-       3,                                                      /* OP_NOT */
+       4,                                                      /* OP_NOT */
        2,                                                      /* OP_AND */
        1,                                                      /* OP_OR */
-       4                                                       /* OP_PHRASE */
+       3                                                       /* OP_PHRASE */
 };
 
 struct TSQueryParserStateData
@@ -430,6 +430,40 @@ pushStop(TSQueryParserState state)
 
 #define STACKDEPTH     32
 
+typedef struct OperatorElement {
+       int8    op;
+       int16   distance;
+} OperatorElement;
+
+static void
+pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance)
+{
+       if (*lenstack == STACKDEPTH) /* internal error */
+               elog(ERROR, "tsquery stack too small");
+
+       stack[*lenstack].op = op;
+       stack[*lenstack].distance = distance;
+
+       (*lenstack)++;
+}
+
+static void
+cleanOpStack(TSQueryParserState state,
+                  OperatorElement *stack, int *lenstack, int8 op)
+{
+       int     opPriority = OP_PRIORITY(op);
+
+       while(*lenstack)
+       {
+               if (opPriority > OP_PRIORITY(stack[*lenstack - 1].op))
+                       break;
+
+               (*lenstack)--;
+               pushOperator(state, stack[*lenstack].op,
+                                                       stack[*lenstack].distance);
+       }
+}
+
 /*
  * Make polish (prefix) notation of query.
  *
@@ -444,11 +478,7 @@ makepol(TSQueryParserState state,
        ts_tokentype type;
        int                     lenval = 0;
        char       *strval = NULL;
-       struct
-       {
-               int8            op;
-               int16           distance;
-       }                       opstack[STACKDEPTH];
+       OperatorElement opstack[STACKDEPTH];
        int                     lenstack = 0;
        int16           weight = 0;
        bool            prefix;
@@ -462,49 +492,16 @@ makepol(TSQueryParserState state,
                {
                        case PT_VAL:
                                pushval(opaque, state, strval, lenval, weight, prefix);
-                               while (lenstack && (opstack[lenstack - 1].op == OP_AND ||
-                                                                       opstack[lenstack - 1].op == OP_PHRASE ||
-                                                                       opstack[lenstack - 1].op == OP_NOT))
-                               {
-                                       lenstack--;
-                                       pushOperator(state,
-                                                                opstack[lenstack].op,
-                                                                opstack[lenstack].distance);
-                               }
                                break;
                        case PT_OPR:
-                               if (lenstack && operator == OP_OR)
-                                       pushOperator(state, OP_OR, 0);
-                               else
-                               {
-                                       if (lenstack == STACKDEPTH) /* internal error */
-                                               elog(ERROR, "tsquery stack too small");
-                                       opstack[lenstack].op = operator;
-                                       opstack[lenstack].distance = weight;
-                                       lenstack++;
-                               }
+                               cleanOpStack(state, opstack, &lenstack, operator);
+                               pushOpStack(opstack, &lenstack, operator, weight);
                                break;
                        case PT_OPEN:
                                makepol(state, pushval, opaque);
-
-                               while (lenstack && (opstack[lenstack - 1].op == OP_AND ||
-                                                                       opstack[lenstack - 1].op == OP_PHRASE ||
-                                                                       opstack[lenstack - 1].op == OP_NOT))
-                               {
-                                       lenstack--;
-                                       pushOperator(state,
-                                                                opstack[lenstack].op,
-                                                                opstack[lenstack].distance);
-                               }
                                break;
                        case PT_CLOSE:
-                               while (lenstack)
-                               {
-                                       lenstack--;
-                                       pushOperator(state,
-                                                                opstack[lenstack].op,
-                                                                opstack[lenstack].distance);
-                               };
+                               cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */);
                                return;
                        case PT_ERR:
                        default:
@@ -514,13 +511,8 @@ makepol(TSQueryParserState state,
                                                                state->buffer)));
                }
        }
-       while (lenstack)
-       {
-               lenstack--;
-               pushOperator(state,
-                                        opstack[lenstack].op,
-                                        opstack[lenstack].distance);
-       }
+
+       cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */);
 }
 
 static void
@@ -750,7 +742,7 @@ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
  * print it in infix (human-readable) form
  */
 static void
-infix(INFIX *in, int parentPriority)
+infix(INFIX *in, int parentPriority, bool rightPhraseOp)
 {
        /* since this function recurses, it could be driven to stack overflow. */
        check_stack_depth();
@@ -819,7 +811,7 @@ infix(INFIX *in, int parentPriority)
        }
        else if (in->curpol->qoperator.oper == OP_NOT)
        {
-               int                     priority = PRINT_PRIORITY(in->curpol);
+               int                     priority = QO_PRIORITY(in->curpol);
 
                if (priority < parentPriority)
                {
@@ -833,7 +825,7 @@ infix(INFIX *in, int parentPriority)
                *(in->cur) = '\0';
                in->curpol++;
 
-               infix(in, priority);
+               infix(in, priority, false);
                if (priority < parentPriority)
                {
                        RESIZEBUF(in, 2);
@@ -844,17 +836,15 @@ infix(INFIX *in, int parentPriority)
        else
        {
                int8            op = in->curpol->qoperator.oper;
-               int                     priority = PRINT_PRIORITY(in->curpol);
+               int                     priority = QO_PRIORITY(in->curpol);
                int16           distance = in->curpol->qoperator.distance;
                INFIX           nrm;
                bool            needParenthesis = false;
 
                in->curpol++;
                if (priority < parentPriority ||
-                       (op == OP_PHRASE &&
-                        (priority == parentPriority ||         /* phrases are not
-                                                                                                * commutative! */
-                         parentPriority == OP_PRIORITY(OP_AND))))
+                        /* phrase operator depends on order */
+                        (op == OP_PHRASE && rightPhraseOp))
                {
                        needParenthesis = true;
                        RESIZEBUF(in, 2);
@@ -868,11 +858,11 @@ infix(INFIX *in, int parentPriority)
                nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
 
                /* get right operand */
-               infix(&nrm, priority);
+               infix(&nrm, priority, (op == OP_PHRASE));
 
                /* get & print left operand */
                in->curpol = nrm.curpol;
-               infix(in, priority);
+               infix(in, priority, false);
 
                /* print operator & right operand */
                RESIZEBUF(in, 3 + (2 + 10 /* distance */ ) + (nrm.cur - nrm.buf));
@@ -924,7 +914,7 @@ tsqueryout(PG_FUNCTION_ARGS)
        nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
        *(nrm.cur) = '\0';
        nrm.op = GETOPERAND(query);
-       infix(&nrm, -1 /* lowest priority */ );
+       infix(&nrm, -1 /* lowest priority */, false);
 
        PG_FREE_IF_COPY(query, 0);
        PG_RETURN_CSTRING(nrm.buf);
@@ -1151,7 +1141,7 @@ tsquerytree(PG_FUNCTION_ARGS)
                nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
                *(nrm.cur) = '\0';
                nrm.op = GETOPERAND(query);
-               infix(&nrm, true);
+               infix(&nrm, -1, false);
                res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
                pfree(q);
        }
index 6c74070e45ea3db34b555b760ca4fe9020fc6adf..1a90964ce79d470f6d645c1ecd32d5150964b6af 100644 (file)
@@ -25,11 +25,18 @@ typedef struct NODE
        QueryItem  *valnode;
 } NODE;
 
-/* Non-operator nodes have fake (but highest) priority */
+/*
+ * To simplify walking on query tree and pushing down of phrase operator
+ * we define some fake priority here: phrase operator has highest priority
+ * of any other operators (and we believe here that OP_PHRASE is a highest
+ * code of operations) and value node has ever highest priority.
+ * Priority values of other operations don't matter until they are less than
+ * phrase operator and value node.
+ */
+#define VALUE_PRIORITY                 (OP_COUNT + 1)
 #define NODE_PRIORITY(x) \
        ( ((x)->valnode->qoperator.type == QI_OPR) ? \
-                       QO_PRIORITY((x)->valnode) : \
-                       TOP_PRIORITY )
+               (x)->valnode->qoperator.oper : VALUE_PRIORITY )
 
 /*
  * make query tree from plain view of query
@@ -416,6 +423,10 @@ normalize_phrase_tree(NODE *node)
                node->left = normalize_phrase_tree(node->left);
                node->right = normalize_phrase_tree(node->right);
 
+               /*
+                * if subtree contains only nodes with higher "priority" then
+                * we are done. See comment near NODE_PRIORITY()
+                */
                if (NODE_PRIORITY(node) <= NODE_PRIORITY(node->right) &&
                        NODE_PRIORITY(node) <= NODE_PRIORITY(node->left))
                        return node;
index 80eb75c14eecf56fca0292503643348103f62df9..d1f4114d21fb5ffdd55ea044970285c61c6138be 100644 (file)
@@ -217,33 +217,19 @@ typedef struct
 
 /*
  * Legal values for QueryOperator.operator.
- * They should be ordered by priority! We assume that phrase
- * has highest priority, but this agreement is only
- * for query transformation! That's need to simplify
- * algorithm of query transformation.
  */
 #define OP_NOT                 1
 #define OP_AND                 2
 #define OP_OR                  3
-#define OP_PHRASE              4
+#define OP_PHRASE              4  /* highest code, tsquery_cleanup.c */
 #define OP_COUNT               4
 
 extern const int tsearch_op_priority[OP_COUNT];
 
-#define NOT_PHRASE_P   5               /* OP_PHRASE negation operations must have
-                                                                * greater priority in order to force infix()
-                                                                * to surround the whole OP_PHRASE expression
-                                                                * with parentheses. */
-
-#define TOP_PRIORITY   6               /* highest priority for val nodes */
-
 /* get operation priority  by its code*/
 #define OP_PRIORITY(x) ( tsearch_op_priority[(x) - 1] )
 /* get QueryOperator priority */
 #define QO_PRIORITY(x) OP_PRIORITY(((QueryOperator *) (x))->oper)
-/* special case: get QueryOperator priority for correct printing !(a <-> b>) */
-#define PRINT_PRIORITY(x) \
-       ( (((QueryOperator *) (x))->oper == OP_NOT) ? NOT_PHRASE_P : QO_PRIORITY(x) )
 
 typedef struct
 {
index 5ddbe8023466bc714347db3f9ac0ed24c7ee8139..c55591a6787d1434f07a92acd46123a50c7b6234 100644 (file)
@@ -470,15 +470,15 @@ SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
 (1 row)
 
 SELECT to_tsquery('hunspell_tst', 'footballyklubber:b <-> sky');
-                                 to_tsquery                                  
------------------------------------------------------------------------------
- ( 'foot':B <-> 'sky' ) & ( 'ball':B <-> 'sky' ) & ( 'klubber':B <-> 'sky' )
+                           to_tsquery                            
+-----------------------------------------------------------------
+ 'foot':B <-> 'sky' & 'ball':B <-> 'sky' & 'klubber':B <-> 'sky'
 (1 row)
 
 SELECT phraseto_tsquery('hunspell_tst', 'footballyklubber sky');
-                           phraseto_tsquery                            
------------------------------------------------------------------------
- ( 'foot' <-> 'sky' ) & ( 'ball' <-> 'sky' ) & ( 'klubber' <-> 'sky' )
+                     phraseto_tsquery                      
+-----------------------------------------------------------
+ 'foot' <-> 'sky' & 'ball' <-> 'sky' & 'klubber' <-> 'sky'
 (1 row)
 
 -- Test ispell dictionary with hunspell affix with FLAG long in configuration
index 3a13ad985ff8eda8f450ad8a26061a765e78215a..2ec3d1b6ab94e6cad1aef5b65b2f98eb39156376 100644 (file)
@@ -778,9 +778,9 @@ SELECT to_tsquery('english', 'foo <-> a <-> the <-> bar');
 (1 row)
 
 SELECT phraseto_tsquery('english', 'PostgreSQL can be extended by the user in many ways');
-                           phraseto_tsquery                            
------------------------------------------------------------------------
( ( ( 'postgresql' <3> 'extend' ) <3> 'user' ) <2> 'mani' ) <-> 'way'
+                     phraseto_tsquery                      
+-----------------------------------------------------------
'postgresql' <3> 'extend' <3> 'user' <2> 'mani' <-> 'way'
 (1 row)
 
 SELECT ts_rank_cd(to_tsvector('english', '
@@ -1215,15 +1215,15 @@ SELECT ts_rewrite('1 & (2 <-> 3)', 'SELECT keyword, sample FROM test_tsquery'::t
 (1 row)
 
 SELECT ts_rewrite('1 & (2 <2> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
-      ts_rewrite       
------------------------
- '1' & ( '2' <2> '3' )
+    ts_rewrite     
+-------------------
+ '1' & '2' <2> '3'
 (1 row)
 
 SELECT ts_rewrite('5 <-> (1 & (2 <-> 3))', 'SELECT keyword, sample FROM test_tsquery'::text );
-                  ts_rewrite                   
------------------------------------------------
( '5' <-> '1' ) & ( '5' <-> ( '2' <-> '3' ) )
+              ts_rewrite               
+---------------------------------------
'5' <-> '1' & '5' <-> ( '2' <-> '3' )
 (1 row)
 
 SELECT ts_rewrite('5 <-> (6 | 8)', 'SELECT keyword, sample FROM test_tsquery'::text );
index 781be70736bf03c0b4fdaa5695b1329787dea4a2..6705e6900fcca14c89001dcbb68bc2d7757d5919 100644 (file)
@@ -350,21 +350,21 @@ SELECT '(a|b) <-> (d|c)'::tsquery;
 (1 row)
 
 SELECT 'a <-> (b&c)'::tsquery;
-              tsquery              
------------------------------------
- ( 'a' <-> 'b' ) & ( 'a' <-> 'c' )
+          tsquery          
+---------------------------
+ 'a' <-> 'b' & 'a' <-> 'c'
 (1 row)
 
 SELECT '(a&b) <-> c'::tsquery;
-              tsquery              
------------------------------------
- ( 'a' <-> 'c' ) & ( 'b' <-> 'c' )
+          tsquery          
+---------------------------
+ 'a' <-> 'c' & 'b' <-> 'c'
 (1 row)
 
 SELECT '(a&b) <-> (d&c)'::tsquery;
-                                tsquery                                
------------------------------------------------------------------------
- ( 'a' <-> 'd' ) & ( 'b' <-> 'd' ) & ( 'a' <-> 'c' ) & ( 'b' <-> 'c' )
+                        tsquery                        
+-------------------------------------------------------
+ 'a' <-> 'd' & 'b' <-> 'd' & 'a' <-> 'c' & 'b' <-> 'c'
 (1 row)
 
 SELECT 'a <-> !b'::tsquery;
@@ -386,9 +386,9 @@ SELECT '!a <-> !b'::tsquery;
 (1 row)
 
 SELECT 'a <-> !(b&c)'::tsquery;
-                   tsquery                    
-----------------------------------------------
- 'a' & !( ( 'a' <-> 'b' ) & ( 'a' <-> 'c' ) )
+               tsquery                
+--------------------------------------
+ 'a' & !( 'a' <-> 'b' & 'a' <-> 'c' )
 (1 row)
 
 SELECT 'a <-> !(b|c)'::tsquery;
@@ -398,9 +398,9 @@ SELECT 'a <-> !(b|c)'::tsquery;
 (1 row)
 
 SELECT  '!(a&b) <-> c'::tsquery;
-                   tsquery                    
-----------------------------------------------
- !( ( 'a' <-> 'c' ) & ( 'b' <-> 'c' ) ) & 'c'
+               tsquery                
+--------------------------------------
+ !( 'a' <-> 'c' & 'b' <-> 'c' ) & 'c'
 (1 row)
 
 SELECT  '!(a|b) <-> c'::tsquery;
@@ -416,9 +416,9 @@ SELECT  '(!a|b) <-> c'::tsquery;
 (1 row)
 
 SELECT  '(!a&b) <-> c'::tsquery;
-                 tsquery                  
-------------------------------------------
- !( 'a' <-> 'c' ) & 'c' & ( 'b' <-> 'c' )
+               tsquery                
+--------------------------------------
+ !( 'a' <-> 'c' ) & 'c' & 'b' <-> 'c'
 (1 row)
 
 SELECT  'c <-> (!a|b)'::tsquery;
@@ -428,9 +428,9 @@ SELECT  'c <-> (!a|b)'::tsquery;
 (1 row)
 
 SELECT  'c <-> (!a&b)'::tsquery;
-                 tsquery                  
-------------------------------------------
- 'c' & !( 'c' <-> 'a' ) & ( 'c' <-> 'b' )
+               tsquery                
+--------------------------------------
+ 'c' & !( 'c' <-> 'a' ) & 'c' <-> 'b'
 (1 row)
 
 SELECT  '(a|b) <-> !c'::tsquery;
@@ -440,9 +440,9 @@ SELECT  '(a|b) <-> !c'::tsquery;
 (1 row)
 
 SELECT  '(a&b) <-> !c'::tsquery;
-                      tsquery                       
-----------------------------------------------------
- 'a' & 'b' & !( ( 'a' <-> 'c' ) & ( 'b' <-> 'c' ) )
+                  tsquery                   
+--------------------------------------------
+ 'a' & 'b' & !( 'a' <-> 'c' & 'b' <-> 'c' )
 (1 row)
 
 SELECT  '!c <-> (a|b)'::tsquery;
@@ -532,33 +532,33 @@ SELECT 'foo & bar'::tsquery && 'asd | fg';
 (1 row)
 
 SELECT 'a' <-> 'b & d'::tsquery;
-             ?column?              
------------------------------------
- ( 'a' <-> 'b' ) & ( 'a' <-> 'd' )
+         ?column?          
+---------------------------
+ 'a' <-> 'b' & 'a' <-> 'd'
 (1 row)
 
 SELECT 'a & g' <-> 'b & d'::tsquery;
-                               ?column?                                
------------------------------------------------------------------------
- ( 'a' <-> 'b' ) & ( 'g' <-> 'b' ) & ( 'a' <-> 'd' ) & ( 'g' <-> 'd' )
+                       ?column?                        
+-------------------------------------------------------
+ 'a' <-> 'b' & 'g' <-> 'b' & 'a' <-> 'd' & 'g' <-> 'd'
 (1 row)
 
 SELECT 'a & g' <-> 'b | d'::tsquery;
-                               ?column?                                
------------------------------------------------------------------------
- ( 'a' <-> 'b' ) & ( 'g' <-> 'b' ) | ( 'a' <-> 'd' ) & ( 'g' <-> 'd' )
+                       ?column?                        
+-------------------------------------------------------
+ 'a' <-> 'b' & 'g' <-> 'b' | 'a' <-> 'd' & 'g' <-> 'd'
 (1 row)
 
 SELECT 'a & g' <-> 'b <-> d'::tsquery;
-                         ?column?                          
------------------------------------------------------------
( 'a' <-> ( 'b' <-> 'd' ) ) & ( 'g' <-> ( 'b' <-> 'd' ) )
+                     ?column?                      
+---------------------------------------------------
'a' <-> ( 'b' <-> 'd' ) & 'g' <-> ( 'b' <-> 'd' )
 (1 row)
 
 SELECT tsquery_phrase('a <3> g', 'b & d', 10);
-                       tsquery_phrase                        
--------------------------------------------------------------
- ( ( 'a' <3> 'g' ) <10> 'b' ) & ( ( 'a' <3> 'g' ) <10> 'd' )
+               tsquery_phrase                
+---------------------------------------------
+ 'a' <3> 'g' <10> 'b' & 'a' <3> 'g' <10> 'd'
 (1 row)
 
 -- tsvector-tsquery operations