]> granicus.if.org Git - postgresql/commitdiff
Do not fallback to AND for FTS phrase operator.
authorTeodor Sigaev <teodor@sigaev.ru>
Mon, 27 Jun 2016 17:47:32 +0000 (20:47 +0300)
committerTeodor Sigaev <teodor@sigaev.ru>
Mon, 27 Jun 2016 17:47:32 +0000 (20:47 +0300)
If lexemes carry no positional information, the phrase operator no longer
falls back to the AND operator. This change requires modifying the TS_execute()
interface, because in some places (in indexes, for example) positional information
is inaccessible, and in those cases we need to force the fallback to AND.

Per discussion c19fcfec308e6ccd952cdde9e648b505@mail.gmail.com

src/backend/utils/adt/tsginidx.c
src/backend/utils/adt/tsgistidx.c
src/backend/utils/adt/tsrank.c
src/backend/utils/adt/tsvector_op.c
src/include/tsearch/ts_utils.h
src/test/regress/expected/tsearch.out
src/test/regress/sql/tsearch.sql

index b096329143348145de45a5ff043fecfb489350f8..c953f531ff73f865bf0bd658b566e9c6eecbee69 100644 (file)
@@ -308,7 +308,7 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS)
 
                res = TS_execute(GETQUERY(query),
                                                 &gcv,
-                                                true,
+                                                TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_AS_AND,
                                                 checkcondition_gin);
        }
 
index cdd5d43fce5f33c3ee9ab013fcc0a0cf16b612fb..6cdfb13f6d12221e309bc536a1ba71235141f75b 100644 (file)
@@ -361,7 +361,8 @@ gtsvector_consistent(PG_FUNCTION_ARGS)
 
                PG_RETURN_BOOL(TS_execute(
                                                                  GETQUERY(query),
-                                                                 (void *) GETSIGN(key), false,
+                                                                 (void *) GETSIGN(key),
+                                                                 TS_EXEC_PHRASE_AS_AND,
                                                                  checkcondition_bit
                                                                  ));
        }
@@ -373,7 +374,8 @@ gtsvector_consistent(PG_FUNCTION_ARGS)
                chkval.arre = chkval.arrb + ARRNELEM(key);
                PG_RETURN_BOOL(TS_execute(
                                                                  GETQUERY(query),
-                                                                 (void *) &chkval, true,
+                                                                 (void *) &chkval,
+                                                                 TS_EXEC_PHRASE_AS_AND | TS_EXEC_CALC_NOT,
                                                                  checkcondition_arr
                                                                  ));
        }
index 32023821b3fe1b92cc33633a55b96fcb3f07d49d..d887a14d057f404a098e306e7bfdd5586cb8af36 100644 (file)
@@ -662,7 +662,8 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
        {
                fillQueryRepresentationData(qr, ptr);
 
-               if (TS_execute(GETQUERY(qr->query), (void *) qr, false, checkcondition_QueryOperand))
+               if (TS_execute(GETQUERY(qr->query), (void *) qr,
+                                          TS_EXEC_EMPTY, checkcondition_QueryOperand))
                {
                        if (WEP_GETPOS(ptr->pos) > ext->q)
                        {
@@ -691,7 +692,8 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
                 */
                fillQueryRepresentationData(qr, ptr);
 
-               if (TS_execute(GETQUERY(qr->query), (void *) qr, true, checkcondition_QueryOperand))
+               if (TS_execute(GETQUERY(qr->query), (void *) qr,
+                                          TS_EXEC_CALC_NOT, checkcondition_QueryOperand))
                {
                        if (WEP_GETPOS(ptr->pos) < ext->p)
                        {
index 04718829a0b960afaa560969e6ce22da6956c49c..242b7e137b727ecd099f421231bbb904e45ed131 100644 (file)
@@ -1360,7 +1360,7 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
  */
 static bool
 TS_phrase_execute(QueryItem *curitem,
-                                 void *checkval, bool calcnot, ExecPhraseData *data,
+                                 void *checkval, uint32 flags, ExecPhraseData *data,
                                  bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *))
 {
        /* since this function recurses, it could be driven to stack overflow */
@@ -1382,18 +1382,19 @@ TS_phrase_execute(QueryItem *curitem,
                Assert(curitem->qoperator.oper == OP_PHRASE);
 
                if (!TS_phrase_execute(curitem + curitem->qoperator.left,
-                                                          checkval, calcnot, &Ldata, chkcond))
+                                                          checkval, flags, &Ldata, chkcond))
                        return false;
 
-               if (!TS_phrase_execute(curitem + 1, checkval, calcnot, &Rdata, chkcond))
+               if (!TS_phrase_execute(curitem + 1, checkval, flags, &Rdata, chkcond))
                        return false;
 
                /*
                 * if at least one of the operands has no position information,
-                * fallback to AND operation.
+                * then return false. But if the TS_EXEC_PHRASE_AS_AND flag is set,
+                * return true, treating the phrase as an AND operation.
                 */
                if (Ldata.npos == 0 || Rdata.npos == 0)
-                       return true;
+                       return (flags & TS_EXEC_PHRASE_AS_AND) ? true : false;
 
                /*
                 * Result of the operation is a list of the corresponding positions of
@@ -1498,13 +1499,11 @@ TS_phrase_execute(QueryItem *curitem,
  * chkcond is a callback function used to evaluate each VAL node in the query.
  * checkval can be used to pass information to the callback. TS_execute doesn't
  * do anything with it.
- * if calcnot is false, NOT expressions are always evaluated to be true. This
- * is used in ranking.
  * It believes that ordinary operators are always closier to root than phrase
  * operator, so, TS_execute() may not take care of lexeme's position at all.
  */
 bool
-TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
+TS_execute(QueryItem *curitem, void *checkval, uint32 flags,
    bool (*chkcond) (void *checkval, QueryOperand *val, ExecPhraseData *data))
 {
        /* since this function recurses, it could be driven to stack overflow */
@@ -1517,25 +1516,29 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
        switch (curitem->qoperator.oper)
        {
                case OP_NOT:
-                       if (calcnot)
-                               return !TS_execute(curitem + 1, checkval, calcnot, chkcond);
+                       if (flags & TS_EXEC_CALC_NOT)
+                               return !TS_execute(curitem + 1, checkval, flags, chkcond);
                        else
                                return true;
 
                case OP_AND:
-                       if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond))
-                               return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+                       if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond))
+                               return TS_execute(curitem + 1, checkval, flags, chkcond);
                        else
                                return false;
 
                case OP_OR:
-                       if (TS_execute(curitem + curitem->qoperator.left, checkval, calcnot, chkcond))
+                       if (TS_execute(curitem + curitem->qoperator.left, checkval, flags, chkcond))
                                return true;
                        else
-                               return TS_execute(curitem + 1, checkval, calcnot, chkcond);
+                               return TS_execute(curitem + 1, checkval, flags, chkcond);
 
                case OP_PHRASE:
-                       return TS_phrase_execute(curitem, checkval, calcnot, NULL, chkcond);
+                       /*
+                        * do not check TS_EXEC_PHRASE_AS_AND here because chkcond()
+                        * could do something more if it's called from TS_phrase_execute()
+                        */
+                       return TS_phrase_execute(curitem, checkval, flags, NULL, chkcond);
 
                default:
                        elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
@@ -1633,7 +1636,7 @@ ts_match_vq(PG_FUNCTION_ARGS)
        result = TS_execute(
                                                GETQUERY(query),
                                                &chkval,
-                                               true,
+                                               TS_EXEC_CALC_NOT,
                                                checkcondition_str
                );
 
index e16ddaf72f4d1c24e08ceb3aaa5e2d1f0e96ea98..e09a9c636f961bef1915668d9b71fdcf225f4a4f 100644 (file)
@@ -111,8 +111,25 @@ typedef struct ExecPhraseData
        WordEntryPos *pos;
 } ExecPhraseData;
 
-extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
+/*
+ * Evaluates a tsquery; the supported flags are defined below.
+ */
+extern bool TS_execute(QueryItem *curitem, void *checkval, uint32 flags,
                   bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *));
+
+#define TS_EXEC_EMPTY                  (0x00)
+/*
+ * If TS_EXEC_CALC_NOT is not set, NOT expressions are always evaluated to be
+ * true. This is used in cases where NOT cannot be accurately computed (GiST)
+ * or where it isn't important (ranking).
+ */
+#define TS_EXEC_CALC_NOT               (0x01)
+/*
+ * Treat OP_PHRASE as OP_AND. Used when positional information is not
+ * accessible, as in the consistent methods of GIN/GiST indexes.
+ */
+#define TS_EXEC_PHRASE_AS_AND  (0x02)
+
 extern bool tsquery_requires_match(QueryItem *curitem);
 
 /*
index 2c3aa1a2937fb746fd3dcba4b67fc0ed267e2038..3a13ad985ff8eda8f450ad8a26061a765e78215a 100644 (file)
@@ -1459,13 +1459,14 @@ select * from pendtest where 'ipi:*'::tsquery @@ ts;
 
 --check OP_PHRASE on index
 create temp table phrase_index_test(fts tsvector);
-insert into phrase_index_test values('A fat cat has just eaten a rat.');
+insert into phrase_index_test values ('A fat cat has just eaten a rat.');
+insert into phrase_index_test values (to_tsvector('english', 'A fat cat has just eaten a rat.'));
 create index phrase_index_test_idx on phrase_index_test using gin(fts);
 set enable_seqscan = off;
 select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');
-                       fts                       
--------------------------------------------------
- 'A' 'a' 'cat' 'eaten' 'fat' 'has' 'just' 'rat.'
+                fts                
+-----------------------------------
+ 'cat':3 'eaten':6 'fat':2 'rat':8
 (1 row)
 
 set enable_seqscan = on;
index 34b46fa32433bd69c91c70123bcff3bfebfc032c..5f3d335fc393c724e297d79f576193315b5df8a7 100644 (file)
@@ -482,7 +482,8 @@ select * from pendtest where 'ipi:*'::tsquery @@ ts;
 
 --check OP_PHRASE on index
 create temp table phrase_index_test(fts tsvector);
-insert into phrase_index_test values('A fat cat has just eaten a rat.');
+insert into phrase_index_test values ('A fat cat has just eaten a rat.');
+insert into phrase_index_test values (to_tsvector('english', 'A fat cat has just eaten a rat.'));
 create index phrase_index_test_idx on phrase_index_test using gin(fts);
 set enable_seqscan = off;
 select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');