Make exact distance match for FTS phrase operator

author Teodor Sigaev <teodor@sigaev.ru>

Mon, 27 Jun 2016 17:41:00 +0000 (20:41 +0300)

committer Teodor Sigaev <teodor@sigaev.ru>

Mon, 27 Jun 2016 17:41:00 +0000 (20:41 +0300)
author Teodor Sigaev <teodor@sigaev.ru>
Mon, 27 Jun 2016 17:41:00 +0000 (20:41 +0300)
committer Teodor Sigaev <teodor@sigaev.ru>
Mon, 27 Jun 2016 17:41:00 +0000 (20:41 +0300)
diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml

index 9028bedd1bbf1301f8c928d89016b9f91eb6afc8..72bef9f49e715d8af6a567958b5e0d354e1705a0 100644 (file)
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -346,10 +346,10 @@ SELECT to_tsvector('error is not fatal') @@ to_tsquery('fatal &lt;-&gt; error');
  
      There is a more general version of the FOLLOWED BY operator having the
      form <literal>&lt;<replaceable>N</>&gt;</literal>,
-    where <replaceable>N</> is an integer standing for the greatest distance
+    where <replaceable>N</> is an integer standing for the exact distance
      allowed between the matching lexemes.  <literal>&lt;1&gt;</literal> is
      the same as <literal>&lt;-&gt;</>, while <literal>&lt;2&gt;</literal>
-    allows one other lexeme to optionally appear between the matches, and so
+    allows exactly one other lexeme to appear between the matches, and so
      on.  The <literal>phraseto_tsquery</> function makes use of this
      operator to construct a <literal>tsquery</> that can match a multi-word
      phrase when some of the words are stop words.  For example:
@@ -1529,7 +1529,7 @@ SELECT to_tsquery('fat') &lt;-&gt; to_tsquery('cat | rat');
        <para>
         Returns a query that searches for a match to the first given query
-       followed by a match to the second given query at a distance of at
-       most <replaceable>distance</replaceable> lexemes, using
+       followed by a match to the second given query at a distance of exactly
+       <replaceable>distance</replaceable> lexemes, using
         the <literal>&lt;<replaceable>N</>&gt;</literal>
         <type>tsquery</> operator.  For example:
  
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c

index 6117ba9b3e4a083527bba6059df71983c04facda..04718829a0b960afaa560969e6ce22da6956c49c 100644 (file)
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -1375,6 +1375,7 @@ TS_phrase_execute(QueryItem *curitem,
                 ExecPhraseData Ldata = {0, false, NULL},
                                         Rdata = {0, false, NULL};
                 WordEntryPos *Lpos,
+                                  *LposStart,
                                    *Rpos,
                                    *pos_iter = NULL;
  
@@ -1416,52 +1417,60 @@ TS_phrase_execute(QueryItem *curitem,
                         pos_iter = data->pos;
                 }
  
-               Lpos = Ldata.pos;
-               Rpos = Rdata.pos;
-
                 /*
                  * Find matches by distance, WEP_GETPOS() is needed because
                  * ExecPhraseData->data can point to the tsvector's WordEntryPosVector
                  */
  
+               Rpos = Rdata.pos;
+               LposStart = Ldata.pos;
                 while (Rpos < Rdata.pos + Rdata.npos)
                 {
+                       /*
+                        * We need to check all possible distances, so reset Lpos
+                        * to a position guaranteed not to be satisfied yet.
+                        */
+                       Lpos = LposStart;
                         while (Lpos < Ldata.pos + Ldata.npos)
                         {
-                               if (WEP_GETPOS(*Lpos) <= WEP_GETPOS(*Rpos))
+                               if (WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) ==
+                                       curitem->qoperator.distance)
                                 {
-                                       /*
-                                        * Lpos is behind the Rpos, so we have to check the
-                                        * distance condition
-                                        */
-                                       if (WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) <= curitem->qoperator.distance)
+                                       /* MATCH! */
+                                       if (data)
                                         {
-                                               /* MATCH! */
-                                               if (data)
-                                               {
-                                                       *pos_iter = WEP_GETPOS(*Rpos);
-                                                       pos_iter++;
-
-                                                       break;          /* We need to build a unique result
-                                                                                * array, so go to the next Rpos */
-                                               }
-                                               else
-                                               {
-                                                       /*
-                                                        * We are in the root of the phrase tree and hence
-                                                        * we don't have to store the resulting positions
-                                                        */
-                                                       return true;
-                                               }
+                                               /* Store position for upper phrase operator */
+                                               *pos_iter = WEP_GETPOS(*Rpos);
+                                               pos_iter++;
+
+                                               /*
+                                                * Set left start position to next, because current one
+                                                * could not satisfy distance for any other right
+                                                * position
+                                                */
+                                               LposStart = Lpos + 1;
+                                               break;
+                                       }
+                                       else
+                                       {
+                                               /*
+                                                * We are in the root of the phrase tree and hence
+                                                * we don't have to store the resulting positions
+                                                */
+                                               return true;
                                         }
+
                                 }
-                               else
+                               else if (WEP_GETPOS(*Rpos) <= WEP_GETPOS(*Lpos) ||
+                                                WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) <
+                                                       curitem->qoperator.distance)
                                 {
                                         /*
-                                        * Go to the next Rpos, because Lpos is ahead of the
-                                        * current Rpos
+                                        * Go to the next Rpos, because Lpos is ahead of it or at
+                                        * a smaller distance than the current operator requires
                                          */
                                         break;
+
                                 }
  
                                 Lpos++;
diff --git a/src/test/regress/expected/tstypes.out b/src/test/regress/expected/tstypes.out

index 64d6de6050a6ef768976681df8d5d630e82aed45..781be70736bf03c0b4fdaa5695b1329787dea4a2 100644 (file)
--- a/src/test/regress/expected/tstypes.out
+++ b/src/test/regress/expected/tstypes.out
@@ -665,10 +665,10 @@ SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 2' AS "true";
   t
  (1 row)
  
-SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "true";
- true 
-------
- t
+SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "false";
+ false 
+-------
+ f
  (1 row)
  
  SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 3' AS "false";
@@ -683,6 +683,12 @@ SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 3' AS "true";
   t
  (1 row)
  
+SELECT to_tsvector('simple', '1 2 1 2') @@ '1 <3> 2' AS "true";
+ true 
+------
+ t
+(1 row)
+
  SELECT to_tsvector('simple', '1 2 11 3') @@ '1 <-> 3' AS "false";
   false 
  -------
@@ -897,7 +903,7 @@ SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:*');
  SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:A');
   ts_rank_cd 
  ------------
-  0.0714286
+          0
  (1 row)
  
  SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:B');
@@ -924,10 +930,10 @@ SELECT 'a:1 b:2'::tsvector @@ 'a <1> b'::tsquery AS "true";
   t
  (1 row)
  
-SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "true";
- true 
-------
- t
+SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "false";
+ false 
+-------
+ f
  (1 row)
  
  SELECT 'a:1 b:3'::tsvector @@ 'a <-> b'::tsquery AS "false";
@@ -954,10 +960,10 @@ SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery AS "true";
   t
  (1 row)
  
-SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "true";
- true 
-------
- t
+SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "false";
+ false 
+-------
+ f
  (1 row)
  
  -- tsvector editing operations
diff --git a/src/test/regress/sql/tstypes.sql b/src/test/regress/sql/tstypes.sql

index 738ec824b99ab4e9d62077aeec5e7f5cb6c97050..abcf1504ce5386539b91295c3d113c166b27be1e 100644 (file)
--- a/src/test/regress/sql/tstypes.sql
+++ b/src/test/regress/sql/tstypes.sql
@@ -130,9 +130,10 @@ SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
  
  --phrase search
  SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 2' AS "true";
-SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "true";
+SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "false";
  SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 3' AS "false";
  SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 3' AS "true";
+SELECT to_tsvector('simple', '1 2 1 2') @@ '1 <3> 2' AS "true";
  
  SELECT to_tsvector('simple', '1 2 11 3') @@ '1 <-> 3' AS "false";
  SELECT to_tsvector('simple', '1 2 11 3') @@ '1:* <-> 3' AS "true";
@@ -180,12 +181,12 @@ SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:B');
  SELECT 'a:1 b:2'::tsvector @@ 'a <-> b'::tsquery AS "true";
  SELECT 'a:1 b:2'::tsvector @@ 'a <0> b'::tsquery AS "false";
  SELECT 'a:1 b:2'::tsvector @@ 'a <1> b'::tsquery AS "true";
-SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "true";
+SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "false";
  SELECT 'a:1 b:3'::tsvector @@ 'a <-> b'::tsquery AS "false";
  SELECT 'a:1 b:3'::tsvector @@ 'a <0> b'::tsquery AS "false";
  SELECT 'a:1 b:3'::tsvector @@ 'a <1> b'::tsquery AS "false";
  SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery AS "true";
-SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "true";
+SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "false";
  
  -- tsvector editing operations
author	Teodor Sigaev <teodor@sigaev.ru>
	Mon, 27 Jun 2016 17:41:00 +0000 (20:41 +0300)
committer	Teodor Sigaev <teodor@sigaev.ru>
	Mon, 27 Jun 2016 17:41:00 +0000 (20:41 +0300)
doc/src/sgml/textsearch.sgml		patch \| blob \| history
src/backend/utils/adt/tsvector_op.c		patch \| blob \| history
src/test/regress/expected/tstypes.out		patch \| blob \| history
src/test/regress/sql/tstypes.sql		patch \| blob \| history