granicus.if.org Git - postgresql/commitdiff
Phrase full text search.
author: Teodor Sigaev <teodor@sigaev.ru>
Thu, 7 Apr 2016 15:44:18 +0000 (18:44 +0300)
committer: Teodor Sigaev <teodor@sigaev.ru>
Thu, 7 Apr 2016 15:44:18 +0000 (18:44 +0300)
This patch introduces a new text search operator (<-> or <DISTANCE>) into tsquery.
The on-disk and binary in/out formats of tsquery are backward compatible.
It has two side effects:
- the sort order of tsquery changes, so users who have a btree index over
  tsquery should reindex it
- fewer parentheses appear in tsquery output, so tsquery becomes more
  readable

Authors: Teodor Sigaev, Oleg Bartunov, Dmitry Ivanov
Reviewers: Alexander Korotkov, Artur Zakirov

30 files changed:
contrib/tsearch2/expected/tsearch2.out
doc/src/sgml/datatype.sgml
doc/src/sgml/func.sgml
doc/src/sgml/textsearch.sgml
src/backend/tsearch/to_tsany.c
src/backend/tsearch/ts_parse.c
src/backend/tsearch/ts_selfuncs.c
src/backend/tsearch/wparser_def.c
src/backend/utils/adt/tsginidx.c
src/backend/utils/adt/tsgistidx.c
src/backend/utils/adt/tsquery.c
src/backend/utils/adt/tsquery_cleanup.c
src/backend/utils/adt/tsquery_op.c
src/backend/utils/adt/tsquery_util.c
src/backend/utils/adt/tsrank.c
src/backend/utils/adt/tsvector.c
src/backend/utils/adt/tsvector_op.c
src/backend/utils/adt/tsvector_parser.c
src/include/catalog/catversion.h
src/include/catalog/pg_operator.h
src/include/catalog/pg_proc.h
src/include/tsearch/ts_public.h
src/include/tsearch/ts_type.h
src/include/tsearch/ts_utils.h
src/test/regress/expected/tsdicts.out
src/test/regress/expected/tsearch.out
src/test/regress/expected/tstypes.out
src/test/regress/sql/tsdicts.sql
src/test/regress/sql/tsearch.sql
src/test/regress/sql/tstypes.sql

index 972f764c14abf7a8e0b8518ae45822827c71f666..97379e7185ac1af20270607a0c3f8c29a7bc82f5 100644 (file)
@@ -278,15 +278,15 @@ SELECT '(!1|2)&3'::tsquery;
 (1 row)
 
 SELECT '1|(2|(4|(5|6)))'::tsquery;
-                 tsquery                 
------------------------------------------
- '1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
+           tsquery           
+-----------------------------
+ '1' | '2' | '4' | '5' | '6'
 (1 row)
 
 SELECT '1|2|4|5|6'::tsquery;
-                 tsquery                 
------------------------------------------
( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
+           tsquery           
+-----------------------------
'1' | '2' | '4' | '5' | '6'
 (1 row)
 
 SELECT '1&(2&(4&(5&6)))'::tsquery;
@@ -340,7 +340,7 @@ select 'a' > 'b & c'::tsquery;
 select 'a | f' < 'b & c'::tsquery;
  ?column? 
 ----------
- t
+ f
 (1 row)
 
 select 'a | ff' < 'b & c'::tsquery;
@@ -443,9 +443,9 @@ select count(*) from test_tsquery where keyword >  'new & york';
 
 set enable_seqscan=on;
 select rewrite('foo & bar & qq & new & york',  'new & york'::tsquery, 'big & apple | nyc | new & york & city');
-                                     rewrite                                      
-----------------------------------------------------------------------------------
- 'foo' & 'bar' & 'qq' & ( 'city' & 'new' & 'york' | ( 'nyc' | 'big' & 'apple' ) )
+                                   rewrite                                    
+------------------------------------------------------------------------------
+ 'foo' & 'bar' & 'qq' & ( 'nyc' | 'big' & 'apple' | 'city' & 'new' & 'york' )
 (1 row)
 
 select rewrite('moscow', 'select keyword, sample from test_tsquery'::text );
@@ -461,9 +461,9 @@ select rewrite('moscow & hotel', 'select keyword, sample from test_tsquery'::tex
 (1 row)
 
 select rewrite('bar &  new & qq & foo & york', 'select keyword, sample from test_tsquery'::text );
-                                       rewrite                                       
--------------------------------------------------------------------------------------
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+                                     rewrite                                     
+---------------------------------------------------------------------------------
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
 (1 row)
 
 select rewrite( ARRAY['moscow', keyword, sample] ) from test_tsquery;
@@ -479,9 +479,9 @@ select rewrite( ARRAY['moscow & hotel', keyword, sample] ) from test_tsquery;
 (1 row)
 
 select rewrite( ARRAY['bar &  new & qq & foo & york', keyword, sample] ) from test_tsquery;
-                                       rewrite                                       
--------------------------------------------------------------------------------------
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+                                     rewrite                                     
+---------------------------------------------------------------------------------
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
 (1 row)
 
 select keyword from test_tsquery where keyword @> 'new';
@@ -520,9 +520,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
 (1 row)
 
 select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar &  new & qq & foo & york') as query where keyword <@ query;
-                                       rewrite                                       
--------------------------------------------------------------------------------------
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+                                     rewrite                                     
+---------------------------------------------------------------------------------
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
 (1 row)
 
 select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'moscow') as query where query @> keyword;
@@ -538,9 +538,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
 (1 row)
 
 select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar &  new & qq & foo & york') as query where query @> keyword;
-                                       rewrite                                       
--------------------------------------------------------------------------------------
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+                                     rewrite                                     
+---------------------------------------------------------------------------------
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
 (1 row)
 
 create index qq on test_tsquery using gist (keyword gist_tp_tsquery_ops);
@@ -581,9 +581,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
 (1 row)
 
 select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar &  new & qq & foo & york') as query where keyword <@ query;
-                                       rewrite                                       
--------------------------------------------------------------------------------------
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+                                     rewrite                                     
+---------------------------------------------------------------------------------
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
 (1 row)
 
 select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'moscow') as query where query @> keyword;
@@ -599,9 +599,9 @@ select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('e
 (1 row)
 
 select rewrite( ARRAY[query, keyword, sample] ) from test_tsquery, to_tsquery('english', 'bar &  new & qq & foo & york') as query where query @> keyword;
-                                       rewrite                                       
--------------------------------------------------------------------------------------
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+                                     rewrite                                     
+---------------------------------------------------------------------------------
( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
 (1 row)
 
 set enable_seqscan='on';
index 7c3ef92cd2e8e27f3cc95c3e606ff75f634198a4..0b60c61d480a602efc5042cd9b6091435f0ef646 100644 (file)
@@ -3924,8 +3924,9 @@ SELECT to_tsvector('english', 'The Fat Rats');
     <para>
      A <type>tsquery</type> value stores lexemes that are to be
      searched for, and combines them honoring the Boolean operators
-     <literal>&amp;</literal> (AND), <literal>|</literal> (OR), and
-     <literal>!</> (NOT).  Parentheses can be used to enforce grouping
+     <literal>&amp;</literal> (AND), <literal>|</literal> (OR),
+     <literal>!</> (NOT) and the <literal>&lt;-&gt;</> (FOLLOWED BY) phrase search
+     operator.  Parentheses can be used to enforce grouping
      of the operators:
 
 <programlisting>
@@ -3946,8 +3947,8 @@ SELECT 'fat &amp; rat &amp; ! cat'::tsquery;
 </programlisting>
 
      In the absence of parentheses, <literal>!</> (NOT) binds most tightly,
-     and <literal>&amp;</literal> (AND) binds more tightly than
-     <literal>|</literal> (OR).
+     and <literal>&amp;</literal> (AND) and <literal>&lt;-&gt;</literal> (FOLLOWED BY)
+     both bind more tightly than <literal>|</literal> (OR).
     </para>
 
     <para>
index 15b6b4eb3d570c841a41692f4d58ea2b0a79fc19..9b0778baa9978eb20b2d5fbe638871a756e7464d 100644 (file)
@@ -9127,6 +9127,12 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
         <entry><literal>!! 'cat'::tsquery</literal></entry>
         <entry><literal>!'cat'</literal></entry>
        </row>
+       <row>
+        <entry> <literal>&lt;-&gt;</literal> </entry>
+        <entry><type>tsquery</> followed by <type>tsquery</></entry>
+        <entry><literal>to_tsquery('fat') &lt;-&gt; to_tsquery('rat')</literal></entry>
+        <entry><literal>'fat' &lt;-&gt; 'rat'</literal></entry>
+       </row>
        <row>
         <entry> <literal>@&gt;</literal> </entry>
         <entry><type>tsquery</> contains another ?</entry>
@@ -9219,6 +9225,18 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
         <entry><literal>plainto_tsquery('english', 'The Fat Rats')</literal></entry>
         <entry><literal>'fat' &amp; 'rat'</literal></entry>
        </row>
+       <row>
+        <entry>
+         <indexterm>
+          <primary>phraseto_tsquery</primary>
+         </indexterm>
+         <literal><function>phraseto_tsquery(<optional> <replaceable class="PARAMETER">config</> <type>regconfig</> , </optional> <replaceable class="PARAMETER">query</> <type>text</type>)</function></literal>
+        </entry>
+        <entry><type>tsquery</type></entry>
+        <entry>produce <type>tsquery</> ignoring punctuation</entry>
+        <entry><literal>phraseto_tsquery('english', 'The Fat Rats')</literal></entry>
+        <entry><literal>'fat' &lt;-&gt; 'rat'</literal></entry>
+       </row>
        <row>
         <entry>
          <indexterm>
@@ -9421,6 +9439,27 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
         <entry><literal>SELECT ts_rewrite('a &amp; b'::tsquery, 'SELECT t,s FROM aliases')</literal></entry>
         <entry><literal>'b' &amp; ( 'foo' | 'bar' )</literal></entry>
        </row>
+       <row>
+        <entry>
+         <indexterm>
+          <primary>tsquery_phrase</primary>
+         </indexterm>
+         <literal><function>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</>)</function></literal>
+        </entry>
+        <entry><type>tsquery</type></entry>
+        <entry>implementation of <literal>&lt;-&gt;</> (FOLLOWED BY) operator</entry>
+        <entry><literal>tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'))</literal></entry>
+        <entry><literal>'fat' &lt;-&gt; 'cat'</literal></entry>
+       </row>
+       <row>
+        <entry>
+         <literal><function>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">distance</replaceable> <type>integer</>)</function></literal>
+        </entry>
+        <entry><type>tsquery</type></entry>
+        <entry>phrase-concatenate with distance</entry>
+        <entry><literal>tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'), 10)</literal></entry>
+        <entry><literal>'fat' &lt;10&gt; 'cat'</literal></entry>
+       </row>
        <row>
         <entry>
          <indexterm>
index ea3abc9e15a3e6401a83ba18ad900aa27e687bcf..930c8f0a5dcce1bcf62b08f528ea31ba071f76e4 100644 (file)
@@ -263,9 +263,10 @@ SELECT 'fat &amp; cow'::tsquery @@ 'a fat cat sat on a mat and ate a fat rat'::t
     As the above example suggests, a <type>tsquery</type> is not just raw
     text, any more than a <type>tsvector</type> is.  A <type>tsquery</type>
     contains search terms, which must be already-normalized lexemes, and
-    may combine multiple terms using AND, OR, and NOT operators.
+    may combine multiple terms using AND, OR, NOT and FOLLOWED BY operators.
     (For details see <xref linkend="datatype-textsearch">.)  There are
-    functions <function>to_tsquery</> and <function>plainto_tsquery</>
+    functions <function>to_tsquery</>, <function>plainto_tsquery</>
+    and <function>phraseto_tsquery</>
     that are helpful in converting user-written text into a proper
     <type>tsquery</type>, for example by normalizing words appearing in
     the text.  Similarly, <function>to_tsvector</> is used to parse and
@@ -293,6 +294,35 @@ SELECT 'fat cats ate fat rats'::tsvector @@ to_tsquery('fat &amp; rat');
     already normalized, so <literal>rats</> does not match <literal>rat</>.
    </para>
 
+   <para>
+    Phrase search is made possible with the help of the <literal>&lt;-&gt;</>
+    (FOLLOWED BY) operator, which enforces lexeme order. This allows you
+    to discard strings not containing the desired phrase, for example:
+
+<programlisting>
+SELECT q @@ to_tsquery('fatal &lt;-&gt; error')
+FROM unnest(array[to_tsvector('fatal error'),
+                  to_tsvector('error is not fatal')]) AS q;
+ ?column?
+----------
+ t
+ f
+</programlisting>
+
+    A more generic version of the FOLLOWED BY operator takes the form of
+    <literal>&lt;N&gt;</>, where N stands for the greatest allowed distance
+    between the specified lexemes. The <literal>phraseto_tsquery</>
+    function makes use of this behavior in order to construct a
+    <literal>tsquery</> capable of matching the provided phrase:
+
+<programlisting>
+SELECT phraseto_tsquery('cat ate some rats');
+       phraseto_tsquery
+-------------------------------
+ ( 'cat' &lt;-&gt; 'ate' ) &lt;2&gt; 'rat'
+</programlisting>
+   </para>
+
    <para>
     The <literal>@@</literal> operator also
     supports <type>text</type> input, allowing explicit conversion of a text
@@ -709,11 +739,14 @@ UPDATE tt SET ti =
 
    <para>
     <productname>PostgreSQL</productname> provides the
-    functions <function>to_tsquery</function> and
-    <function>plainto_tsquery</function> for converting a query to
-    the <type>tsquery</type> data type.  <function>to_tsquery</function>
-    offers access to more features than <function>plainto_tsquery</function>,
-    but is less forgiving about its input.
+    functions <function>to_tsquery</function>,
+    <function>plainto_tsquery</function> and
+    <function>phraseto_tsquery</function>
+    for converting a query to the <type>tsquery</type> data type.
+    <function>to_tsquery</function> offers access to more features
+    than both <function>plainto_tsquery</function> and
+    <function>phraseto_tsquery</function>, but is less forgiving
+    about its input.
    </para>
 
    <indexterm>
@@ -728,7 +761,8 @@ to_tsquery(<optional> <replaceable class="PARAMETER">config</replaceable> <type>
     <function>to_tsquery</function> creates a <type>tsquery</> value from
     <replaceable>querytext</replaceable>, which must consist of single tokens
     separated by the Boolean operators <literal>&amp;</literal> (AND),
-    <literal>|</literal> (OR) and <literal>!</literal> (NOT).  These operators
+    <literal>|</literal> (OR), <literal>!</literal> (NOT), and also the
+    <literal>&lt;-&gt;</literal> (FOLLOWED BY) phrase search operator. These operators
     can be grouped using parentheses.  In other words, the input to
     <function>to_tsquery</function> must already follow the general rules for
     <type>tsquery</> input, as described in <xref
@@ -814,8 +848,8 @@ SELECT plainto_tsquery('english', 'The Fat Rats');
 </screen>
 
     Note that <function>plainto_tsquery</> cannot
-    recognize Boolean operators, weight labels, or prefix-match labels
-    in its input:
+    recognize Boolean and phrase search operators, weight labels,
+    or prefix-match labels in its input:
 
 <screen>
 SELECT plainto_tsquery('english', 'The Fat &amp; Rats:C');
@@ -827,6 +861,57 @@ SELECT plainto_tsquery('english', 'The Fat &amp; Rats:C');
     Here, all the input punctuation was discarded as being space symbols.
    </para>
 
+   <indexterm>
+    <primary>phraseto_tsquery</primary>
+   </indexterm>
+
+<synopsis>
+phraseto_tsquery(<optional> <replaceable class="PARAMETER">config</replaceable> <type>regconfig</>, </optional> <replaceable class="PARAMETER">querytext</replaceable> <type>text</>) returns <type>tsquery</>
+</synopsis>
+
+   <para>
+    <function>phraseto_tsquery</> behaves much like
+    <function>plainto_tsquery</>, with the exception
+    that it utilizes the <literal>&lt;-&gt;</literal> (FOLLOWED BY) phrase search
+    operator instead of the <literal>&amp;</literal> (AND) Boolean operator.
+    This is particularly useful when searching for exact lexeme sequences,
+    since the phrase search operator helps to maintain lexeme order.
+   </para>
+
+   <para>
+    Example:
+
+<screen>
+SELECT phraseto_tsquery('english', 'The Fat Rats');
+ phraseto_tsquery
+------------------
+ 'fat' &lt;-&gt; 'rat'
+</screen>
+
+    Just like <function>plainto_tsquery</>, the
+    <function>phraseto_tsquery</> function cannot
+    recognize Boolean and phrase search operators, weight labels,
+    or prefix-match labels in its input:
+
+<screen>
+SELECT phraseto_tsquery('english', 'The Fat &amp; Rats:C');
+      phraseto_tsquery
+-----------------------------
+ ( 'fat' &lt;-&gt; 'rat' ) &lt;-&gt; 'c'
+</screen>
+
+    It is possible to specify the configuration to be used to parse the document.
+    For example, we could create a new one using the hunspell dictionary
+    (namely 'eng_hunspell') in order to match phrases with different word forms:
+
+<screen>
+SELECT phraseto_tsquery('eng_hunspell', 'developer of the building which collapsed');
+                                      phraseto_tsquery
+--------------------------------------------------------------------------------------------
+ ( 'developer' &lt;3&gt; 'building' ) &lt;2&gt; 'collapse' | ( 'developer' &lt;3&gt; 'build' ) &lt;2&gt; 'collapse'
+</screen>
+   </para>
+
   </sect2>
 
   <sect2 id="textsearch-ranking">
@@ -1387,6 +1472,81 @@ FROM (SELECT id, body, q, ts_rank_cd(ti, q) AS rank
 
     </varlistentry>
 
+    <varlistentry>
+
+     <term>
+      <literal><type>tsquery</> &lt;-&gt; <type>tsquery</></literal>
+     </term>
+
+     <listitem>
+      <para>
+       Returns the phrase-concatenation of the two given queries.
+
+<screen>
+SELECT to_tsquery('fat') &lt;-&gt; to_tsquery('cat | rat');
+             ?column?
+-----------------------------------
+ 'fat' &lt;-&gt; 'cat' | 'fat' &lt;-&gt; 'rat'
+</screen>
+      </para>
+     </listitem>
+
+    </varlistentry>
+
+    <varlistentry>
+
+     <term>
+     <indexterm>
+      <primary>tsquery_phrase</primary>
+     </indexterm>
+
+      <literal>tsquery_phrase(<replaceable class="PARAMETER">query1</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">query2</replaceable> <type>tsquery</> [, <replaceable class="PARAMETER">distance</replaceable> <type>integer</> ]) returns <type>tsquery</></literal>
+     </term>
+
+     <listitem>
+      <para>
+       Returns the distanced phrase-concatenation of the two given queries.
+       This function is the underlying implementation of the <literal>&lt;-&gt;</> operator.
+
+<screen>
+SELECT tsquery_phrase(to_tsquery('fat'), to_tsquery('cat'), 10);
+  tsquery_phrase
+------------------
+ 'fat' &lt;10&gt; 'cat'
+</screen>
+      </para>
+     </listitem>
+
+    </varlistentry>
+
+    <varlistentry>
+
+     <term>
+     <indexterm>
+      <primary>setweight</primary>
+     </indexterm>
+
+      <literal>setweight(<replaceable class="PARAMETER">query</replaceable> <type>tsquery</>, <replaceable class="PARAMETER">weight</replaceable> <type>"char"</>) returns <type>tsquery</></literal>
+     </term>
+
+     <listitem>
+      <para>
+       <function>setweight</> returns a copy of the input query in which every
+       position has been labeled with the given <replaceable>weight</>(s), either
+       <literal>A</literal>, <literal>B</literal>, <literal>C</literal>,
+       <literal>D</literal> or their combination. These labels are retained when
+       queries are concatenated, allowing words from different parts of a document
+       to be weighted differently by ranking functions.
+      </para>
+
+      <para>
+       Note that weight labels apply to <emphasis>positions</>, not
+       <emphasis>lexemes</>.  If the input query has been stripped of
+       positions then <function>setweight</> does nothing.
+      </para>
+     </listitem>
+    </varlistentry>
+
     <varlistentry>
 
      <term>
@@ -2428,7 +2588,7 @@ more sample word(s) : more indexed word(s)
 
    <para>
     Specific stop words recognized by the subdictionary cannot be
-    specified;  instead use <literal>?</> to mark the location where any
+    specified;  instead use <literal>&lt;-&gt;</> to mark the location where any
     stop word can appear.  For example, assuming that <literal>a</> and
     <literal>the</> are stop words according to the subdictionary:
 
index aa77ec07281bbb7957d3dfa826802d04dafcd0ff..3f69d747028f4ddf371974d8782d48f5ed0a386f 100644 (file)
 #include "utils/builtins.h"
 
 
+typedef struct MorphOpaque
+{
+       Oid             cfg_id;
+       int             qoperator;      /* query operator */
+} MorphOpaque;
+
+
 Datum
 get_current_ts_config(PG_FUNCTION_ARGS)
 {
@@ -262,60 +269,81 @@ to_tsvector(PG_FUNCTION_ARGS)
  * to the stack.
  *
  * All words belonging to the same variant are pushed as an ANDed list,
- * and different variants are ORred together.
+ * and different variants are ORed together.
  */
 static void
 pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
 {
-       int32           count = 0;
-       ParsedText      prs;
-       uint32          variant,
-                               pos,
-                               cntvar = 0,
-                               cntpos = 0,
-                               cnt = 0;
-       Oid                     cfg_id = DatumGetObjectId(opaque);              /* the input is actually
-                                                                                                                * an Oid, not a pointer */
+       int32                   count = 0;
+       ParsedText              prs;
+       uint32                  variant,
+                                       pos = 0,
+                                       cntvar = 0,
+                                       cntpos = 0,
+                                       cnt = 0;
+       MorphOpaque        *data = (MorphOpaque *) DatumGetPointer(opaque);
 
        prs.lenwords = 4;
        prs.curwords = 0;
        prs.pos = 0;
        prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
 
-       parsetext(cfg_id, &prs, strval, lenval);
+       parsetext(data->cfg_id, &prs, strval, lenval);
 
        if (prs.curwords > 0)
        {
-
                while (count < prs.curwords)
                {
-                       pos = prs.words[count].pos.pos;
+                       /*
+                        * Were any stop words removed? If so, fill empty positions
+                        * with placeholders linked by an appropriate operator.
+                        */
+                       if (pos > 0 && pos + 1 < prs.words[count].pos.pos)
+                       {
+                               while (pos + 1 < prs.words[count].pos.pos)
+                               {
+                                       /* put placeholders for each missing stop word */
+                                       pushStop(state);
+                                       if (cntpos)
+                                               pushOperator(state, data->qoperator, 1);
+                                       cntpos++;
+                                       pos++;
+                               }
+                       }
+
+                       pos = prs.words[count].pos.pos; /* save current word's position */
+
+                       /* Go through all variants obtained from this token */
                        cntvar = 0;
                        while (count < prs.curwords && pos == prs.words[count].pos.pos)
                        {
                                variant = prs.words[count].nvariant;
 
+                               /* Push all words belonging to the same variant */
                                cnt = 0;
-                               while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
+                               while (count < prs.curwords &&
+                                          pos == prs.words[count].pos.pos &&
+                                          variant == prs.words[count].nvariant)
                                {
-
-                                       pushValue(state, prs.words[count].word, prs.words[count].len, weight,
-                                                         ((prs.words[count].flags & TSL_PREFIX) || prefix) ? true : false);
+                                       pushValue(state,
+                                                         prs.words[count].word,
+                                                         prs.words[count].len,
+                                                         weight,
+                                                         ((prs.words[count].flags & TSL_PREFIX) || prefix));
                                        pfree(prs.words[count].word);
                                        if (cnt)
-                                               pushOperator(state, OP_AND);
+                                               pushOperator(state, OP_AND, 0);
                                        cnt++;
                                        count++;
                                }
 
                                if (cntvar)
-                                       pushOperator(state, OP_OR);
+                                       pushOperator(state, OP_OR, 0);
                                cntvar++;
                        }
 
                        if (cntpos)
-                               pushOperator(state, OP_AND);
-
+                               pushOperator(state, data->qoperator, 1); /* distance may be useful */
                        cntpos++;
                }
 
@@ -329,44 +357,18 @@ pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval,
 Datum
 to_tsquery_byid(PG_FUNCTION_ARGS)
 {
-       Oid                     cfgid = PG_GETARG_OID(0);
-       text       *in = PG_GETARG_TEXT_P(1);
-       TSQuery         query;
-       QueryItem  *res;
-       int32           len;
-
-       query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), false);
-
-       if (query->size == 0)
-               PG_RETURN_TSQUERY(query);
-
-       /* clean out any stopword placeholders from the tree */
-       res = clean_fakeval(GETQUERY(query), &len);
-       if (!res)
-       {
-               SET_VARSIZE(query, HDRSIZETQ);
-               query->size = 0;
-               PG_RETURN_POINTER(query);
-       }
-       memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));
+       text               *in = PG_GETARG_TEXT_P(1);
+       TSQuery                 query;
+       MorphOpaque             data;
 
-       /*
-        * Removing the stopword placeholders might've resulted in fewer
-        * QueryItems. If so, move the operands up accordingly.
-        */
-       if (len != query->size)
-       {
-               char       *oldoperand = GETOPERAND(query);
-               int32           lenoperand = VARSIZE(query) - (oldoperand - (char *) query);
+       data.cfg_id = PG_GETARG_OID(0);
+       data.qoperator = OP_AND;
 
-               Assert(len < query->size);
-
-               query->size = len;
-               memmove((void *) GETOPERAND(query), oldoperand, VARSIZE(query) - (oldoperand - (char *) query));
-               SET_VARSIZE(query, COMPUTESIZE(len, lenoperand));
-       }
+       query = parse_tsquery(text_to_cstring(in),
+                                                 pushval_morph,
+                                                 PointerGetDatum(&data),
+                                                 false);
 
-       pfree(res);
        PG_RETURN_TSQUERY(query);
 }
 
@@ -385,55 +387,60 @@ to_tsquery(PG_FUNCTION_ARGS)
 Datum
 plainto_tsquery_byid(PG_FUNCTION_ARGS)
 {
-       Oid                     cfgid = PG_GETARG_OID(0);
-       text       *in = PG_GETARG_TEXT_P(1);
-       TSQuery         query;
-       QueryItem  *res;
-       int32           len;
+       text               *in = PG_GETARG_TEXT_P(1);
+       TSQuery                 query;
+       MorphOpaque             data;
 
-       query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), true);
+       data.cfg_id = PG_GETARG_OID(0);
+       data.qoperator = OP_AND;
 
-       if (query->size == 0)
-               PG_RETURN_TSQUERY(query);
+       query = parse_tsquery(text_to_cstring(in),
+                                                 pushval_morph,
+                                                 PointerGetDatum(&data),
+                                                 true);
 
-       /* clean out any stopword placeholders from the tree */
-       res = clean_fakeval(GETQUERY(query), &len);
-       if (!res)
-       {
-               SET_VARSIZE(query, HDRSIZETQ);
-               query->size = 0;
-               PG_RETURN_POINTER(query);
-       }
-       memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));
+       PG_RETURN_POINTER(query);
+}
 
-       /*
-        * Removing the stopword placeholders might've resulted in fewer
-        * QueryItems. If so, move the operands up accordingly.
-        */
-       if (len != query->size)
-       {
-               char       *oldoperand = GETOPERAND(query);
-               int32           lenoperand = VARSIZE(query) - (oldoperand - (char *) query);
+Datum
+plainto_tsquery(PG_FUNCTION_ARGS)
+{
+       text       *in = PG_GETARG_TEXT_P(0);
+       Oid                     cfgId;
+
+       cfgId = getTSCurrentConfig(true);
+       PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery_byid,
+                                                                               ObjectIdGetDatum(cfgId),
+                                                                               PointerGetDatum(in)));
+}
 
-               Assert(len < query->size);
 
-               query->size = len;
-               memmove((void *) GETOPERAND(query), oldoperand, lenoperand);
-               SET_VARSIZE(query, COMPUTESIZE(len, lenoperand));
-       }
+Datum
+phraseto_tsquery_byid(PG_FUNCTION_ARGS)
+{
+       text               *in = PG_GETARG_TEXT_P(1);
+       TSQuery                 query;
+       MorphOpaque             data;
 
-       pfree(res);
-       PG_RETURN_POINTER(query);
+       data.cfg_id = PG_GETARG_OID(0);
+       data.qoperator = OP_PHRASE;
+
+       query = parse_tsquery(text_to_cstring(in),
+                                                 pushval_morph,
+                                                 PointerGetDatum(&data),
+                                                 true);
+
+       PG_RETURN_TSQUERY(query);
 }
 
 Datum
-plainto_tsquery(PG_FUNCTION_ARGS)
+phraseto_tsquery(PG_FUNCTION_ARGS)
 {
        text       *in = PG_GETARG_TEXT_P(0);
        Oid                     cfgId;
 
        cfgId = getTSCurrentConfig(true);
-       PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery_byid,
+       PG_RETURN_DATUM(DirectFunctionCall2(phraseto_tsquery_byid,
                                                                                ObjectIdGetDatum(cfgId),
                                                                                PointerGetDatum(in)));
 }
index 64cf906a5ab1dc261b94fc70c689652d1ce46172..f0e4269e8438712fd0856885190d51499d7fb224 100644 (file)
@@ -454,7 +454,7 @@ hladdword(HeadlineParsedText *prs, char *buf, int buflen, int type)
 }
 
 static void
-hlfinditem(HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
+hlfinditem(HeadlineParsedText *prs, TSQuery query, int32 pos, char *buf, int buflen)
 {
        int                     i;
        QueryItem  *item = GETQUERY(query);
@@ -467,6 +467,7 @@ hlfinditem(HeadlineParsedText *prs, TSQuery query, char *buf, int buflen)
        }
 
        word = &(prs->words[prs->curwords - 1]);
+       word->pos = LIMITPOS(pos);
        for (i = 0; i < query->size; i++)
        {
                if (item->type == QI_VAL &&
@@ -492,17 +493,20 @@ addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme
 {
        ParsedLex  *tmplexs;
        TSLexeme   *ptr;
+       int32           savedpos;
 
        while (lexs)
        {
-
                if (lexs->type > 0)
                        hladdword(prs, lexs->lemm, lexs->lenlemm, lexs->type);
 
                ptr = norms;
+               savedpos = prs->vectorpos;
                while (ptr && ptr->lexeme)
                {
-                       hlfinditem(prs, query, ptr->lexeme, strlen(ptr->lexeme));
+                       if (ptr->flags & TSL_ADDPOS)
+                               savedpos++;
+                       hlfinditem(prs, query, savedpos, ptr->lexeme, strlen(ptr->lexeme));
                        ptr++;
                }
 
@@ -516,6 +520,8 @@ addHLParsedLex(HeadlineParsedText *prs, TSQuery query, ParsedLex *lexs, TSLexeme
                ptr = norms;
                while (ptr->lexeme)
                {
+                       if (ptr->flags & TSL_ADDPOS)
+                               prs->vectorpos++;
                        pfree(ptr->lexeme);
                        ptr++;
                }
@@ -575,7 +581,10 @@ hlparsetext(Oid cfgId, HeadlineParsedText *prs, TSQuery query, char *buf, int bu
                do
                {
                        if ((norms = LexizeExec(&ldata, &lexs)) != NULL)
+                       {
+                               prs->vectorpos++;
                                addHLParsedLex(prs, query, lexs, norms);
+                       }
                        else
                                addHLParsedLex(prs, query, lexs, NULL);
                } while (norms);
index 7462888b5ca24247207cfe73a208eade7723fc63..c4118f1db2655c3112ba32b8fe84400a67f31030 100644 (file)
@@ -261,7 +261,7 @@ mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem,
 /*
  * Traverse the tsquery in preorder, calculating selectivity as:
  *
- *      selec(left_oper) * selec(right_oper) in AND nodes,
+ *      selec(left_oper) * selec(right_oper) in AND & PHRASE nodes,
  *
  *      selec(left_oper) + selec(right_oper) -
  *             selec(left_oper) * selec(right_oper) in OR nodes,
@@ -400,6 +400,7 @@ tsquery_opr_selec(QueryItem *item, char *operand,
                                                                                                lookup, length, minfreq);
                                break;
 
+                       case OP_PHRASE:
                        case OP_AND:
                                s1 = tsquery_opr_selec(item + 1, operand,
                                                                           lookup, length, minfreq);
index 4a28ce7545a04703a13554c393bbb61a65a79538..2faa15ebd4cb6fbd7f5bf7f402abdbcf9a2b68ae 100644 (file)
@@ -2030,15 +2030,36 @@ typedef struct
 } hlCheck;
 
 static bool
-checkcondition_HL(void *checkval, QueryOperand *val)
+checkcondition_HL(void *opaque, QueryOperand *val, ExecPhraseData *data)
 {
        int                     i;
+       hlCheck    *checkval = (hlCheck *) opaque;
 
-       for (i = 0; i < ((hlCheck *) checkval)->len; i++)
+       for (i = 0; i < checkval->len; i++)
        {
-               if (((hlCheck *) checkval)->words[i].item == val)
-                       return true;
+               if (checkval->words[i].item == val)
+               {
+                       /* don't need to find all positions */
+                       if (!data)
+                               return true;
+
+                       if (!data->pos)
+                       {
+                               data->pos = palloc(sizeof(WordEntryPos) * checkval->len);
+                               data->allocated = true;
+                               data->npos = 1;
+                               data->pos[0] = checkval->words[i].pos;
+                       }
+                       else if (data->pos[data->npos - 1] < checkval->words[i].pos)
+                       {
+                               data->pos[data->npos++] = checkval->words[i].pos;
+                       }
+               }
        }
+
+       if (data && data->npos > 0)
+               return true;
+
        return false;
 }
 
@@ -2400,7 +2421,7 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, int highlight,
 
                        if (poslen < bestlen && !(NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword))
                        {
-                               /* best already finded, so try one more cover */
+                               /* best already found, so try one more cover */
                                p++;
                                continue;
                        }
index fef594700aa0ec02fa85e4944a1bfa994672fe42..fc0686ee66b894d65c75ab6bebdd99b3ec0b778f 100644 (file)
@@ -179,14 +179,16 @@ typedef struct
 } GinChkVal;
 
 static GinTernaryValue
-checkcondition_gin(void *checkval, QueryOperand *val)
+checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *data)
 {
-       GinChkVal  *gcv = (GinChkVal *) checkval;
        int                     j;
 
-       /* if any val requiring a weight is used, set recheck flag */
-       if (val->weight != 0)
-               *(gcv->need_recheck) = true;
+       /*
+        * if any val requiring a weight is used or caller
+        * needs position information then set recheck flag
+        */
+       if (val->weight != 0 || data != NULL)
+               *gcv->need_recheck = true;
 
        /* convert item's number to corresponding entry's (operand's) number */
        j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
@@ -195,16 +197,22 @@ checkcondition_gin(void *checkval, QueryOperand *val)
        return gcv->check[j];
 }
 
+/*
+ * Check callback for TS_execute: true unless the GIN check is GIN_FALSE.
+ */
+static bool
+checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
+{
+       return checkcondition_gin_internal((GinChkVal *) checkval,
+                                                                          val,
+                                                                          data) != GIN_FALSE;
+}
+
 /*
  * Evaluate tsquery boolean expression using ternary logic.
- *
- * chkcond is a callback function used to evaluate each VAL node in the query.
- * checkval can be used to pass information to the callback. TS_execute doesn't
- * do anything with it.
  */
 static GinTernaryValue
-TS_execute_ternary(QueryItem *curitem, void *checkval,
-                         GinTernaryValue (*chkcond) (void *checkval, QueryOperand *val))
+TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem)
 {
        GinTernaryValue val1,
                                val2,
@@ -214,22 +222,30 @@ TS_execute_ternary(QueryItem *curitem, void *checkval,
        check_stack_depth();
 
        if (curitem->type == QI_VAL)
-               return chkcond(checkval, (QueryOperand *) curitem);
+               return checkcondition_gin_internal(gcv,
+                                                                                  (QueryOperand *) curitem,
+                                                                                  NULL /* don't have any position info */);
 
        switch (curitem->qoperator.oper)
        {
                case OP_NOT:
-                       result = TS_execute_ternary(curitem + 1, checkval, chkcond);
+                       result = TS_execute_ternary(gcv, curitem + 1);
                        if (result == GIN_MAYBE)
                                return result;
                        return !result;
 
+               case OP_PHRASE:
+                       /*
+                        * GIN doesn't contain any information about positions, so treat
+                        * OP_PHRASE as OP_AND with a recheck requirement (falls through)
+                        */
+                       *gcv->need_recheck = true;
+
                case OP_AND:
-                       val1 = TS_execute_ternary(curitem + curitem->qoperator.left,
-                                                                         checkval, chkcond);
+                       val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left);
                        if (val1 == GIN_FALSE)
                                return GIN_FALSE;
-                       val2 = TS_execute_ternary(curitem + 1, checkval, chkcond);
+                       val2 = TS_execute_ternary(gcv, curitem + 1);
                        if (val2 == GIN_FALSE)
                                return GIN_FALSE;
                        if (val1 == GIN_TRUE && val2 == GIN_TRUE)
@@ -238,11 +254,10 @@ TS_execute_ternary(QueryItem *curitem, void *checkval,
                                return GIN_MAYBE;
 
                case OP_OR:
-                       val1 = TS_execute_ternary(curitem + curitem->qoperator.left,
-                                                                         checkval, chkcond);
+                       val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left);
                        if (val1 == GIN_TRUE)
                                return GIN_TRUE;
-                       val2 = TS_execute_ternary(curitem + 1, checkval, chkcond);
+                       val2 = TS_execute_ternary(gcv, curitem + 1);
                        if (val2 == GIN_TRUE)
                                return GIN_TRUE;
                        if (val1 == GIN_FALSE && val2 == GIN_FALSE)
@@ -327,9 +342,7 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
                gcv.map_item_operand = (int *) (extra_data[0]);
                gcv.need_recheck = &recheck;
 
-               res = TS_execute_ternary(GETQUERY(query),
-                                                                &gcv,
-                                                                checkcondition_gin);
+               res = TS_execute_ternary(&gcv, GETQUERY(query));
 
                if (res == GIN_TRUE && recheck)
                        res = GIN_MAYBE;
index 0100cf4f37289c375d36f6e27a5fe3b0f67a7f5b..cdd5d43fce5f33c3ee9ab013fcc0a0cf16b612fb 100644 (file)
@@ -298,7 +298,7 @@ typedef struct
  * is there value 'val' in array or not ?
  */
 static bool
-checkcondition_arr(void *checkval, QueryOperand *val)
+checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data)
 {
        int32      *StopLow = ((CHKVAL *) checkval)->arrb;
        int32      *StopHigh = ((CHKVAL *) checkval)->arre;
@@ -327,7 +327,7 @@ checkcondition_arr(void *checkval, QueryOperand *val)
 }
 
 static bool
-checkcondition_bit(void *checkval, QueryOperand *val)
+checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data)
 {
        /*
         * we are not able to find a prefix in signature tree
index 0732060678449bf8a3476a706ac37239125bb882..257b5d33456910aaf0c7496d407197ce0995515f 100644 (file)
@@ -56,7 +56,7 @@ struct TSQueryParserStateData
 
 /*
  * subroutine to parse the modifiers (weight and prefix flag currently)
- * part, like ':1AB' of a query.
+ * part, like ':AB*' of a query.
  */
 static char *
 get_modifiers(char *buf, int16 *weight, bool *prefix)
@@ -100,6 +100,94 @@ get_modifiers(char *buf, int16 *weight, bool *prefix)
        return buf;
 }
 
+/*
+ * Parse a phrase operator at buf, which must begin with '<'.  Accepted forms:
+ *             a <X> b (distance is no greater than X)
+ *             a <-> b (default distance = 1)
+ * On success, stores the distance in *distance and returns a pointer just
+ * past '>'.  On a syntax error (or if nothing follows the operator), sets
+ * *distance to -1 and returns buf.  Distances above MAXENTRYPOS raise ERROR.
+ */
+static char *
+parse_phrase_operator(char *buf, int16 *distance)
+{
+       enum
+       {
+               PHRASE_OPEN = 0,
+               PHRASE_DIST,
+               PHRASE_CLOSE,
+               PHRASE_ERR,
+               PHRASE_FINISH
+       }               state = PHRASE_OPEN;
+
+       char   *ptr = buf;
+       char   *endptr;
+       long    l = 1;
+
+       while (*ptr)
+       {
+               switch(state)
+               {
+                       case PHRASE_OPEN:
+                               Assert(t_iseq(ptr, '<'));
+                               state = PHRASE_DIST;
+                               ptr++;
+                               break;
+
+                       case PHRASE_DIST:
+                               if (t_iseq(ptr, '-'))
+                               {
+                                       state = PHRASE_CLOSE;
+                                       ptr++;
+                                       break;
+                               }
+                               else if (!t_isdigit(ptr))
+                               {
+                                       state = PHRASE_ERR;
+                                       break;
+                               }
+
+                               errno = 0;      /* strtol() never clears errno; avoid a stale ERANGE */
+                               l = strtol(ptr, &endptr, 10);
+                               if (ptr == endptr)
+                                       state = PHRASE_ERR;
+                               else if (errno == ERANGE || l > MAXENTRYPOS)
+                                       ereport(ERROR,
+                                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                                        errmsg("distance in phrase operator should not be greater than %d",
+                                                                       MAXENTRYPOS)));
+                               else
+                               {
+                                       state = PHRASE_CLOSE;
+                                       ptr = endptr;
+                               }
+                               break;
+
+                       case PHRASE_CLOSE:
+                               if (t_iseq(ptr, '>'))
+                               {
+                                       state = PHRASE_FINISH;
+                                       ptr++;
+                               }
+                               else
+                                       state = PHRASE_ERR;
+                               break;
+
+                       case PHRASE_FINISH:
+                               *distance = (int16) l;
+                               return ptr;
+
+                       case PHRASE_ERR:
+                       default:
+                               goto err;
+               }
+       }
+
+       err:
+       *distance = -1;
+       return buf;
+}
+
+
 /*
  * token types for parsing
  */
@@ -116,8 +204,10 @@ typedef enum
 /*
  * get token from query string
  *
- * *operator is filled in with OP_* when return values is PT_OPR
+ * *operator is filled in with OP_* when the return value is PT_OPR;
+ * for a phrase operator, *weight additionally carries the distance value.
  * *strval, *lenval and *weight are filled in when return value is PT_VAL
+ *
  */
 static ts_tokentype
 gettoken_query(TSQueryParserState state,
@@ -185,13 +275,23 @@ gettoken_query(TSQueryParserState state,
                                        (state->buf)++;
                                        return PT_OPR;
                                }
-                               if (t_iseq(state->buf, '|'))
+                               else if (t_iseq(state->buf, '|'))
                                {
                                        state->state = WAITOPERAND;
                                        *operator = OP_OR;
                                        (state->buf)++;
                                        return PT_OPR;
                                }
+                               else if (t_iseq(state->buf, '<'))
+                               {
+                                       state->state = WAITOPERAND;
+                                       *operator = OP_PHRASE;
+                                       /* weight var is used as storage for distance */
+                                       state->buf = parse_phrase_operator(state->buf, weight);
+                                       if (*weight < 0)
+                                               return PT_ERR;
+                                       return PT_OPR;
+                               }
                                else if (t_iseq(state->buf, ')'))
                                {
                                        (state->buf)++;
@@ -223,15 +323,16 @@ gettoken_query(TSQueryParserState state,
  * Push an operator to state->polstr
  */
 void
-pushOperator(TSQueryParserState state, int8 oper)
+pushOperator(TSQueryParserState state, int8 oper, int16 distance)
 {
        QueryOperator *tmp;
 
-       Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR);
+       Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR || oper == OP_PHRASE);
 
        tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));
        tmp->type = QI_OPR;
        tmp->oper = oper;
+       tmp->distance = (oper == OP_PHRASE) ? distance : 0;
        /* left is filled in later with findoprnd */
 
        state->polstr = lcons(tmp, state->polstr);
@@ -330,14 +431,18 @@ makepol(TSQueryParserState state,
                PushFunction pushval,
                Datum opaque)
 {
-       int8            operator = 0;
-       ts_tokentype type;
-       int                     lenval = 0;
-       char       *strval = NULL;
-       int8            opstack[STACKDEPTH];
-       int                     lenstack = 0;
-       int16           weight = 0;
-       bool            prefix;
+       int8                    operator = 0;
+       ts_tokentype    type;
+       int                             lenval = 0;
+       char               *strval = NULL;
+       struct
+       {
+               int8    op;
+               int16   distance;
+       }                               opstack[STACKDEPTH];
+       int                             lenstack = 0;
+       int16                   weight = 0;
+       bool                    prefix;
 
        /* since this function recurses, it could be driven to stack overflow */
        check_stack_depth();
@@ -348,39 +453,48 @@ makepol(TSQueryParserState state,
                {
                        case PT_VAL:
                                pushval(opaque, state, strval, lenval, weight, prefix);
-                               while (lenstack && (opstack[lenstack - 1] == OP_AND ||
-                                                                       opstack[lenstack - 1] == OP_NOT))
+                               while (lenstack && (opstack[lenstack - 1].op == OP_AND ||
+                                                                       opstack[lenstack - 1].op == OP_PHRASE ||
+                                                                       opstack[lenstack - 1].op == OP_NOT))
                                {
                                        lenstack--;
-                                       pushOperator(state, opstack[lenstack]);
+                                       pushOperator(state,
+                                                                opstack[lenstack].op,
+                                                                opstack[lenstack].distance);
                                }
                                break;
                        case PT_OPR:
                                if (lenstack && operator == OP_OR)
-                                       pushOperator(state, OP_OR);
+                                       pushOperator(state, OP_OR, 0);
                                else
                                {
                                        if (lenstack == STACKDEPTH) /* internal error */
                                                elog(ERROR, "tsquery stack too small");
-                                       opstack[lenstack] = operator;
+                                       opstack[lenstack].op = operator;
+                                       opstack[lenstack].distance = weight;
                                        lenstack++;
                                }
                                break;
                        case PT_OPEN:
                                makepol(state, pushval, opaque);
 
-                               while (lenstack && (opstack[lenstack - 1] == OP_AND ||
-                                                                       opstack[lenstack - 1] == OP_NOT))
+                               while (lenstack && (opstack[lenstack - 1].op == OP_AND ||
+                                                                       opstack[lenstack - 1].op == OP_PHRASE ||
+                                                                       opstack[lenstack - 1].op == OP_NOT))
                                {
                                        lenstack--;
-                                       pushOperator(state, opstack[lenstack]);
+                                       pushOperator(state,
+                                                                opstack[lenstack].op,
+                                                                opstack[lenstack].distance);
                                }
                                break;
                        case PT_CLOSE:
                                while (lenstack)
                                {
                                        lenstack--;
-                                       pushOperator(state, opstack[lenstack]);
+                                       pushOperator(state,
+                                                                opstack[lenstack].op,
+                                                                opstack[lenstack].distance);
                                };
                                return;
                        case PT_ERR:
@@ -394,12 +508,14 @@ makepol(TSQueryParserState state,
        while (lenstack)
        {
                lenstack--;
-               pushOperator(state, opstack[lenstack]);
+               pushOperator(state,
+                                        opstack[lenstack].op,
+                                        opstack[lenstack].distance);
        }
 }
 
 static void
-findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes)
+findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup)
 {
        /* since this function recurses, it could be driven to stack overflow. */
        check_stack_depth();
@@ -407,10 +523,13 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes)
        if (*pos >= nnodes)
                elog(ERROR, "malformed tsquery: operand not found");
 
-       if (ptr[*pos].type == QI_VAL ||
-               ptr[*pos].type == QI_VALSTOP)   /* need to handle VALSTOP here, they
-                                                                                * haven't been cleaned away yet. */
+       if (ptr[*pos].type == QI_VAL)
+       {
+               (*pos)++;
+       }
+       else if (ptr[*pos].type == QI_VALSTOP)
        {
+               *needcleanup = true; /* we'll have to remove stop words */
                (*pos)++;
        }
        else
@@ -419,21 +538,32 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes)
 
                if (ptr[*pos].qoperator.oper == OP_NOT)
                {
-                       ptr[*pos].qoperator.left = 1;
+                       ptr[*pos].qoperator.left = 1; /* fixed offset */
                        (*pos)++;
-                       findoprnd_recurse(ptr, pos, nnodes);
+
+                       /* process the only argument */
+                       findoprnd_recurse(ptr, pos, nnodes, needcleanup);
                }
                else
                {
-                       QueryOperator *curitem = &ptr[*pos].qoperator;
-                       int                     tmp = *pos;
+                       QueryOperator  *curitem = &ptr[*pos].qoperator;
+                       int                             tmp = *pos; /* save current position */
+
+                       Assert(curitem->oper == OP_AND ||
+                                  curitem->oper == OP_OR ||
+                                  curitem->oper == OP_PHRASE);
 
-                       Assert(curitem->oper == OP_AND || curitem->oper == OP_OR);
+                       if (curitem->oper == OP_PHRASE)
+                               *needcleanup = true; /* push OP_PHRASE down later */
 
                        (*pos)++;
-                       findoprnd_recurse(ptr, pos, nnodes);
-                       curitem->left = *pos - tmp;
-                       findoprnd_recurse(ptr, pos, nnodes);
+
+                       /* process RIGHT argument */
+                       findoprnd_recurse(ptr, pos, nnodes, needcleanup);
+                       curitem->left = *pos - tmp; /* set LEFT arg's offset */
+
+                       /* process LEFT argument */
+                       findoprnd_recurse(ptr, pos, nnodes, needcleanup);
                }
        }
 }
@@ -444,12 +574,13 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes)
  * QueryItems must be in polish (prefix) notation.
  */
 static void
-findoprnd(QueryItem *ptr, int size)
+findoprnd(QueryItem *ptr, int size, bool *needcleanup)
 {
        uint32          pos;
 
+       *needcleanup = false;
        pos = 0;
-       findoprnd_recurse(ptr, &pos, size);
+       findoprnd_recurse(ptr, &pos, size, needcleanup);
 
        if (pos != size)
                elog(ERROR, "malformed tsquery: extra nodes");
@@ -466,9 +597,6 @@ findoprnd(QueryItem *ptr, int size)
  *
  * opaque is passed on to pushval as is, pushval can use it to store its
  * private state.
- *
- * The returned query might contain QI_STOPVAL nodes. The caller is responsible
- * for cleaning them up (with clean_fakeval)
  */
 TSQuery
 parse_tsquery(char *buf,
@@ -482,6 +610,7 @@ parse_tsquery(char *buf,
        int                     commonlen;
        QueryItem  *ptr;
        ListCell   *cell;
+       bool            needcleanup;
 
        /* init state */
        state.buffer = buf;
@@ -531,7 +660,7 @@ parse_tsquery(char *buf,
        i = 0;
        foreach(cell, state.polstr)
        {
-               QueryItem  *item = (QueryItem *) lfirst(cell);
+               QueryItem *item = (QueryItem *) lfirst(cell);
 
                switch (item->type)
                {
@@ -555,7 +684,14 @@ parse_tsquery(char *buf,
        pfree(state.op);
 
        /* Set left operand pointers for every operator. */
-       findoprnd(ptr, query->size);
+       findoprnd(ptr, query->size, &needcleanup);
+
+       /*
+        * QI_VALSTOP nodes should be cleaned and
+        * OP_PHRASE should be pushed down
+        */
+       if (needcleanup)
+               return cleanup_fakeval_and_phrase(query);
 
        return query;
 }
@@ -600,12 +736,15 @@ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
        (inf)->cur = (inf)->buf + len; \
 }
 
+#define PRINT_PRIORITY(x) \
+       ( (QO_PRIORITY(x) == OP_NOT) ? OP_NOT_PHRASE : QO_PRIORITY(x) )
+
 /*
- * recursive walk on tree and print it in
- * infix (human-readable) view
+ * recursively traverse the tree and
+ * print it in infix (human-readable) form
  */
 static void
-infix(INFIX *in, bool first)
+infix(INFIX *in, int parentPriority)
 {
        /* since this function recurses, it could be driven to stack overflow. */
        check_stack_depth();
@@ -674,24 +813,22 @@ infix(INFIX *in, bool first)
        }
        else if (in->curpol->qoperator.oper == OP_NOT)
        {
-               bool            isopr = false;
+               int             priority = PRINT_PRIORITY(in->curpol);
 
-               RESIZEBUF(in, 1);
-               *(in->cur) = '!';
-               in->cur++;
-               *(in->cur) = '\0';
-               in->curpol++;
-
-               if (in->curpol->type == QI_OPR)
+               if (priority < parentPriority)
                {
-                       isopr = true;
                        RESIZEBUF(in, 2);
                        sprintf(in->cur, "( ");
                        in->cur = strchr(in->cur, '\0');
                }
+               RESIZEBUF(in, 1);
+               *(in->cur) = '!';
+               in->cur++;
+               *(in->cur) = '\0';
+               in->curpol++;
 
-               infix(in, isopr);
-               if (isopr)
+               infix(in, priority);
+               if (priority < parentPriority)
                {
                        RESIZEBUF(in, 2);
                        sprintf(in->cur, " )");
@@ -701,11 +838,18 @@ infix(INFIX *in, bool first)
        else
        {
                int8            op = in->curpol->qoperator.oper;
+               int                     priority = PRINT_PRIORITY(in->curpol);
+               int16           distance = in->curpol->qoperator.distance;
                INFIX           nrm;
+               bool            needParenthesis = false;
 
                in->curpol++;
-               if (op == OP_OR && !first)
+               if (priority < parentPriority ||
+                       (op == OP_PHRASE &&
+                               (priority == parentPriority || /* phrases are not commutative! */
+                                       parentPriority == OP_PRIORITY(OP_AND))))
                {
+                       needParenthesis = true;
                        RESIZEBUF(in, 2);
                        sprintf(in->cur, "( ");
                        in->cur = strchr(in->cur, '\0');
@@ -717,14 +861,14 @@ infix(INFIX *in, bool first)
                nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
 
                /* get right operand */
-               infix(&nrm, false);
+               infix(&nrm, priority);
 
                /* get & print left operand */
                in->curpol = nrm.curpol;
-               infix(in, false);
+               infix(in, priority);
 
                /* print operator & right operand */
-               RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
+               RESIZEBUF(in, 3 + (2 + 10 /* distance */) + (nrm.cur - nrm.buf));
                switch (op)
                {
                        case OP_OR:
@@ -733,6 +877,12 @@ infix(INFIX *in, bool first)
                        case OP_AND:
                                sprintf(in->cur, " & %s", nrm.buf);
                                break;
+                       case OP_PHRASE:
+                               if (distance != 1)
+                                       sprintf(in->cur, " <%d> %s", distance, nrm.buf);
+                               else
+                                       sprintf(in->cur, " <-> %s", nrm.buf);
+                               break;
                        default:
                                /* OP_NOT is handled in above if-branch */
                                elog(ERROR, "unrecognized operator type: %d", op);
@@ -740,7 +890,7 @@ infix(INFIX *in, bool first)
                in->cur = strchr(in->cur, '\0');
                pfree(nrm.buf);
 
-               if (op == OP_OR && !first)
+               if (needParenthesis)
                {
                        RESIZEBUF(in, 2);
                        sprintf(in->cur, " )");
@@ -749,7 +899,6 @@ infix(INFIX *in, bool first)
        }
 }
 
-
 Datum
 tsqueryout(PG_FUNCTION_ARGS)
 {
@@ -768,7 +917,7 @@ tsqueryout(PG_FUNCTION_ARGS)
        nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
        *(nrm.cur) = '\0';
        nrm.op = GETOPERAND(query);
-       infix(&nrm, true);
+       infix(&nrm, -1 /* lowest priority */);
 
        PG_FREE_IF_COPY(query, 0);
        PG_RETURN_CSTRING(nrm.buf);
@@ -789,7 +938,8 @@ tsqueryout(PG_FUNCTION_ARGS)
  *
  * For each operator:
  * uint8       type, QI_OPR
- * uint8       operator, one of OP_AND, OP_OR, OP_NOT.
+ * uint8       operator, one of OP_AND, OP_PHRASE, OP_OR, OP_NOT.
+ * uint16      distance (only for OP_PHRASE)
  */
 Datum
 tsquerysend(PG_FUNCTION_ARGS)
@@ -815,6 +965,9 @@ tsquerysend(PG_FUNCTION_ARGS)
                                break;
                        case QI_OPR:
                                pq_sendint(&buf, item->qoperator.oper, sizeof(item->qoperator.oper));
+                               if (item->qoperator.oper == OP_PHRASE)
+                                       pq_sendint(&buf, item->qoperator.distance,
+                                                          sizeof(item->qoperator.distance));
                                break;
                        default:
                                elog(ERROR, "unrecognized tsquery node type: %d", item->type);
@@ -830,15 +983,16 @@ tsquerysend(PG_FUNCTION_ARGS)
 Datum
 tsqueryrecv(PG_FUNCTION_ARGS)
 {
-       StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
-       TSQuery         query;
-       int                     i,
-                               len;
-       QueryItem  *item;
-       int                     datalen;
-       char       *ptr;
-       uint32          size;
-       const char **operands;
+       StringInfo              buf = (StringInfo) PG_GETARG_POINTER(0);
+       TSQuery                 query;
+       int                             i,
+                                       len;
+       QueryItem          *item;
+       int                             datalen;
+       char               *ptr;
+       uint32                  size;
+       const char        **operands;
+       bool                    needcleanup;
 
        size = pq_getmsgint(buf, sizeof(uint32));
        if (size > (MaxAllocSize / sizeof(QueryItem)))
@@ -907,13 +1061,15 @@ tsqueryrecv(PG_FUNCTION_ARGS)
                        int8            oper;
 
                        oper = (int8) pq_getmsgint(buf, sizeof(int8));
-                       if (oper != OP_NOT && oper != OP_OR && oper != OP_AND)
+                       if (oper != OP_NOT && oper != OP_OR && oper != OP_AND && oper != OP_PHRASE)
                                elog(ERROR, "invalid tsquery: unrecognized operator type %d",
                                         (int) oper);
                        if (i == size - 1)
                                elog(ERROR, "invalid pointer to right operand");
 
                        item->qoperator.oper = oper;
+                       if (oper == OP_PHRASE)
+                               item->qoperator.distance = (int16) pq_getmsgint(buf, sizeof(int16));
                }
                else
                        elog(ERROR, "unrecognized tsquery node type: %d", item->type);
@@ -930,7 +1086,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
         * Fill in the left-pointers. Checks that the tree is well-formed as a
         * side-effect.
         */
-       findoprnd(item, size);
+       findoprnd(item, size, &needcleanup);
 
        /* Copy operands to output struct */
        for (i = 0; i < size; i++)
@@ -949,7 +1105,10 @@ tsqueryrecv(PG_FUNCTION_ARGS)
 
        SET_VARSIZE(query, len + datalen);
 
-       PG_RETURN_TSVECTOR(query);
+       if (needcleanup)
+               PG_RETURN_TSQUERY(cleanup_fakeval_and_phrase(query));
+
+       PG_RETURN_TSQUERY(query);
 }
 
 /*
index 333789be3c5a5e9ce7a67e5a55dcedcaa7647105..126795504ad19a320613b793eecfa7bcbcf6625f 100644 (file)
@@ -25,6 +25,12 @@ typedef struct NODE
        QueryItem  *valnode;
 } NODE;
 
+/* Non-operator nodes have fake (but highest) priority */
+#define NODE_PRIORITY(x) \
+       ( ((x)->valnode->qoperator.type == QI_OPR) ? \
+                       QO_PRIORITY((x)->valnode) : \
+                       TOP_PRIORITY )
+
 /*
  * make query tree from plain view of query
  */
@@ -160,7 +166,8 @@ clean_NOT_intree(NODE *node)
        {
                NODE       *res = node;
 
-               Assert(node->valnode->qoperator.oper == OP_AND);
+               Assert(node->valnode->qoperator.oper == OP_AND ||
+                          node->valnode->qoperator.oper == OP_PHRASE);
 
                node->left = clean_NOT_intree(node->left);
                node->right = clean_NOT_intree(node->right);
@@ -212,18 +219,20 @@ clean_NOT(QueryItem *ptr, int *len)
 #define V_STOP         3                       /* the expression is a stop word */
 
 /*
- * Clean query tree from values which is always in
- * text (stopword)
+ * Remove QI_VALSTOP (stopword nodes) from query tree.
  */
 static NODE *
-clean_fakeval_intree(NODE *node, char *result)
+clean_fakeval_intree(NODE *node, char *result, int *adddistance)
 {
-       char            lresult = V_UNKNOWN,
-                               rresult = V_UNKNOWN;
+       char    lresult = V_UNKNOWN,
+                       rresult = V_UNKNOWN;
 
        /* since this function recurses, it could be driven to stack overflow. */
        check_stack_depth();
 
+       if (adddistance)
+               *adddistance = 0;
+
        if (node->valnode->type == QI_VAL)
                return node;
        else if (node->valnode->type == QI_VALSTOP)
@@ -237,7 +246,7 @@ clean_fakeval_intree(NODE *node, char *result)
 
        if (node->valnode->qoperator.oper == OP_NOT)
        {
-               node->right = clean_fakeval_intree(node->right, &rresult);
+               node->right = clean_fakeval_intree(node->right, &rresult, NULL);
                if (!node->right)
                {
                        *result = V_STOP;
@@ -247,13 +256,30 @@ clean_fakeval_intree(NODE *node, char *result)
        }
        else
        {
-               NODE       *res = node;
+               NODE   *res = node;
+               int             ndistance, ldistance = 0, rdistance = 0;
+
+               ndistance = (node->valnode->qoperator.oper == OP_PHRASE) ?
+                                               node->valnode->qoperator.distance :
+                                               0;
 
-               node->left = clean_fakeval_intree(node->left, &lresult);
-               node->right = clean_fakeval_intree(node->right, &rresult);
+               node->left  = clean_fakeval_intree(node->left,
+                                                                                  &lresult,
+                                                                                  ndistance ? &ldistance : NULL);
+
+               node->right = clean_fakeval_intree(node->right,
+                                                                                  &rresult,
+                                                                                  ndistance ? &rdistance : NULL);
+
+               /*
+                * ndistance, ldistance and rdistance are greater than zero
+                * if their corresponding nodes are OP_PHRASE
+                */
 
                if (lresult == V_STOP && rresult == V_STOP)
                {
+                       if (adddistance && ndistance)
+                               *adddistance = ldistance + ndistance + rdistance;
                        freetree(node);
                        *result = V_STOP;
                        return NULL;
@@ -261,33 +287,333 @@ clean_fakeval_intree(NODE *node, char *result)
                else if (lresult == V_STOP)
                {
                        res = node->right;
+                       /*
+                        * propagate distance from current node to the
+                        * right upper subtree.
+                        */
+                       if (adddistance && ndistance)
+                               *adddistance = rdistance;
                        pfree(node);
                }
                else if (rresult == V_STOP)
                {
                        res = node->left;
+                       /*
+                        * propagate distance from current node to the upper tree.
+                        */
+                       if (adddistance && ndistance)
+                               *adddistance = ndistance + ldistance;
                        pfree(node);
                }
+               else if (ndistance)
+               {
+                       node->valnode->qoperator.distance += ldistance;
+                       if (adddistance)
+                               *adddistance = 0;
+               }
+               else if (adddistance)
+               {
+                       *adddistance = 0;
+               }
+
                return res;
        }
        return node;
 }
 
-QueryItem *
-clean_fakeval(QueryItem *ptr, int *len)
+/*
+ * Make a deep copy of the subtree rooted at "node".
+ *
+ * Every copied NODE gets its own palloc'd QueryItem holding a byte copy of
+ * the source item.  For operator nodes the right child is always copied;
+ * the left child is copied for every operator except OP_NOT.  The left/right
+ * pointers of the copy are not assigned in the remaining cases.
+ */
+static NODE *
+copyNODE(NODE *node)
 {
-       NODE       *root = maketree(ptr);
+       NODE *cnode = palloc(sizeof(NODE));
+
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
+       /* private copy of the item, so the copy can be modified independently */
+       cnode->valnode = palloc(sizeof(QueryItem));
+       *(cnode->valnode) = *(node->valnode);
+
+       if (node->valnode->type == QI_OPR)
+       {
+               cnode->right = copyNODE(node->right);
+               /* OP_NOT is unary: only its right child is copied */
+               if (node->valnode->qoperator.oper != OP_NOT)
+                       cnode->left = copyNODE(node->left);
+       }
+
+       return cnode;
+}
+
+/*
+ * Build a new operator node of kind "op" with the given children.
+ *
+ * The node's QueryItem is zero-filled before the type and operator are set,
+ * so fields this function does not assign (such as the phrase distance)
+ * start out as zero instead of uninitialized memory.  Callers that create
+ * an OP_PHRASE node are still expected to store the distance themselves.
+ */
+static NODE *
+makeNODE(int8 op, NODE *left, NODE *right)
+{
+       NODE *node = palloc(sizeof(NODE));
+
+       /* palloc0: never leave garbage in an item that may be copied out */
+       node->valnode = palloc0(sizeof(QueryItem));
+
+       node->valnode->qoperator.type = QI_OPR;
+       node->valnode->qoperator.oper = op;
+
+       node->left = left;
+       node->right = right;
+
+       return node;
+}
+
+/*
+ * Move operations with high priority to the leaves. This guarantees
+ * that the phrase operator will be near the bottom of the tree.
+ * The idea is to avoid storing lexeme positions while executing the
+ * ordinary operations (AND, OR, NOT), because that could be expensive.
+ * The actual transformation is performed only on subtrees under a
+ * <-> (<n>) operation, since it is needed solely for the phrase operator.
+ *
+ * Rules:
+ *       a      <->  (b | c)   =>      (a <-> b)  |   (a <-> c)
+ *   (a | b)  <->       c         =>   (a <-> c)  |   (b <-> c)
+ *       a      <->  (b & c)   =>      (a <-> b)  &   (a <-> c)
+ *   (a & b)  <->       c         =>   (a <-> c)  &   (b <-> c)
+ *       a      <->    !b         =>           a         &  !(a <-> b)
+ *      !a      <->     b         =>           b         &  !(a <-> b)
+ *
+ * Warnings for readers:
+ *               a <-> b          !=      b <-> a
+ *
+ *       a <n> (b <n> c)   !=   (a <n> b) <n> c since the phrase lengths are:
+ *                      n                                      2n-1
+ *
+ * Returns the (possibly replaced) root of the normalized subtree.
+ */
+static NODE *
+normalize_phrase_tree(NODE *node)
+{
+       /* there should be no stop words at this point */
+       Assert(node->valnode->type != QI_VALSTOP);
+
+       if (node->valnode->type == QI_VAL)
+               return node;
+
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
+       Assert(node->valnode->type == QI_OPR);
+
+       if (node->valnode->qoperator.oper == OP_NOT)
+       {
+               NODE       *orignode = node;
+
+               /*
+                * Eliminate NOT sequences (!!x => x).  Both this node and its
+                * child must really be OP_NOT operators: the child's type has to
+                * be checked before its operator field is read, and the loop must
+                * stop as soon as we reach anything that is not a NOT.
+                */
+               while (node->valnode->type == QI_OPR &&
+                          node->valnode->qoperator.oper == OP_NOT &&
+                          node->right->valnode->type == QI_OPR &&
+                          node->right->valnode->qoperator.oper == OP_NOT)
+               {
+                       node = node->right->right;
+               }
+
+               if (orignode != node)
+               {
+                       /*
+                        * We skipped down to a node that has not been examined yet;
+                        * it may be an operand or any operator, so normalize it as a
+                        * whole instead of assuming it has a right child.
+                        */
+                       node = normalize_phrase_tree(node);
+               }
+               else
+                       node->right = normalize_phrase_tree(node->right);
+       }
+       else if (node->valnode->qoperator.oper == OP_PHRASE)
+       {
+               int16    distance;
+               NODE    *X;
+
+               node->left = normalize_phrase_tree(node->left);
+               node->right = normalize_phrase_tree(node->right);
+
+               /* nothing to do if neither child has lower priority than us */
+               if (NODE_PRIORITY(node) <= NODE_PRIORITY(node->right) &&
+                       NODE_PRIORITY(node) <= NODE_PRIORITY(node->left))
+                               return node;
+
+               /*
+                * The operands can't be swapped, so each side is handled
+                * separately, because a <-> b  !=  b <-> a
+                */
+
+               distance = node->valnode->qoperator.distance;
+
+               if (node->right->valnode->type == QI_OPR)
+               {
+                       switch (node->right->valnode->qoperator.oper)
+                       {
+                               case OP_AND:
+                                       /* a <-> (b & c)  =>  (a <-> b) & (a <-> c) */
+                                       node = makeNODE(OP_AND,
+                                                                       makeNODE(OP_PHRASE,
+                                                                                        node->left,
+                                                                                        node->right->left),
+                                                                       makeNODE(OP_PHRASE,
+                                                                                        copyNODE(node->left),
+                                                                                        node->right->right));
+                                       node->left->valnode->qoperator.distance =
+                                               node->right->valnode->qoperator.distance = distance;
+                                       break;
+                               case OP_OR:
+                                       /* a <-> (b | c)  =>  (a <-> b) | (a <-> c) */
+                                       node = makeNODE(OP_OR,
+                                                                       makeNODE(OP_PHRASE,
+                                                                                        node->left,
+                                                                                        node->right->left),
+                                                                       makeNODE(OP_PHRASE,
+                                                                                        copyNODE(node->left),
+                                                                                        node->right->right));
+                                       node->left->valnode->qoperator.distance =
+                                               node->right->valnode->qoperator.distance = distance;
+                                       break;
+                               case OP_NOT:
+                                       /* a <-> !b  =>  a & !(a <-> b) */
+                                       X = node->right;
+                                       node->right = node->right->right;
+                                       X->right = node;
+                                       node = makeNODE(OP_AND,
+                                                                       copyNODE(node->left),
+                                                                       X);
+                                       break;
+                               case OP_PHRASE:
+                                       /* no-op */
+                                       break;
+                               default:
+                                       elog(ERROR,"Wrong type of tsquery node: %d",
+                                                               node->right->valnode->qoperator.oper);
+                       }
+               }
+
+               if (node->left->valnode->type == QI_OPR &&
+                       node->valnode->qoperator.oper == OP_PHRASE)
+               {
+                       /*
+                        * if the node is still OP_PHRASE, check the left subtree,
+                        * otherwise the whole node will be transformed later.
+                        */
+                       switch(node->left->valnode->qoperator.oper)
+                       {
+                               case OP_AND:
+                                       /*  (a & b) <-> c  =>  (a <-> c) & (b <-> c) */
+                                       node = makeNODE(OP_AND,
+                                                                       makeNODE(OP_PHRASE,
+                                                                                        node->left->left,
+                                                                                        node->right),
+                                                                       makeNODE(OP_PHRASE,
+                                                                                        node->left->right,
+                                                                                        copyNODE(node->right)));
+                                       node->left->valnode->qoperator.distance =
+                                               node->right->valnode->qoperator.distance = distance;
+                                       break;
+                               case OP_OR:
+                                       /* (a | b) <-> c  =>  (a <-> c) | (b <-> c) */
+                                       node = makeNODE(OP_OR,
+                                                                       makeNODE(OP_PHRASE,
+                                                                                        node->left->left,
+                                                                                        node->right),
+                                                                       makeNODE(OP_PHRASE,
+                                                                                        node->left->right,
+                                                                                        copyNODE(node->right)));
+                                       node->left->valnode->qoperator.distance =
+                                               node->right->valnode->qoperator.distance = distance;
+                                       break;
+                               case OP_NOT:
+                                       /* !a <-> b  =>  b & !(a <-> b) */
+                                       X = node->left;
+                                       node->left = node->left->right;
+                                       X->right = node;
+                                       node = makeNODE(OP_AND,
+                                                                       X,
+                                                                       copyNODE(node->right));
+                                       break;
+                               case OP_PHRASE:
+                                       /* no-op */
+                                       break;
+                               default:
+                                       elog(ERROR,"Wrong type of tsquery node: %d",
+                                                               node->left->valnode->qoperator.oper);
+                       }
+               }
+
+               /* continue transformation */
+               node = normalize_phrase_tree(node);
+       }
+       else /* AND or OR */
+       {
+               node->left = normalize_phrase_tree(node->left);
+               node->right = normalize_phrase_tree(node->right);
+       }
+
+       return node;
+}
+
+/*
+ * Space needed for the operand strings of a query tree: for every QI_VAL
+ * node, the operand's length plus one extra byte.  Operator nodes add
+ * nothing themselves; OP_NOT contributes only its right subtree.
+ */
+static int32
+calcstrlen(NODE *node)
+{
+       int32   size = 0;
+
+       /* since this function recurses, it could be driven to stack overflow. */
+       check_stack_depth();
+
+       if (node->valnode->type == QI_VAL)
+       {
+               size = node->valnode->qoperand.length + 1;
+       }
+       else
+       {
+               Assert(node->valnode->type == QI_OPR);
+
+               size = calcstrlen(node->right);
+               if (node->valnode->qoperator.oper != OP_NOT)
+                       size += calcstrlen(node->left);
+       }
+
+       return size;
+}
+
+/*
+ * Remove stop-word nodes from a tsquery and push OP_PHRASE operators down
+ * towards the leaves, returning the resulting query.
+ *
+ * An empty input is returned unchanged.  If stop-word removal leaves nothing
+ * (result is no longer V_UNKNOWN), a NOTICE is emitted and a freshly
+ * allocated empty query is returned.  Otherwise a new TSQuery is built from
+ * the flattened tree, with the operand strings copied out of "in".
+ */
+TSQuery
+cleanup_fakeval_and_phrase(TSQuery in)
+{
+       int32           len,
+                               lenstr,
+                               commonlen,
+                               i;
+       NODE       *root;
        char            result = V_UNKNOWN;
-       NODE       *resroot;
+       TSQuery         out;
+       QueryItem  *items;
+       char       *operands;
 
-       resroot = clean_fakeval_intree(root, &result);
+       if (in->size == 0)
+               return in;
+
+       /* eliminate stop words */
+       root = clean_fakeval_intree(maketree(GETQUERY(in)), &result, NULL);
        if (result != V_UNKNOWN)
        {
                ereport(NOTICE,
                                (errmsg("text-search query contains only stop words or doesn't contain lexemes, ignored")));
-               *len = 0;
-               return NULL;
+               out = palloc(HDRSIZETQ);
+               out->size = 0;
+               SET_VARSIZE(out, HDRSIZETQ);
+               return out;
+       }
+
+       /* push OP_PHRASE nodes down */
+       root = normalize_phrase_tree(root);
+
+       /*
+        * Build TSQuery from plain view
+        */
+
+       /* lenstr: bytes for operand strings; len: number of items */
+       lenstr = calcstrlen(root);
+       items = plaintree(root, &len);
+       commonlen = COMPUTESIZE(len, lenstr);
+
+       out = palloc(commonlen);
+       SET_VARSIZE(out, commonlen);
+       out->size = len;
+
+       memcpy(GETQUERY(out), items, len * sizeof(QueryItem));
+
+       items = GETQUERY(out);
+       operands = GETOPERAND(out);
+       for (i = 0; i < out->size; i++)
+       {
+               QueryOperand *op = (QueryOperand *) &items[i];
+
+               if (op->type != QI_VAL)
+                       continue;
+
+               /*
+                * op->distance is still an offset into the input's operand area:
+                * copy the string, terminate it, then re-point the operand at
+                * its new offset within the output's operand area.
+                */
+               memcpy(operands, GETOPERAND(in) + op->distance, op->length);
+               operands[op->length] = '\0';
+               op->distance = operands - GETOPERAND(out);
+               operands += op->length + 1;
        }
 
-       return plaintree(resroot, len);
+       return out;
 }
index 9cdf1fe10b2d13eb208bb2d7ded2371a37d8c8f9..30d3faf3e2278cf17d0ce334e734e01f3d6e08fa 100644 (file)
@@ -27,7 +27,7 @@ tsquery_numnode(PG_FUNCTION_ARGS)
 }
 
 static QTNode *
-join_tsqueries(TSQuery a, TSQuery b, int8 operator)
+join_tsqueries(TSQuery a, TSQuery b, int8 operator, uint16 distance)
 {
        QTNode     *res = (QTNode *) palloc0(sizeof(QTNode));
 
@@ -36,6 +36,8 @@ join_tsqueries(TSQuery a, TSQuery b, int8 operator)
        res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
        res->valnode->type = QI_OPR;
        res->valnode->qoperator.oper = operator;
+       if (operator == OP_PHRASE)
+               res->valnode->qoperator.distance = distance;
 
        res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
        res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b));
@@ -64,7 +66,7 @@ tsquery_and(PG_FUNCTION_ARGS)
                PG_RETURN_POINTER(a);
        }
 
-       res = join_tsqueries(a, b, OP_AND);
+       res = join_tsqueries(a, b, OP_AND, 0);
 
        query = QTN2QT(res);
 
@@ -94,7 +96,7 @@ tsquery_or(PG_FUNCTION_ARGS)
                PG_RETURN_POINTER(a);
        }
 
-       res = join_tsqueries(a, b, OP_OR);
+       res = join_tsqueries(a, b, OP_OR, 0);
 
        query = QTN2QT(res);
 
@@ -105,6 +107,52 @@ tsquery_or(PG_FUNCTION_ARGS)
        PG_RETURN_POINTER(query);
 }
 
+/*
+ * SQL-callable: combine two tsqueries with the phrase operator,
+ * producing "a <distance> b".
+ *
+ * Arguments: tsquery a, tsquery b, int4 distance.  The distance must lie in
+ * the range 0 .. MAXENTRYPOS (both inclusive), otherwise an
+ * ERRCODE_INVALID_PARAMETER_VALUE error is raised.  If either input is
+ * empty, the other one is returned as is.  Otherwise the joined query is
+ * passed through cleanup_fakeval_and_phrase() before being returned.
+ */
+Datum
+tsquery_phrase_distance(PG_FUNCTION_ARGS)
+{
+       TSQuery         a = PG_GETARG_TSQUERY_COPY(0);
+       TSQuery         b = PG_GETARG_TSQUERY_COPY(1);
+       QTNode     *res;
+       TSQuery         query;
+       int32           distance = PG_GETARG_INT32(2);
+
+       if (distance < 0 || distance > MAXENTRYPOS)
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("distance in phrase operator must be an integer value between zero and %d inclusive",
+                                           MAXENTRYPOS)));
+       if (a->size == 0)
+       {
+               /* "a" was fetched from argument slot 0, so compare against it */
+               PG_FREE_IF_COPY(a, 0);
+               PG_RETURN_POINTER(b);
+       }
+       else if (b->size == 0)
+       {
+               PG_FREE_IF_COPY(b, 1);
+               PG_RETURN_POINTER(a);
+       }
+
+       res = join_tsqueries(a, b, OP_PHRASE, (uint16) distance);
+
+       query = QTN2QT(res);
+
+       QTNFree(res);
+       PG_FREE_IF_COPY(a, 0);
+       PG_FREE_IF_COPY(b, 1);
+
+       PG_RETURN_POINTER(cleanup_fakeval_and_phrase(query));
+}
+
+/*
+ * SQL-callable: phrase-join two tsqueries with a distance of 1 ("a <-> b").
+ * Simply delegates to tsquery_phrase_distance().
+ */
+Datum
+tsquery_phrase(PG_FUNCTION_ARGS)
+{
+       /* DirectFunctionCall3 already yields a Datum; hand it back unchanged */
+       PG_RETURN_DATUM(DirectFunctionCall3(
+                                                       tsquery_phrase_distance,
+                                                       PG_GETARG_DATUM(0),
+                                                       PG_GETARG_DATUM(1),
+                                                       Int32GetDatum(1)));
+}
+
 Datum
 tsquery_not(PG_FUNCTION_ARGS)
 {
index fe26ad52dd2ed0c2ea9d1993742a048a2ab915e8..0f338aa653d8e8a9336ad53ca0fc05f4cb9f4969 100644 (file)
@@ -110,6 +110,10 @@ QTNodeCompare(QTNode *an, QTNode *bn)
                                if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
                                        return res;
                }
+
+               if (ao->oper == OP_PHRASE && ao->distance != bo->distance)
+                       return (ao->distance > bo->distance) ? -1 : 1;
+
                return 0;
        }
        else if (an->valnode->type == QI_VAL)
@@ -150,7 +154,7 @@ QTNSort(QTNode *in)
 
        for (i = 0; i < in->nchild; i++)
                QTNSort(in->child[i]);
-       if (in->nchild > 1)
+       if (in->nchild > 1 && in->valnode->qoperator.oper != OP_PHRASE)
                qsort((void *) in->child, in->nchild, sizeof(QTNode *), cmpQTN);
 }
 
@@ -190,7 +194,10 @@ QTNTernary(QTNode *in)
        {
                QTNode     *cc = in->child[i];
 
-               if (cc->valnode->type == QI_OPR && in->valnode->qoperator.oper == cc->valnode->qoperator.oper)
+               /* OP_Phrase isn't associative */
+               if (cc->valnode->type == QI_OPR &&
+                       in->valnode->qoperator.oper == cc->valnode->qoperator.oper &&
+                       in->valnode->qoperator.oper != OP_PHRASE)
                {
                        int                     oldnchild = in->nchild;
 
index 53f678a3bfbb0e8a1217c2353706ad70365b8b40..ab47b763eeb88a15f83abb592bd69c55a69dec02 100644 (file)
@@ -364,8 +364,10 @@ calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
                return 0.0;
 
        /* XXX: What about NOT? */
-       res = (item->type == QI_OPR && item->qoperator.oper == OP_AND) ?
-               calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
+       res = (item->type == QI_OPR && (item->qoperator.oper == OP_AND ||
+                                                                       item->qoperator.oper == OP_PHRASE)) ?
+                       calc_rank_and(w, t, q) :
+                       calc_rank_or(w, t, q);
 
        if (res < 0)
                res = 1e-20f;
@@ -496,10 +498,17 @@ ts_rank_tt(PG_FUNCTION_ARGS)
 
 typedef struct
 {
-       QueryItem **item;
-       int16           nitem;
-       uint8           wclass;
-       int32           pos;
+       union {
+               struct { /* compiled doc representation */
+                       QueryItem **items;
+                       int16           nitem;
+               } query;
+               struct { /* struct is used for preparing doc representation */
+                       QueryItem  *item;
+                       WordEntry  *entry;
+               } map;
+       } data;
+       WordEntryPos            pos;
 } DocRepresentation;
 
 static int
@@ -508,26 +517,59 @@ compareDocR(const void *va, const void *vb)
        const DocRepresentation *a = (const DocRepresentation *) va;
        const DocRepresentation *b = (const DocRepresentation *) vb;
 
-       if (a->pos == b->pos)
-               return 0;
-       return (a->pos > b->pos) ? 1 : -1;
+       if (WEP_GETPOS(a->pos) == WEP_GETPOS(b->pos))
+       {
+               if (WEP_GETWEIGHT(a->pos) == WEP_GETWEIGHT(b->pos))
+               {
+                       if (a->data.map.entry == b->data.map.entry)
+                               return 0;
+
+                       return (a->data.map.entry > b->data.map.entry) ? 1 : -1;
+               }
+
+               return (WEP_GETWEIGHT(a->pos) > WEP_GETWEIGHT(b->pos)) ? 1 : -1;
+       }
+
+       return (WEP_GETPOS(a->pos) > WEP_GETPOS(b->pos)) ? 1 : -1;
 }
 
+#define MAXQROPOS      MAXENTRYPOS
+typedef struct
+{
+       bool                    operandexists;
+       bool                    reverseinsert; /* indicates insert order,
+                                                                         true means descending order */
+       uint32                  npos;
+       WordEntryPos    pos[MAXQROPOS];
+} QueryRepresentationOperand;
+
 typedef struct
 {
-       TSQuery         query;
-       bool       *operandexist;
+       TSQuery                                         query;
+       QueryRepresentationOperand *operandData;
 } QueryRepresentation;
 
-#define QR_GET_OPERAND_EXISTS(q, v)            ( (q)->operandexist[ ((QueryItem*)(v)) - GETQUERY((q)->query) ] )
-#define QR_SET_OPERAND_EXISTS(q, v)  QR_GET_OPERAND_EXISTS(q,v) = true
+#define QR_GET_OPERAND_DATA(q, v) \
+       ( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) )
 
+/*
+ * Callback handed to TS_execute(): report whether the operand "val" has been
+ * marked as present in the QueryRepresentation passed via "checkval".
+ *
+ * When the operand exists and "data" is not NULL, data->npos and data->pos
+ * are pointed at the positions collected for that operand.  If the positions
+ * were inserted in reverse order, they occupy the tail of the pos[] array,
+ * so the returned pointer is advanced to the first used slot.
+ */
 static bool
-checkcondition_QueryOperand(void *checkval, QueryOperand *val)
+checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *data)
 {
-       QueryRepresentation *qr = (QueryRepresentation *) checkval;
+       QueryRepresentation                     *qr = (QueryRepresentation *) checkval;
+       QueryRepresentationOperand      *opData = QR_GET_OPERAND_DATA(qr, val);
 
-       return QR_GET_OPERAND_EXISTS(qr, val);
+       if (!opData->operandexists)
+               return false;
+
+       if (data)
+       {
+               data->npos = opData->npos;
+               data->pos = opData->pos;
+               /* reverse fill: used entries are the last npos slots */
+               if (opData->reverseinsert)
+                       data->pos += MAXQROPOS - opData->npos;
+       }
+
+       return true;
 }
 
 typedef struct
@@ -539,14 +581,65 @@ typedef struct
        DocRepresentation *end;
 } CoverExt;
 
+/*
+ * Forget everything collected for the query's operands: each per-item slot
+ * is marked as absent with zero positions, and is told which direction
+ * (reverseinsert) subsequent positions will be inserted in.
+ */
+static void
+resetQueryRepresentation(QueryRepresentation *qr, bool reverseinsert)
+{
+       QueryRepresentationOperand *operand = qr->operandData;
+       QueryRepresentationOperand *stop = operand + qr->query->size;
+
+       /* one slot per query item, walked front to back */
+       for (; operand < stop; operand++)
+       {
+               operand->npos = 0;
+               operand->reverseinsert = reverseinsert;
+               operand->operandexists = false;
+       }
+}
+
+/*
+ * Record one document entry in the query representation: every QI_VAL item
+ * attached to "entry" is marked as existing, and entry->pos is appended to
+ * that operand's position list unless its position equals the one stored
+ * most recently.  With reverseinsert set, the list grows from the end of
+ * the pos[] array towards its start; otherwise from the start upwards.
+ */
+static void
+fillQueryRepresentationData(QueryRepresentation *qr, DocRepresentation *entry)
+{
+       int                     idx;
+
+       for (idx = 0; idx < entry->data.query.nitem; idx++)
+       {
+               QueryItem                                  *qitem = entry->data.query.items[idx];
+               QueryRepresentationOperand *operand;
+               int                                                     slot;
+
+               /* only operands carry positions */
+               if (qitem->type != QI_VAL)
+                       continue;
+
+               operand = QR_GET_OPERAND_DATA(qr, qitem);
+               operand->operandexists = true;
+
+               if (operand->npos != 0)
+               {
+                       /* slot holding the most recently stored position */
+                       slot = operand->reverseinsert ?
+                                               (MAXQROPOS - operand->npos) :
+                                               (operand->npos - 1);
+
+                       /* same position as last time: nothing new to remember */
+                       if (WEP_GETPOS(operand->pos[slot]) == WEP_GETPOS(entry->pos))
+                               continue;
+               }
+
+               /* next free slot in the current fill direction */
+               slot = operand->reverseinsert ?
+                                       (MAXQROPOS - 1 - operand->npos) :
+                                       (operand->npos);
+
+               operand->pos[slot] = entry->pos;
+               operand->npos++;
+       }
+}
 
 static bool
 Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
 {
-       DocRepresentation *ptr;
-       int                     lastpos = ext->pos;
-       int                     i;
-       bool            found = false;
+       DocRepresentation       *ptr;
+       int                                      lastpos = ext->pos;
+       bool                             found = false;
 
        /*
         * since this function recurses, it could be driven to stack overflow.
@@ -554,7 +647,7 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
         */
        check_stack_depth();
 
-       memset(qr->operandexist, 0, sizeof(bool) * qr->query->size);
+       resetQueryRepresentation(qr, false);
 
        ext->p = INT_MAX;
        ext->q = 0;
@@ -563,16 +656,13 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
        /* find upper bound of cover from current position, move up */
        while (ptr - doc < len)
        {
-               for (i = 0; i < ptr->nitem; i++)
-               {
-                       if (ptr->item[i]->type == QI_VAL)
-                               QR_SET_OPERAND_EXISTS(qr, ptr->item[i]);
-               }
+               fillQueryRepresentationData(qr, ptr);
+
                if (TS_execute(GETQUERY(qr->query), (void *) qr, false, checkcondition_QueryOperand))
                {
-                       if (ptr->pos > ext->q)
+                       if (WEP_GETPOS(ptr->pos) > ext->q)
                        {
-                               ext->q = ptr->pos;
+                               ext->q = WEP_GETPOS(ptr->pos);
                                ext->end = ptr;
                                lastpos = ptr - doc;
                                found = true;
@@ -585,22 +675,24 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
        if (!found)
                return false;
 
-       memset(qr->operandexist, 0, sizeof(bool) * qr->query->size);
+       resetQueryRepresentation(qr, true);
 
        ptr = doc + lastpos;
 
        /* find lower bound of cover from found upper bound, move down */
        while (ptr >= doc + ext->pos)
        {
-               for (i = 0; i < ptr->nitem; i++)
-                       if (ptr->item[i]->type == QI_VAL)
-                               QR_SET_OPERAND_EXISTS(qr, ptr->item[i]);
+               /*
+                * we scan doc from right to left, so pos info in reverse order!
+                */
+               fillQueryRepresentationData(qr, ptr);
+
                if (TS_execute(GETQUERY(qr->query), (void *) qr, true, checkcondition_QueryOperand))
                {
-                       if (ptr->pos < ext->p)
+                       if (WEP_GETPOS(ptr->pos) < ext->p)
                        {
                                ext->begin = ptr;
-                               ext->p = ptr->pos;
+                               ext->p = WEP_GETPOS(ptr->pos);
                        }
                        break;
                }
@@ -628,18 +720,20 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
        WordEntry  *entry,
                           *firstentry;
        WordEntryPos *post;
-       int32           dimt,
+       int32           dimt,   /* number of 'post' items */
                                j,
                                i,
                                nitem;
        int                     len = qr->query->size * 4,
                                cur = 0;
        DocRepresentation *doc;
-       char       *operand;
 
        doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
-       operand = GETOPERAND(qr->query);
 
+       /*
+        * Iterate through the query to build DocRepresentation items for the
+        * tsvector entries (and their positions) that match a query operand
+        */
        for (i = 0; i < qr->query->size; i++)
        {
                QueryOperand *curoperand;
@@ -649,13 +743,11 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
 
                curoperand = &item[i].qoperand;
 
-               if (QR_GET_OPERAND_EXISTS(qr, &item[i]))
-                       continue;
-
                firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
                if (!entry)
                        continue;
 
+               /* iterations over entries in tsvector */
                while (entry - firstentry < nitem)
                {
                        if (entry->haspos)
@@ -676,53 +768,67 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
                                doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
                        }
 
+                       /* iterations over entry's positions */
                        for (j = 0; j < dimt; j++)
                        {
-                               if (j == 0)
-                               {
-                                       int                     k;
-
-                                       doc[cur].nitem = 0;
-                                       doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size);
-
-                                       for (k = 0; k < qr->query->size; k++)
-                                       {
-                                               QueryOperand *kptr = &item[k].qoperand;
-                                               QueryOperand *iptr = &item[i].qoperand;
-
-                                               if (k == i ||
-                                                       (item[k].type == QI_VAL &&
-                                                        compareQueryOperand(&kptr, &iptr, operand) == 0))
-                                               {
-                                                       /*
-                                                        * if k == i, we've already checked above that
-                                                        * it's type == Q_VAL
-                                                        */
-                                                       doc[cur].item[doc[cur].nitem] = item + k;
-                                                       doc[cur].nitem++;
-                                                       QR_SET_OPERAND_EXISTS(qr, item + k);
-                                               }
-                                       }
-                               }
-                               else
+                               if (curoperand->weight == 0 ||
+                                       curoperand->weight & (1 << WEP_GETWEIGHT(post[j])))
                                {
-                                       doc[cur].nitem = doc[cur - 1].nitem;
-                                       doc[cur].item = doc[cur - 1].item;
+                                       doc[cur].pos = post[j];
+                                       doc[cur].data.map.entry = entry;
+                                       doc[cur].data.map.item = (QueryItem *) curoperand;
+                                       cur++;
                                }
-                               doc[cur].pos = WEP_GETPOS(post[j]);
-                               doc[cur].wclass = WEP_GETWEIGHT(post[j]);
-                               cur++;
                        }
 
                        entry++;
                }
        }
 
-       *doclen = cur;
-
        if (cur > 0)
        {
+               DocRepresentation  *rptr = doc + 1,
+                                                  *wptr = doc,
+                                                       storage;
+
+               /*
+                * Sort representation in ascending order by pos and entry
+                */
                qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+
+               /*
+                * Join QueryItems per WordEntry and its position
+                */
+               storage.pos = doc->pos;
+               storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
+               storage.data.query.items[0] = doc->data.map.item;
+               storage.data.query.nitem = 1;
+
+               while (rptr - doc < cur)
+               {
+                       if (rptr->pos == (rptr-1)->pos &&
+                               rptr->data.map.entry == (rptr-1)->data.map.entry)
+                       {
+                               storage.data.query.items[storage.data.query.nitem] = rptr->data.map.item;
+                               storage.data.query.nitem++;
+                       }
+                       else
+                       {
+                               *wptr = storage;
+                               wptr++;
+                               storage.pos = rptr->pos;
+                               storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
+                               storage.data.query.items[0] = rptr->data.map.item;
+                               storage.data.query.nitem = 1;
+                       }
+
+                       rptr++;
+               }
+
+               *wptr = storage;
+               wptr++;
+
+               *doclen = wptr - doc;
                return doc;
        }
 
@@ -758,12 +864,13 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
        }
 
        qr.query = query;
-       qr.operandexist = (bool *) palloc0(sizeof(bool) * query->size);
+       qr.operandData = (QueryRepresentationOperand *)
+                                               palloc0(sizeof(QueryRepresentationOperand) * query->size);
 
        doc = get_docrep(txt, &qr, &doclen);
        if (!doc)
        {
-               pfree(qr.operandexist);
+               pfree(qr.operandData);
                return 0.0;
        }
 
@@ -777,7 +884,7 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
 
                while (ptr <= ext.end)
                {
-                       InvSum += invws[ptr->wclass];
+                       InvSum += invws[WEP_GETWEIGHT(ptr->pos)];
                        ptr++;
                }
 
@@ -827,7 +934,7 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
 
        pfree(doc);
 
-       pfree(qr.operandexist);
+       pfree(qr.operandData);
 
        return (float4) Wdoc;
 }
index 12043bf3f5ffae96efc401b8289468eedb79b22f..2a26c46551cbde88adf5f8767a7496ccdf39e260 100644 (file)
@@ -28,7 +28,7 @@ typedef struct
 
 
 /* Compare two WordEntryPos values for qsort */
-static int
+int
 comparePos(const void *a, const void *b)
 {
        int                     apos = WEP_GETPOS(*(const WordEntryPos *) a);
index f6d3fb5d7b4a607bc7eb8702c948045b59ac14e2..e363f2a02332d22051beb708d1914ce06bb30c68 100644 (file)
@@ -1121,35 +1121,124 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
 }
 
 /*
- * check weight info
+ * Check weight info and/or fill 'data' with the required positions.
+ *
+ * If the entry has positions and either a weight filter (val->weight) or
+ * position output ('data') is requested:
+ *  - weight and data: copy the positions whose weight matches into a newly
+ *    palloc'd data->pos (weights stripped); true if any position matched
+ *  - weight only: true if at least one position has a matching weight
+ *  - data only: point data->pos at the entry's own position array, true
+ * Otherwise there is nothing to check: return true and leave 'data' as is.
  */
 static bool
-checkclass_str(CHKVAL *chkval, WordEntry *val, QueryOperand *item)
+checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
+                          ExecPhraseData *data)
 {
-       WordEntryPosVector *posvec;
-       WordEntryPos *ptr;
-       uint16          len;
+       bool result = false;
 
-       posvec = (WordEntryPosVector *)
-               (chkval->values + SHORTALIGN(val->pos + val->len));
+       if (entry->haspos && (val->weight || data))
+       {
+               WordEntryPosVector      *posvec;
 
-       len = posvec->npos;
-       ptr = posvec->pos;
+               /*
+                * We can't use the _POSVECPTR macro here because the pointer to the
+                * tsvector's lexeme storage is already contained in chkval->values.
+                */
+               posvec = (WordEntryPosVector *)
+                       (chkval->values + SHORTALIGN(entry->pos + entry->len));
 
-       while (len--)
+               if (val->weight && data)
+               {
+                       WordEntryPos    *posvec_iter = posvec->pos;
+                       WordEntryPos    *dptr;
+
+                       /*
+                        * Filter position information by weights
+                        */
+                       dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
+                       data->allocated = true;
+
+                       /* Collect every position that has a matching weight */
+                       while (posvec_iter < posvec->pos + posvec->npos)
+                       {
+                               /* If true, append this position to the data->pos */
+                               if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
+                               {
+                                       *dptr = WEP_GETPOS(*posvec_iter);
+                                       dptr++;
+                               }
+
+                               posvec_iter++;
+                       }
+
+                       data->npos = dptr - data->pos;
+
+                       if (data->npos > 0)
+                               result = true;
+               }
+               else if (val->weight)
+               {
+                       WordEntryPos    *posvec_iter = posvec->pos;
+
+                       /* Is there a position with a matching weight? */
+                       while (posvec_iter < posvec->pos + posvec->npos)
+                       {
+                               if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
+                               {
+                                       result = true;
+                                       break; /* no need to go further */
+                               }
+
+                               posvec_iter++;
+                       }
+               }
+               else /* data != NULL */
+               {
+                       data->npos = posvec->npos;
+                       data->pos  = posvec->pos;
+                       data->allocated = false;
+                       result = true;
+               }
+       }
+       else
         {
-               if (item->weight & (1 << WEP_GETWEIGHT(*ptr)))
-                       return true;
-               ptr++;
+               result = true;
         }
-       return false;
+
+       return result;
+}
+
+/*
+ * Sort 'pos' and squeeze out repeated positions in place.  uniquePos() from
+ * tsvector.c is not usable here because the array may hold more than
+ * MAXENTRYPOS items.
+ *
+ * Returns the number of items kept at the front of the array.
+ */
+static int
+uniqueLongPos(WordEntryPos *pos, int npos)
+{
+       int                     last = 0;       /* index of the last item kept */
+       int                     i;
+
+       /* zero or one item is already unique */
+       if (npos <= 1)
+               return npos;
+
+       qsort((void *) pos, npos, sizeof(WordEntryPos), comparePos);
+
+       for (i = 1; i < npos; i++)
+       {
+               /* same position as the last kept item: drop it */
+               if (WEP_GETPOS(pos[i]) == WEP_GETPOS(pos[last]))
+                       continue;
+
+               last++;
+               pos[last] = WEP_GETPOS(pos[i]);
+       }
+
+       return last + 1;
 }
 
 /*
  * is there value 'val' in array or not ?
  */
 static bool
-checkcondition_str(void *checkval, QueryOperand *val)
+checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
 {
        CHKVAL     *chkval = (CHKVAL *) checkval;
        WordEntry  *StopLow = chkval->arrb;
@@ -1162,14 +1251,16 @@ checkcondition_str(void *checkval, QueryOperand *val)
        while (StopLow < StopHigh)
        {
                StopMiddle = StopLow + (StopHigh - StopLow) / 2;
-               difference = tsCompareString(chkval->operand + val->distance, val->length,
-                                                  chkval->values + StopMiddle->pos, StopMiddle->len,
+               difference = tsCompareString(chkval->operand + val->distance,
+                                                                        val->length,
+                                                                        chkval->values + StopMiddle->pos,
+                                                                        StopMiddle->len,
                                                                         false);
 
                if (difference == 0)
                {
-                       res = (val->weight && StopMiddle->haspos) ?
-                               checkclass_str(chkval, StopMiddle, val) : true;
+                       /* Check weight info & fill 'data' with positions */
+                       res = checkclass_str(chkval, StopMiddle, val, data);
                        break;
                }
                else if (difference > 0)
@@ -1178,30 +1269,199 @@ checkcondition_str(void *checkval, QueryOperand *val)
                        StopHigh = StopMiddle;
        }
 
-       if (!res && val->prefix)
+       if ((!res || data) && val->prefix)
        {
+               WordEntryPos       *allpos = NULL;
+               int                                     npos = 0,
+                                                       totalpos = 0;
                /*
                 * there was a failed exact search, so we should scan further to find
-                * a prefix match.
+                * a prefix match. We also need to do so if caller needs position info
                 */
                if (StopLow >= StopHigh)
                        StopMiddle = StopHigh;
 
-               while (res == false && StopMiddle < chkval->arre &&
-                          tsCompareString(chkval->operand + val->distance, val->length,
-                                                  chkval->values + StopMiddle->pos, StopMiddle->len,
+               while ((!res || data) && StopMiddle < chkval->arre &&
+                          tsCompareString(chkval->operand + val->distance,
+                                                          val->length,
+                                                          chkval->values + StopMiddle->pos,
+                                                          StopMiddle->len,
                                                           true) == 0)
                {
-                       res = (val->weight && StopMiddle->haspos) ?
-                               checkclass_str(chkval, StopMiddle, val) : true;
+                       if (data)
+                       {
+                               /*
+                                * We need to join position information
+                                */
+                               res = checkclass_str(chkval, StopMiddle, val, data);
+
+                               if (res)
+                               {
+                                       while (npos + data->npos >= totalpos)
+                                       {
+                                               if (totalpos == 0)
+                                               {
+                                                       totalpos = 256;
+                                                       allpos = palloc(sizeof(WordEntryPos) * totalpos);
+                                               }
+                                               else
+                                               {
+                                                       totalpos *= 2;
+                                                       allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
+                                               }
+                                       }
+
+                                       memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
+                                       npos += data->npos;
+                               }
+                       }
+                       else
+                       {
+                               res = checkclass_str(chkval, StopMiddle, val, NULL);
+                       }
 
                        StopMiddle++;
                }
+
+               if (res && data)
+               {
+                       /* Sort and make unique array of found positions */
+                       data->pos = allpos;
+                       data->npos = uniqueLongPos(allpos, npos);
+                       data->allocated = true;
+               }
        }
 
        return res;
 }
 
+/*
+ * Check for phrase condition. Fall back to the AND operation
+ * if there is no positional information.
+ *
+ * curitem:  root of the subtree, either an operand (QI_VAL) or an
+ *           OP_PHRASE operator (asserted)
+ * checkval: opaque state handed through to chkcond
+ * calcnot:  handed through to the recursive calls, not otherwise used here
+ * data:     if not NULL, receives the positions of the right operand at
+ *           which the phrase matched; NULL if only a yes/no answer is needed
+ * chkcond:  callback that tests a single operand and may report positions
+ *
+ * Returns true if the subtree matches.
+ */
+static bool
+TS_phrase_execute(QueryItem *curitem,
+                                 void *checkval, bool calcnot, ExecPhraseData *data,
+                                 bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *))
+{
+       /* since this function recurses, it could be driven to stack overflow */
+       check_stack_depth();
+
+       if (curitem->type == QI_VAL)
+       {
+               return chkcond(checkval, (QueryOperand *) curitem, data);
+       }
+       else
+       {
+               ExecPhraseData  Ldata = {0, false, NULL},
+                                               Rdata = {0, false, NULL};
+               WordEntryPos   *Lpos,
+                                          *Rpos,
+                                          *pos_iter = NULL;
+
+               Assert(curitem->qoperator.oper == OP_PHRASE);
+
+               if (!TS_phrase_execute(curitem + curitem->qoperator.left,
+                                                          checkval, calcnot, &Ldata, chkcond))
+                       return false;
+
+               if (!TS_phrase_execute(curitem + 1, checkval, calcnot, &Rdata, chkcond))
+                       return false;
+
+               /*
+                * if at least one of the operands has no position
+                * information, fallback to AND operation.
+                */
+               if (Ldata.npos == 0 || Rdata.npos == 0)
+                       return true;
+
+               /*
+                * Result of the operation is a list of the
+                * corresponding positions of RIGHT operand.
+                */
+               if (data)
+               {
+                       if (!Rdata.allocated)
+                       {
+                               /*
+                                * Every right-hand position yields at most one result,
+                                * but a single left-hand position can pair with several
+                                * right-hand ones (Lpos is not advanced after a match),
+                                * so Rdata.npos is the upper bound for the result array.
+                                */
+                               data->pos = palloc(sizeof(WordEntryPos) * Rdata.npos);
+                       }
+                       else
+                       {
+                               /*
+                                * Reuse the right operand's private array: a result is
+                                * always written at or before the slot being read.
+                                */
+                               data->pos = Rdata.pos;
+                       }
+
+                       data->allocated = true;
+                       data->npos = 0;
+                       pos_iter = data->pos;
+               }
+
+               Lpos = Ldata.pos;
+               Rpos = Rdata.pos;
+
+               /*
+                * Find matches by distance, WEP_GETPOS() is needed because
+                * ExecPhraseData->data can point to the tsvector's WordEntryPosVector
+                */
+
+               while (Rpos < Rdata.pos + Rdata.npos)
+               {
+                       while (Lpos < Ldata.pos + Ldata.npos)
+                       {
+                               if (WEP_GETPOS(*Lpos) <= WEP_GETPOS(*Rpos))
+                               {
+                                       /*
+                                        * Lpos is behind the Rpos, so we have to check the
+                                        * distance condition
+                                        */
+                                       if (WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) <= curitem->qoperator.distance)
+                                       {
+                                               /* MATCH! */
+                                               if (data)
+                                               {
+                                                       *pos_iter = WEP_GETPOS(*Rpos);
+                                                       pos_iter++;
+
+                                                       break; /* We need to build a unique result
+                                                                       * array, so go to the next Rpos */
+                                               }
+                                               else
+                                               {
+                                                       /*
+                                                        * We are in the root of the phrase tree and hence
+                                                        * we don't have to store the resulting positions
+                                                        */
+                                                       return true;
+                                               }
+                                       }
+                               }
+                               else
+                               {
+                                       /*
+                                        * Go to the next Rpos, because Lpos
+                                        * is ahead of the current Rpos
+                                        */
+                                       break;
+                               }
+
+                               Lpos++;
+                       }
+
+                       Rpos++;
+               }
+
+               if (data)
+               {
+                       data->npos = pos_iter - data->pos;
+
+                       if (data->npos > 0)
+                               return true;
+               }
+       }
+
+       return false;
+}
+
+
 /*
  * Evaluate tsquery boolean expression.
  *
@@ -1210,16 +1470,19 @@ checkcondition_str(void *checkval, QueryOperand *val)
  * do anything with it.
  * if calcnot is false, NOT expressions are always evaluated to be true. This
  * is used in ranking.
+ * It assumes that ordinary operators are always closer to the root than the
+ * phrase operator, so TS_execute() itself does not track lexeme positions.
  */
 bool
 TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
-                  bool (*chkcond) (void *checkval, QueryOperand *val))
+                  bool (*chkcond) (void *checkval, QueryOperand *val, ExecPhraseData *data))
 {
        /* since this function recurses, it could be driven to stack overflow */
        check_stack_depth();
 
        if (curitem->type == QI_VAL)
-               return chkcond(checkval, (QueryOperand *) curitem);
+               return chkcond(checkval, (QueryOperand *) curitem,
+                                          NULL /* we don't need position info */);
 
        switch (curitem->qoperator.oper)
        {
@@ -1241,6 +1504,9 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
                        else
                                return TS_execute(curitem + 1, checkval, calcnot, chkcond);
 
+               case OP_PHRASE:
+                       return TS_phrase_execute(curitem, checkval, calcnot, NULL, chkcond);
+
                default:
                        elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
        }
@@ -1277,6 +1543,10 @@ tsquery_requires_match(QueryItem *curitem)
                         */
                        return false;
 
+               case OP_PHRASE:
+                       /*
+                        * Treat OP_PHRASE as OP_AND here
+                        */
                case OP_AND:
                        /* If either side requires a match, we're good */
                        if (tsquery_requires_match(curitem + curitem->qoperator.left))
index e2817082db8660b831387aaf2bcce93c9722b1c8..d99405824d7c17edb68913ee0f9b4dd72d2fd306 100644 (file)
@@ -89,7 +89,15 @@ do { \
        } \
 } while (0)
 
-#define ISOPERATOR(x)  ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
+/* phrase operator begins with '<' */
+#define ISOPERATOR(x) \
+       ( pg_mblen(x) == 1 && ( *(x) == '!' ||  \
+                                                       *(x) == '&' ||  \
+                                                       *(x) == '|' ||  \
+                                                       *(x) == '(' ||  \
+                                                       *(x) == ')' ||  \
+                                                       *(x) == '<'             \
+                                                 ) )
 
 /* Fills gettoken_tsvector's output parameters, and returns true */
 #define RETURN_TOKEN \
index 0edc6cbafe740dac1949d2b6b0f1dc791a0fbc9c..6d254ba133cf42e225cd75e9daf03afa5774c37c 100644 (file)
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     201604062
+#define CATALOG_VERSION_NO     201604071
 
 #endif
index b3daff28e3f71bff1b3116ce74af11b5d1633c11..a5e4a02ebc83b2cdb40e47a39cc7c1c64408a13b 100644 (file)
@@ -1675,6 +1675,9 @@ DATA(insert OID = 3680 (  "&&"       PGNSP PGUID b f f 3615        3615    3615  0        0        tsque
 DESCR("AND-concatenate");
 DATA(insert OID = 3681 (  "||"    PGNSP PGUID b f f 3615        3615    3615  0        0        tsquery_or   -         -         ));
 DESCR("OR-concatenate");
+/* The <-> operator calls tsquery_phrase, but that function name is overloaded, so refer to it by OID */
+DATA(insert OID = 5005 (  "<->"           PGNSP PGUID b f f 3615    3615    3615  0    0    5003   -       -     ));
+DESCR("phrase-concatenate");
 DATA(insert OID = 3682 (  "!!"    PGNSP PGUID l f f 0           3615    3615  0        0        tsquery_not   -        -         ));
 DESCR("NOT tsquery");
 DATA(insert OID = 3693 (  "@>"    PGNSP PGUID b f f 3615        3615    16 3694        0        tsq_mcontains  contsel    contjoinsel   ));
index d7dbc7392805769a673809ec16272bf0adc52dd2..c351594be468ca2a80cde5997b278ccb514f0819 100644 (file)
@@ -4607,6 +4607,9 @@ DESCR("less-equal-greater");
 
 DATA(insert OID = 3669 (  tsquery_and          PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_and _null_ _null_ _null_ ));
 DATA(insert OID = 3670 (  tsquery_or           PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_or _null_ _null_ _null_ ));
+DATA(insert OID = 5003 (  tsquery_phrase       PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3615 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsquery_phrase _null_ _null_ _null_ ));
+DATA(insert OID = 5004 (  tsquery_phrase       PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 3615 "3615 3615 23" _null_ _null_ _null_ _null_ _null_ tsquery_phrase_distance _null_ _null_ _null_ ));
+DESCR("phrase-concatenate with distance");
 DATA(insert OID = 3671 (  tsquery_not          PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3615 "3615" _null_ _null_ _null_ _null_ _null_ tsquery_not _null_ _null_ _null_ ));
 
 DATA(insert OID = 3691 (  tsq_mcontains                PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3615" _null_ _null_ _null_ _null_ _null_ tsq_mcontains _null_ _null_ _null_ ));
@@ -4726,12 +4729,16 @@ DATA(insert OID = 3746 (  to_tsquery            PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2
 DESCR("make tsquery");
 DATA(insert OID = 3747 (  plainto_tsquery      PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ plainto_tsquery_byid _null_ _null_ _null_ ));
 DESCR("transform to tsquery");
+DATA(insert OID = 5006 (  phraseto_tsquery     PGNSP PGUID 12 100 0 0 0 f f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery_byid _null_ _null_ _null_ ));
+DESCR("transform to tsquery");
 DATA(insert OID = 3749 (  to_tsvector          PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3614 "25" _null_ _null_ _null_ _null_ _null_ to_tsvector _null_ _null_ _null_ ));
 DESCR("transform to tsvector");
 DATA(insert OID = 3750 (  to_tsquery           PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ to_tsquery _null_ _null_ _null_ ));
 DESCR("make tsquery");
 DATA(insert OID = 3751 (  plainto_tsquery      PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ plainto_tsquery _null_ _null_ _null_ ));
 DESCR("transform to tsquery");
+DATA(insert OID = 5001 (  phraseto_tsquery     PGNSP PGUID 12 100 0 0 0 f f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
+DESCR("transform to tsquery");
 
 DATA(insert OID = 3752 (  tsvector_update_trigger                      PGNSP PGUID 12 1 0 0 0 f f f f f f v s 0 0 2279 "" _null_ _null_ _null_ _null_ _null_ tsvector_update_trigger_byid _null_ _null_ _null_ ));
 DESCR("trigger for automatic update of tsvector column");
index 6f7a891ae8401be85bfc8db8fd3b59db1cc04296..9364eee438cf72ada213c85fcfec00896d28ed60 100644 (file)
@@ -34,16 +34,17 @@ typedef struct
  */
 typedef struct
 {
-       uint32          selected:1,
-                               in:1,
-                               replace:1,
-                               repeated:1,
-                               skip:1,
-                               unused:3,
-                               type:8,
-                               len:16;
-       char       *word;
-       QueryOperand *item;
+       uint32                  selected:       1,
+                                       in:             1,
+                                       replace:        1,
+                                       repeated:       1,
+                                       skip:           1,
+                                       unused:         3,
+                                       type:           8,
+                                       len:            16;
+       WordEntryPos    pos;
+       char               *word;
+       QueryOperand   *item;
 } HeadlineWordEntry;
 
 typedef struct
@@ -51,6 +52,7 @@ typedef struct
        HeadlineWordEntry *words;
        int32           lenwords;
        int32           curwords;
+       int32           vectorpos; /* positions a-la tsvector */
        char       *startsel;
        char       *stopsel;
        char       *fragdelim;
index bc99524dc082575338534419e8eeb56a1fc193ab..5f4e5961939f2ef4fab7380ad5f6a0b0647e4180 100644 (file)
@@ -49,6 +49,8 @@ typedef struct
 #define MAXSTRLEN ( (1<<11) - 1)
 #define MAXSTRPOS ( (1<<20) - 1)
 
+extern int comparePos(const void *a, const void *b);
+
 /*
  * Equivalent to
  * typedef struct {
@@ -213,15 +215,33 @@ typedef struct
 } QueryOperand;
 
 
-/* Legal values for QueryOperator.operator */
-#define OP_NOT 1
-#define OP_AND 2
-#define OP_OR  3
+/*
+ * Legal values for QueryOperator.operator.
+ * They must be ordered by priority! We assume that phrase
+ * has the highest priority, but this convention applies only
+ * to query transformation; it is needed to simplify the
+ * query transformation algorithm.
+ */
+#define OP_OR                  1
+#define OP_AND                 2
+#define OP_NOT                 3
+#define OP_PHRASE              4
+#define OP_NOT_PHRASE  5       /*
+                                                        * OP_PHRASE negation operations must have greater
+                                                        * priority in order to force infix() to surround
+                                                        * the whole OP_PHRASE expression with parentheses.
+                                                        */
+
+#define TOP_PRIORITY   6       /* highest priority for val nodes */
+
+#define        OP_PRIORITY(x)  (x)
+#define QO_PRIORITY(x) OP_PRIORITY(((QueryOperator *) (x))->oper)
 
 typedef struct
 {
        QueryItemType type;
        int8            oper;                   /* see above */
+       int16           distance;               /* distance between args for OP_PHRASE */
        uint32          left;                   /* pointer to left operand. Right operand is
                                                                 * item + 1, left operand is placed
                                                                 * item+item->left */
@@ -304,6 +324,8 @@ extern Datum tsquery_numnode(PG_FUNCTION_ARGS);
 
 extern Datum tsquery_and(PG_FUNCTION_ARGS);
 extern Datum tsquery_or(PG_FUNCTION_ARGS);
+extern Datum tsquery_phrase(PG_FUNCTION_ARGS);
+extern Datum tsquery_phrase_distance(PG_FUNCTION_ARGS);
 extern Datum tsquery_not(PG_FUNCTION_ARGS);
 
 extern Datum tsquery_rewrite(PG_FUNCTION_ARGS);
index 88533a64235bee3bc3c97294238a1a4c61316202..855bbfecd64b580aa28b4a3d0d833d18d72943c4 100644 (file)
@@ -55,7 +55,7 @@ extern TSQuery parse_tsquery(char *buf,
 extern void pushValue(TSQueryParserState state,
                  char *strval, int lenval, int16 weight, bool prefix);
 extern void pushStop(TSQueryParserState state);
-extern void pushOperator(TSQueryParserState state, int8 oper);
+extern void pushOperator(TSQueryParserState state, int8 oper, int16 distance);
 
 /*
  * parse plain text and lexize words
@@ -104,8 +104,15 @@ extern text *generateHeadline(HeadlineParsedText *prs);
 /*
  * Common check function for tsvector @@ tsquery
  */
+typedef struct ExecPhraseData
+{
+       int                             npos;
+       bool                    allocated;
+       WordEntryPos   *pos;
+} ExecPhraseData;
+
 extern bool TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
-                  bool (*chkcond) (void *checkval, QueryOperand *val));
+                  bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *));
 extern bool tsquery_requires_match(QueryItem *curitem);
 
 /*
@@ -120,6 +127,8 @@ extern Datum to_tsquery_byid(PG_FUNCTION_ARGS);
 extern Datum to_tsquery(PG_FUNCTION_ARGS);
 extern Datum plainto_tsquery_byid(PG_FUNCTION_ARGS);
 extern Datum plainto_tsquery(PG_FUNCTION_ARGS);
+extern Datum phraseto_tsquery_byid(PG_FUNCTION_ARGS);
+extern Datum phraseto_tsquery(PG_FUNCTION_ARGS);
 
 /*
  * GiST support function
@@ -169,7 +178,7 @@ extern Datum gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS);
  * TSQuery Utilities
  */
 extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len);
-extern QueryItem *clean_fakeval(QueryItem *ptr, int32 *len);
+extern TSQuery cleanup_fakeval_and_phrase(TSQuery in);
 
 typedef struct QTNode
 {
index ef86295f8841cd2f807422c32e8d7985e3d0b243..5ddbe8023466bc714347db3f9ac0ed24c7ee8139 100644 (file)
@@ -434,9 +434,9 @@ SELECT to_tsvector('ispell_tst', 'Booking the skies after rebookings for footbal
 (1 row)
 
 SELECT to_tsquery('ispell_tst', 'footballklubber');
-                                  to_tsquery                                  
-------------------------------------------------------------------------------
- ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber'
+                                to_tsquery                                
+--------------------------------------------------------------------------
+ 'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
 (1 row)
 
 SELECT to_tsquery('ispell_tst', 'footballyklubber:b & rebookings:A & sky');
@@ -458,9 +458,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb
 (1 row)
 
 SELECT to_tsquery('hunspell_tst', 'footballklubber');
-                                  to_tsquery                                  
-------------------------------------------------------------------------------
- ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber'
+                                to_tsquery                                
+--------------------------------------------------------------------------
+ 'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
 (1 row)
 
 SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
@@ -469,6 +469,18 @@ SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
  'foot':B & 'ball':B & 'klubber':B & ( 'booking':A | 'book':A ) & 'sky'
 (1 row)
 
+SELECT to_tsquery('hunspell_tst', 'footballyklubber:b <-> sky');
+                                 to_tsquery                                  
+-----------------------------------------------------------------------------
+ ( 'foot':B <-> 'sky' ) & ( 'ball':B <-> 'sky' ) & ( 'klubber':B <-> 'sky' )
+(1 row)
+
+SELECT phraseto_tsquery('hunspell_tst', 'footballyklubber sky');
+                           phraseto_tsquery                            
+-----------------------------------------------------------------------
+ ( 'foot' <-> 'sky' ) & ( 'ball' <-> 'sky' ) & ( 'klubber' <-> 'sky' )
+(1 row)
+
 -- Test ispell dictionary with hunspell affix with FLAG long in configuration
 ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
        REPLACE hunspell WITH hunspell_long;
@@ -479,9 +491,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb
 (1 row)
 
 SELECT to_tsquery('hunspell_tst', 'footballklubber');
-                                  to_tsquery                                  
-------------------------------------------------------------------------------
- ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber'
+                                to_tsquery                                
+--------------------------------------------------------------------------
+ 'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
 (1 row)
 
 SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
@@ -500,9 +512,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb
 (1 row)
 
 SELECT to_tsquery('hunspell_tst', 'footballklubber');
-                                  to_tsquery                                  
-------------------------------------------------------------------------------
- ( 'footballklubber' | 'foot' & 'ball' & 'klubber' ) | 'football' & 'klubber'
+                                to_tsquery                                
+--------------------------------------------------------------------------
+ 'footballklubber' | 'foot' & 'ball' & 'klubber' | 'football' & 'klubber'
 (1 row)
 
 SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
index 3811250fb71a97efd6f102cd1880a53c339bc84b..558f00cc4e459cf2dab4bfce600a4f68ad20f451 100644 (file)
@@ -554,6 +554,235 @@ SELECT plainto_tsquery('english', 'foo bar') && 'asd | fg';
  'foo' & 'bar' & ( 'asd' | 'fg' )
 (1 row)
 
+-- Check stop word deletion, a and s are stop-words
+SELECT to_tsquery('english', '(1 <-> 2) <-> a');
+ to_tsquery  
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(1 <-> a) <-> 2');
+ to_tsquery  
+-------------
+ '1' <2> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(a <-> 1) <-> 2');
+ to_tsquery  
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', 'a <-> (1 <-> 2)');
+ to_tsquery  
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '1 <-> (a <-> 2)');
+ to_tsquery  
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '1 <-> (2 <-> a)');
+ to_tsquery  
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(1 <-> 2) <3> a');
+ to_tsquery  
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(1 <-> a) <3> 2');
+ to_tsquery  
+-------------
+ '1' <4> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(a <-> 1) <3> 2');
+ to_tsquery  
+-------------
+ '1' <3> '2'
+(1 row)
+
+SELECT to_tsquery('english', 'a <3> (1 <-> 2)');
+ to_tsquery  
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '1 <3> (a <-> 2)');
+ to_tsquery  
+-------------
+ '1' <3> '2'
+(1 row)
+
+SELECT to_tsquery('english', '1 <3> (2 <-> a)');
+ to_tsquery  
+-------------
+ '1' <3> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(1 <3> 2) <-> a');
+ to_tsquery  
+-------------
+ '1' <3> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(1 <3> a) <-> 2');
+ to_tsquery  
+-------------
+ '1' <4> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(a <3> 1) <-> 2');
+ to_tsquery  
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', 'a <-> (1 <3> 2)');
+ to_tsquery  
+-------------
+ '1' <3> '2'
+(1 row)
+
+SELECT to_tsquery('english', '1 <-> (a <3> 2)');
+ to_tsquery  
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '1 <-> (2 <3> a)');
+ to_tsquery  
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '((a <-> 1) <-> 2) <-> s');
+ to_tsquery  
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(2 <-> (a <-> 1)) <-> s');
+ to_tsquery  
+-------------
+ '2' <-> '1'
+(1 row)
+
+SELECT to_tsquery('english', '((1 <-> a) <-> 2) <-> s');
+ to_tsquery  
+-------------
+ '1' <2> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(2 <-> (1 <-> a)) <-> s');
+ to_tsquery  
+-------------
+ '2' <-> '1'
+(1 row)
+
+SELECT to_tsquery('english', 's <-> ((a <-> 1) <-> 2)');
+ to_tsquery  
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', 's <-> (2 <-> (a <-> 1))');
+ to_tsquery  
+-------------
+ '2' <-> '1'
+(1 row)
+
+SELECT to_tsquery('english', 's <-> ((1 <-> a) <-> 2)');
+ to_tsquery  
+-------------
+ '1' <2> '2'
+(1 row)
+
+SELECT to_tsquery('english', 's <-> (2 <-> (1 <-> a))');
+ to_tsquery  
+-------------
+ '2' <-> '1'
+(1 row)
+
+SELECT to_tsquery('english', '((a <-> 1) <-> s) <-> 2');
+ to_tsquery  
+-------------
+ '1' <2> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(s <-> (a <-> 1)) <-> 2');
+ to_tsquery  
+-------------
+ '1' <-> '2'
+(1 row)
+
+SELECT to_tsquery('english', '((1 <-> a) <-> s) <-> 2');
+ to_tsquery  
+-------------
+ '1' <3> '2'
+(1 row)
+
+SELECT to_tsquery('english', '(s <-> (1 <-> a)) <-> 2');
+ to_tsquery  
+-------------
+ '1' <2> '2'
+(1 row)
+
+SELECT to_tsquery('english', '2 <-> ((a <-> 1) <-> s)');
+ to_tsquery  
+-------------
+ '2' <-> '1'
+(1 row)
+
+SELECT to_tsquery('english', '2 <-> (s <-> (a <-> 1))');
+ to_tsquery  
+-------------
+ '2' <-> '1'
+(1 row)
+
+SELECT to_tsquery('english', '2 <-> ((1 <-> a) <-> s)');
+ to_tsquery  
+-------------
+ '2' <-> '1'
+(1 row)
+
+SELECT to_tsquery('english', '2 <-> (s <-> (1 <-> a))');
+ to_tsquery  
+-------------
+ '2' <-> '1'
+(1 row)
+
+SELECT to_tsquery('foo <-> (a <-> (the <-> bar))');
+   to_tsquery    
+-----------------
+ 'foo' <-> 'bar'
+(1 row)
+
+SELECT to_tsquery('((foo <-> a) <-> the) <-> bar');
+   to_tsquery    
+-----------------
+ 'foo' <3> 'bar'
+(1 row)
+
+SELECT to_tsquery('foo <-> a <-> the <-> bar');
+   to_tsquery    
+-----------------
+ 'foo' <3> 'bar'
+(1 row)
+
+SELECT phraseto_tsquery('PostgreSQL can be extended by the user in many ways');
+                           phraseto_tsquery                            
+-----------------------------------------------------------------------
+ ( ( ( 'postgresql' <3> 'extend' ) <3> 'user' ) <2> 'mani' ) <-> 'way'
+(1 row)
+
 SELECT ts_rank_cd(to_tsvector('english', '
 Day after day, day after day,
   We stuck, nor breath nor motion,
@@ -602,6 +831,22 @@ S. T. Coleridge (1772-1834)
         0.1
 (1 row)
 
+SELECT ts_rank_cd(to_tsvector('english', '
+Day after day, day after day,
+  We stuck, nor breath nor motion,
+As idle as a painted Ship
+  Upon a painted Ocean.
+Water, water, every where
+  And all the boards did shrink;
+Water, water, every where,
+  Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+'), to_tsquery('english', 'painted <-> Ship'));
+ ts_rank_cd 
+------------
+        0.1
+(1 row)
+
 SELECT ts_rank_cd(strip(to_tsvector('both stripped')),
                   to_tsquery('both & stripped'));
  ts_rank_cd 
@@ -674,6 +919,44 @@ S. T. Coleridge (1772-1834)
  Water, water, every where
 (1 row)
 
+SELECT ts_headline('english', '
+Day after day, day after day,
+  We stuck, nor breath nor motion,
+As idle as a painted Ship
+  Upon a painted Ocean.
+Water, water, every where
+  And all the boards did shrink;
+Water, water, every where,
+  Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+', phraseto_tsquery('english', 'painted Ocean'));
+           ts_headline            
+----------------------------------
+ <b>painted</b> <b>Ocean</b>.    +
+ Water, water, every where       +
+   And all the boards did shrink;+
+ Water, water, every
+(1 row)
+
+SELECT ts_headline('english', '
+Day after day, day after day,
+  We stuck, nor breath nor motion,
+As idle as a painted Ship
+  Upon a painted Ocean.
+Water, water, every where
+  And all the boards did shrink;
+Water, water, every where,
+  Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+', phraseto_tsquery('english', 'idle as a painted Ship'));
+                 ts_headline                 
+---------------------------------------------
+ <b>idle</b> as a <b>painted</b> <b>Ship</b>+
+   Upon a <b>painted</b> Ocean.             +
+ Water, water, every where                  +
+   And all the boards
+(1 row)
+
 SELECT ts_headline('english', '
 <html>
 <!-- some comment -->
@@ -703,6 +986,24 @@ to_tsquery('english', 'sea&foo'), 'HighlightAll=true');
  </html>
 (1 row)
 
+SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 <-> 3', 'MaxWords=2, MinWords=1');
+    ts_headline    
+-------------------
+ <b>1</b> <b>3</b>
+(1 row)
+
+SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 & 3', 'MaxWords=4, MinWords=1');
+         ts_headline          
+------------------------------
+ <b>1</b> 2 <b>3</b> <b>1</b>
+(1 row)
+
+SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 <-> 3', 'MaxWords=4, MinWords=1');
+    ts_headline    
+-------------------
+ <b>1</b> <b>3</b>
+(1 row)
+
 --Check if headline fragments work
 SELECT ts_headline('english', '
 Day after day, day after day,
@@ -805,13 +1106,13 @@ UPDATE test_tsquery SET sample = to_tsquery('english', txtsample::text);
 SELECT COUNT(*) FROM test_tsquery WHERE keyword <  'new & york';
  count 
 -------
-     1
+     2
 (1 row)
 
 SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new & york';
  count 
 -------
-     2
+     3
 (1 row)
 
 SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new & york';
@@ -823,13 +1124,13 @@ SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new & york';
 SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new & york';
  count 
 -------
-     3
+     4
 (1 row)
 
 SELECT COUNT(*) FROM test_tsquery WHERE keyword >  'new & york';
  count 
 -------
-     2
+     3
 (1 row)
 
 CREATE UNIQUE INDEX bt_tsq ON test_tsquery (keyword);
@@ -837,13 +1138,13 @@ SET enable_seqscan=OFF;
 SELECT COUNT(*) FROM test_tsquery WHERE keyword <  'new & york';
  count 
 -------
-     1
+     2
 (1 row)
 
 SELECT COUNT(*) FROM test_tsquery WHERE keyword <= 'new & york';
  count 
 -------
-     2
+     3
 (1 row)
 
 SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new & york';
@@ -855,20 +1156,20 @@ SELECT COUNT(*) FROM test_tsquery WHERE keyword = 'new & york';
 SELECT COUNT(*) FROM test_tsquery WHERE keyword >= 'new & york';
  count 
 -------
-     3
+     4
 (1 row)
 
 SELECT COUNT(*) FROM test_tsquery WHERE keyword >  'new & york';
  count 
 -------
-     2
+     3
 (1 row)
 
 RESET enable_seqscan;
 SELECT ts_rewrite('foo & bar & qq & new & york',  'new & york'::tsquery, 'big & apple | nyc | new & york & city');
-                                    ts_rewrite                                    
-----------------------------------------------------------------------------------
- 'foo' & 'bar' & 'qq' & ( 'city' & 'new' & 'york' | ( 'nyc' | 'big' & 'apple' ) )
+                                  ts_rewrite                                  
+------------------------------------------------------------------------------
+ 'foo' & 'bar' & 'qq' & ( 'nyc' | 'big' & 'apple' | 'city' & 'new' & 'york' )
 (1 row)
 
 SELECT ts_rewrite('moscow', 'SELECT keyword, sample FROM test_tsquery'::text );
@@ -884,9 +1185,9 @@ SELECT ts_rewrite('moscow & hotel', 'SELECT keyword, sample FROM test_tsquery'::
 (1 row)
 
 SELECT ts_rewrite('bar & new & qq & foo & york', 'SELECT keyword, sample FROM test_tsquery'::text );
-                                     ts_rewrite                                      
--------------------------------------------------------------------------------------
- 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+                                   ts_rewrite                                    
+---------------------------------------------------------------------------------
+ ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
 (1 row)
 
 SELECT ts_rewrite( 'moscow', 'SELECT keyword, sample FROM test_tsquery');
@@ -902,9 +1203,33 @@ SELECT ts_rewrite( 'moscow & hotel', 'SELECT keyword, sample FROM test_tsquery')
 (1 row)
 
 SELECT ts_rewrite( 'bar & new & qq & foo & york', 'SELECT keyword, sample FROM test_tsquery');
-                                     ts_rewrite                                      
--------------------------------------------------------------------------------------
- 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+                                   ts_rewrite                                    
+---------------------------------------------------------------------------------
+ ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
+(1 row)
+
+SELECT ts_rewrite('1 & (2 <-> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
+ ts_rewrite  
+-------------
+ '2' <-> '4'
+(1 row)
+
+SELECT ts_rewrite('1 & (2 <2> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
+      ts_rewrite       
+-----------------------
+ '1' & ( '2' <2> '3' )
+(1 row)
+
+SELECT ts_rewrite('5 <-> (1 & (2 <-> 3))', 'SELECT keyword, sample FROM test_tsquery'::text );
+                  ts_rewrite                   
+-----------------------------------------------
+ ( '5' <-> '1' ) & ( '5' <-> ( '2' <-> '3' ) )
+(1 row)
+
+SELECT ts_rewrite('5 <-> (6 | 8)', 'SELECT keyword, sample FROM test_tsquery'::text );
+        ts_rewrite         
+---------------------------
+ '5' <-> '7' | '5' <-> '8'
 (1 row)
 
 SELECT keyword FROM test_tsquery WHERE keyword @> 'new';
@@ -943,9 +1268,9 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t
 (1 row)
 
 SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar &  new & qq & foo & york') AS query;
-                                     ts_rewrite                                      
--------------------------------------------------------------------------------------
- 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+                                   ts_rewrite                                    
+---------------------------------------------------------------------------------
+ ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
 (1 row)
 
 SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
@@ -961,9 +1286,9 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t
 (1 row)
 
 SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query;
-                                     ts_rewrite                                      
--------------------------------------------------------------------------------------
- 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+                                   ts_rewrite                                    
+---------------------------------------------------------------------------------
+ ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
 (1 row)
 
 CREATE INDEX qq ON test_tsquery USING gist (keyword tsquery_ops);
@@ -1004,9 +1329,9 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t
 (1 row)
 
 SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query;
-                                     ts_rewrite                                      
--------------------------------------------------------------------------------------
- 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+                                   ts_rewrite                                    
+---------------------------------------------------------------------------------
+ ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
 (1 row)
 
 SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow') AS query;
@@ -1022,9 +1347,9 @@ SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_t
 (1 row)
 
 SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar &  new & qq & foo & york') AS query;
-                                     ts_rewrite                                      
--------------------------------------------------------------------------------------
- 'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | ( 'big' & 'appl' | 'new' & 'york' ) )
+                                   ts_rewrite                                    
+---------------------------------------------------------------------------------
+ ( 'nyc' | 'big' & 'appl' | 'new' & 'york' ) & 'citi' & 'foo' & ( 'bar' | 'qq' )
 (1 row)
 
 RESET enable_seqscan;
@@ -1132,3 +1457,15 @@ select * from pendtest where 'ipi:*'::tsquery @@ ts;
 ----
 (0 rows)
 
+--check OP_PHRASE on index
+create temp table phrase_index_test(fts tsvector);
+insert into phrase_index_test values('A fat cat has just eaten a rat.');
+create index phrase_index_test_idx on phrase_index_test using gin(fts);
+set enable_seqscan = off;
+select * from phrase_index_test where fts @@ phraseto_tsquery('fat cat');
+                       fts                       
+-------------------------------------------------
+ 'A' 'a' 'cat' 'eaten' 'fat' 'has' 'just' 'rat.'
+(1 row)
+
+set enable_seqscan = on;
index a386a46361a8eee68ce6cc8599ecd60918c95223..c904c1c7054d5b33bff23223213ce98075dc1f4f 100644 (file)
@@ -277,15 +277,15 @@ SELECT '(!1|2)&3'::tsquery;
 (1 row)
 
 SELECT '1|(2|(4|(5|6)))'::tsquery;
-                 tsquery                 
------------------------------------------
- '1' | ( '2' | ( '4' | ( '5' | '6' ) ) )
+           tsquery           
+-----------------------------
+ '1' | '2' | '4' | '5' | '6'
 (1 row)
 
 SELECT '1|2|4|5|6'::tsquery;
-                 tsquery                 
------------------------------------------
- ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6'
+           tsquery           
+-----------------------------
+ '1' | '2' | '4' | '5' | '6'
 (1 row)
 
 SELECT '1&(2&(4&(5&6)))'::tsquery;
@@ -325,11 +325,139 @@ SELECT $$'\\as'$$::tsquery;
 (1 row)
 
 SELECT 'a:* & nbb:*ac | doo:a* | goo'::tsquery;
+               tsquery                
+--------------------------------------
+ 'a':* & 'nbb':*AC | 'doo':*A | 'goo'
+(1 row)
+
+-- phrase transformation
+SELECT 'a <-> (b|c)'::tsquery;
+          tsquery          
+---------------------------
+ 'a' <-> 'b' | 'a' <-> 'c'
+(1 row)
+
+SELECT '(a|b) <-> c'::tsquery;
+          tsquery          
+---------------------------
+ 'a' <-> 'c' | 'b' <-> 'c'
+(1 row)
+
+SELECT '(a|b) <-> (d|c)'::tsquery;
+                        tsquery                        
+-------------------------------------------------------
+ 'a' <-> 'd' | 'b' <-> 'd' | 'a' <-> 'c' | 'b' <-> 'c'
+(1 row)
+
+SELECT 'a <-> (b&c)'::tsquery;
+              tsquery              
+-----------------------------------
+ ( 'a' <-> 'b' ) & ( 'a' <-> 'c' )
+(1 row)
+
+SELECT '(a&b) <-> c'::tsquery;
+              tsquery              
+-----------------------------------
+ ( 'a' <-> 'c' ) & ( 'b' <-> 'c' )
+(1 row)
+
+SELECT '(a&b) <-> (d&c)'::tsquery;
+                                tsquery                                
+-----------------------------------------------------------------------
+ ( 'a' <-> 'd' ) & ( 'b' <-> 'd' ) & ( 'a' <-> 'c' ) & ( 'b' <-> 'c' )
+(1 row)
+
+SELECT 'a <-> !b'::tsquery;
+        tsquery         
+------------------------
+ 'a' & !( 'a' <-> 'b' )
+(1 row)
+
+SELECT '!a <-> b'::tsquery;
+        tsquery         
+------------------------
+ !( 'a' <-> 'b' ) & 'b'
+(1 row)
+
+SELECT '!a <-> !b'::tsquery;
+              tsquery               
+------------------------------------
+ !'a' & !( !( 'a' <-> 'b' ) & 'b' )
+(1 row)
+
+SELECT 'a <-> !(b&c)'::tsquery;
+                   tsquery                    
+----------------------------------------------
+ 'a' & !( ( 'a' <-> 'b' ) & ( 'a' <-> 'c' ) )
+(1 row)
+
+SELECT 'a <-> !(b|c)'::tsquery;
+               tsquery                
+--------------------------------------
+ 'a' & !( 'a' <-> 'b' | 'a' <-> 'c' )
+(1 row)
+
+SELECT  '!(a&b) <-> c'::tsquery;
+                   tsquery                    
+----------------------------------------------
+ !( ( 'a' <-> 'c' ) & ( 'b' <-> 'c' ) ) & 'c'
+(1 row)
+
+SELECT  '!(a|b) <-> c'::tsquery;
+               tsquery                
+--------------------------------------
+ !( 'a' <-> 'c' | 'b' <-> 'c' ) & 'c'
+(1 row)
+
+SELECT  '(!a|b) <-> c'::tsquery;
+               tsquery                
+--------------------------------------
+ !( 'a' <-> 'c' ) & 'c' | 'b' <-> 'c'
+(1 row)
+
+SELECT  '(!a&b) <-> c'::tsquery;
                  tsquery                  
 ------------------------------------------
- ( 'a':* & 'nbb':*AC | 'doo':*A ) | 'goo'
+ !( 'a' <-> 'c' ) & 'c' & ( 'b' <-> 'c' )
+(1 row)
+
+SELECT  'c <-> (!a|b)'::tsquery;
+               tsquery                
+--------------------------------------
+ 'c' & !( 'c' <-> 'a' ) | 'c' <-> 'b'
+(1 row)
+
+SELECT  'c <-> (!a&b)'::tsquery;
+                 tsquery                  
+------------------------------------------
+ 'c' & !( 'c' <-> 'a' ) & ( 'c' <-> 'b' )
+(1 row)
+
+SELECT  '(a|b) <-> !c'::tsquery;
+                    tsquery                     
+------------------------------------------------
+ ( 'a' | 'b' ) & !( 'a' <-> 'c' | 'b' <-> 'c' )
+(1 row)
+
+SELECT  '(a&b) <-> !c'::tsquery;
+                      tsquery                       
+----------------------------------------------------
+ 'a' & 'b' & !( ( 'a' <-> 'c' ) & ( 'b' <-> 'c' ) )
+(1 row)
+
+SELECT  '!c <-> (a|b)'::tsquery;
+                     tsquery                     
+-------------------------------------------------
+ !( 'c' <-> 'a' ) & 'a' | !( 'c' <-> 'b' ) & 'b'
+(1 row)
+
+SELECT  '!c <-> (a&b)'::tsquery;
+                     tsquery                     
+-------------------------------------------------
+ !( 'c' <-> 'a' ) & 'a' & !( 'c' <-> 'b' ) & 'b'
 (1 row)
 
+--comparisons
 SELECT 'a' < 'b & c'::tsquery as "true";
  true 
 ------
@@ -342,10 +470,10 @@ SELECT 'a' > 'b & c'::tsquery as "false";
  f
 (1 row)
 
-SELECT 'a | f' < 'b & c'::tsquery as "true";
- true 
-------
- t
+SELECT 'a | f' < 'b & c'::tsquery as "false";
+ false 
+-------
+ f
 (1 row)
 
 SELECT 'a | ff' < 'b & c'::tsquery as "false";
@@ -360,6 +488,7 @@ SELECT 'a | f | g' < 'b & c'::tsquery as "false";
  f
 (1 row)
 
+--concatenation
 SELECT numnode( 'new'::tsquery );
  numnode 
 ---------
@@ -402,6 +531,36 @@ SELECT 'foo & bar'::tsquery && 'asd | fg';
  'foo' & 'bar' & ( 'asd' | 'fg' )
 (1 row)
 
+SELECT 'a' <-> 'b & d'::tsquery;
+             ?column?              
+-----------------------------------
+ ( 'a' <-> 'b' ) & ( 'a' <-> 'd' )
+(1 row)
+
+SELECT 'a & g' <-> 'b & d'::tsquery;
+                               ?column?                                
+-----------------------------------------------------------------------
+ ( 'a' <-> 'b' ) & ( 'g' <-> 'b' ) & ( 'a' <-> 'd' ) & ( 'g' <-> 'd' )
+(1 row)
+
+SELECT 'a & g' <-> 'b | d'::tsquery;
+                               ?column?                                
+-----------------------------------------------------------------------
+ ( 'a' <-> 'b' ) & ( 'g' <-> 'b' ) | ( 'a' <-> 'd' ) & ( 'g' <-> 'd' )
+(1 row)
+
+SELECT 'a & g' <-> 'b <-> d'::tsquery;
+                         ?column?                          
+-----------------------------------------------------------
+ ( 'a' <-> ( 'b' <-> 'd' ) ) & ( 'g' <-> ( 'b' <-> 'd' ) )
+(1 row)
+
+SELECT tsquery_phrase('a <3> g', 'b & d', 10);
+                       tsquery_phrase                        
+-------------------------------------------------------------
+ ( ( 'a' <3> 'g' ) <10> 'b' ) & ( ( 'a' <3> 'g' ) <10> 'd' )
+(1 row)
+
 -- tsvector-tsquery operations
 SELECT 'a b:89  ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca' as "true";
  true 
@@ -499,6 +658,80 @@ SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
  t
 (1 row)
 
+--phrase search
+SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 2' AS "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 3' AS "false";
+ false 
+-------
+ f
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 3' AS "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 11 3') @@ '1 <-> 3' AS "false";
+ false 
+-------
+ f
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 11 3') @@ '1:* <-> 3' AS "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> (2 <-> 3)' AS "false";
+ false 
+-------
+ f
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <2> (2 <-> 3)' AS "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
+ true 
+------
+ t
+(1 row)
+
+--ranking
 SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
   ts_rank  
 -----------
@@ -613,6 +846,120 @@ SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s');
         0.1
 (1 row)
 
+SELECT ts_rank_cd(' a:1 s:2A d g'::tsvector, 'a <-> s');
+ ts_rank_cd 
+------------
+   0.181818
+(1 row)
+
+SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a <-> s');
+ ts_rank_cd 
+------------
+   0.133333
+(1 row)
+
+SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a <-> s');
+ ts_rank_cd 
+------------
+        0.1
+(1 row)
+
+SELECT ts_rank_cd(' a:1 s:2 d:2A g'::tsvector, 'a <-> s');
+ ts_rank_cd 
+------------
+        0.1
+(1 row)
+
+SELECT ts_rank_cd(' a:1 s:2,3A d:2A g'::tsvector, 'a <2> s:A');
+ ts_rank_cd 
+------------
+  0.0909091
+(1 row)
+
+SELECT ts_rank_cd(' a:1 b:2 s:3A d:2A g'::tsvector, 'a <2> s:A');
+ ts_rank_cd 
+------------
+  0.0909091
+(1 row)
+
+SELECT ts_rank_cd(' a:1 sa:2D sb:2A g'::tsvector, 'a <-> s:*');
+ ts_rank_cd 
+------------
+        0.1
+(1 row)
+
+SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:*');
+ ts_rank_cd 
+------------
+        0.1
+(1 row)
+
+SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:A');
+ ts_rank_cd 
+------------
+  0.0714286
+(1 row)
+
+SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:B');
+ ts_rank_cd 
+------------
+          0
+(1 row)
+
+SELECT 'a:1 b:2'::tsvector @@ 'a <-> b'::tsquery AS "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT 'a:1 b:2'::tsvector @@ 'a <0> b'::tsquery AS "false";
+ false 
+-------
+ f
+(1 row)
+
+SELECT 'a:1 b:2'::tsvector @@ 'a <1> b'::tsquery AS "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT 'a:1 b:3'::tsvector @@ 'a <-> b'::tsquery AS "false";
+ false 
+-------
+ f
+(1 row)
+
+SELECT 'a:1 b:3'::tsvector @@ 'a <0> b'::tsquery AS "false";
+ false 
+-------
+ f
+(1 row)
+
+SELECT 'a:1 b:3'::tsvector @@ 'a <1> b'::tsquery AS "false";
+ false 
+-------
+ f
+(1 row)
+
+SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery AS "true";
+ true 
+------
+ t
+(1 row)
+
+SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "true";
+ true 
+------
+ t
+(1 row)
+
 -- tsvector editing operations
 SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
      strip     
index d13ce2e3783acf5ce59370056bc95d68deeb48eb..4d0419e35a6d4c51bf31908068a000fd639ad71f 100644 (file)
@@ -142,6 +142,9 @@ SELECT to_tsvector('hunspell_tst', 'Booking the skies after rebookings for footb
 SELECT to_tsquery('hunspell_tst', 'footballklubber');
 SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
 
+SELECT to_tsquery('hunspell_tst', 'footballyklubber:b <-> sky');
+SELECT phraseto_tsquery('hunspell_tst', 'footballyklubber sky');
+
 -- Test ispell dictionary with hunspell affix with FLAG long in configuration
 ALTER TEXT SEARCH CONFIGURATION hunspell_tst ALTER MAPPING
        REPLACE hunspell WITH hunspell_long;
index 405278fb16226d5ab78d7f212a4536beab69be6f..ccd152591a8446383d410b344b35fba7fee95713 100644 (file)
@@ -129,6 +129,52 @@ SELECT plainto_tsquery('english', 'foo bar') || plainto_tsquery('english', 'asd
 SELECT plainto_tsquery('english', 'foo bar') || !!plainto_tsquery('english', 'asd fg');
 SELECT plainto_tsquery('english', 'foo bar') && 'asd | fg';
 
+-- Check stop word deletion, a and s are stop-words
+SELECT to_tsquery('english', '(1 <-> 2) <-> a');
+SELECT to_tsquery('english', '(1 <-> a) <-> 2');
+SELECT to_tsquery('english', '(a <-> 1) <-> 2');
+SELECT to_tsquery('english', 'a <-> (1 <-> 2)');
+SELECT to_tsquery('english', '1 <-> (a <-> 2)');
+SELECT to_tsquery('english', '1 <-> (2 <-> a)');
+
+SELECT to_tsquery('english', '(1 <-> 2) <3> a');
+SELECT to_tsquery('english', '(1 <-> a) <3> 2');
+SELECT to_tsquery('english', '(a <-> 1) <3> 2');
+SELECT to_tsquery('english', 'a <3> (1 <-> 2)');
+SELECT to_tsquery('english', '1 <3> (a <-> 2)');
+SELECT to_tsquery('english', '1 <3> (2 <-> a)');
+
+SELECT to_tsquery('english', '(1 <3> 2) <-> a');
+SELECT to_tsquery('english', '(1 <3> a) <-> 2');
+SELECT to_tsquery('english', '(a <3> 1) <-> 2');
+SELECT to_tsquery('english', 'a <-> (1 <3> 2)');
+SELECT to_tsquery('english', '1 <-> (a <3> 2)');
+SELECT to_tsquery('english', '1 <-> (2 <3> a)');
+
+SELECT to_tsquery('english', '((a <-> 1) <-> 2) <-> s');
+SELECT to_tsquery('english', '(2 <-> (a <-> 1)) <-> s');
+SELECT to_tsquery('english', '((1 <-> a) <-> 2) <-> s');
+SELECT to_tsquery('english', '(2 <-> (1 <-> a)) <-> s');
+SELECT to_tsquery('english', 's <-> ((a <-> 1) <-> 2)');
+SELECT to_tsquery('english', 's <-> (2 <-> (a <-> 1))');
+SELECT to_tsquery('english', 's <-> ((1 <-> a) <-> 2)');
+SELECT to_tsquery('english', 's <-> (2 <-> (1 <-> a))');
+
+SELECT to_tsquery('english', '((a <-> 1) <-> s) <-> 2');
+SELECT to_tsquery('english', '(s <-> (a <-> 1)) <-> 2');
+SELECT to_tsquery('english', '((1 <-> a) <-> s) <-> 2');
+SELECT to_tsquery('english', '(s <-> (1 <-> a)) <-> 2');
+SELECT to_tsquery('english', '2 <-> ((a <-> 1) <-> s)');
+SELECT to_tsquery('english', '2 <-> (s <-> (a <-> 1))');
+SELECT to_tsquery('english', '2 <-> ((1 <-> a) <-> s)');
+SELECT to_tsquery('english', '2 <-> (s <-> (1 <-> a))');
+
+SELECT to_tsquery('foo <-> (a <-> (the <-> bar))');
+SELECT to_tsquery('((foo <-> a) <-> the) <-> bar');
+SELECT to_tsquery('foo <-> a <-> the <-> bar');
+SELECT phraseto_tsquery('PostgreSQL can be extended by the user in many ways');
+
+
 SELECT ts_rank_cd(to_tsvector('english', '
 Day after day, day after day,
   We stuck, nor breath nor motion,
@@ -165,6 +211,18 @@ Water, water, every where,
 S. T. Coleridge (1772-1834)
 '), to_tsquery('english', 'ocean'));
 
+SELECT ts_rank_cd(to_tsvector('english', '
+Day after day, day after day,
+  We stuck, nor breath nor motion,
+As idle as a painted Ship
+  Upon a painted Ocean.
+Water, water, every where
+  And all the boards did shrink;
+Water, water, every where,
+  Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+'), to_tsquery('english', 'painted <-> Ship'));
+
 SELECT ts_rank_cd(strip(to_tsvector('both stripped')),
                   to_tsquery('both & stripped'));
 
@@ -208,6 +266,30 @@ Water, water, every where,
 S. T. Coleridge (1772-1834)
 ', to_tsquery('english', 'ocean'));
 
+SELECT ts_headline('english', '
+Day after day, day after day,
+  We stuck, nor breath nor motion,
+As idle as a painted Ship
+  Upon a painted Ocean.
+Water, water, every where
+  And all the boards did shrink;
+Water, water, every where,
+  Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+', phraseto_tsquery('english', 'painted Ocean'));
+
+SELECT ts_headline('english', '
+Day after day, day after day,
+  We stuck, nor breath nor motion,
+As idle as a painted Ship
+  Upon a painted Ocean.
+Water, water, every where
+  And all the boards did shrink;
+Water, water, every where,
+  Nor any drop to drink.
+S. T. Coleridge (1772-1834)
+', phraseto_tsquery('english', 'idle as a painted Ship'));
+
 SELECT ts_headline('english', '
 <html>
 <!-- some comment -->
@@ -222,6 +304,10 @@ ff-bg
 </html>',
 to_tsquery('english', 'sea&foo'), 'HighlightAll=true');
 
+SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 <-> 3', 'MaxWords=2, MinWords=1');
+SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 & 3', 'MaxWords=4, MinWords=1');
+SELECT ts_headline('simple', '1 2 3 1 3'::text, '1 <-> 3', 'MaxWords=4, MinWords=1');
+
 --Check if headline fragments work
 SELECT ts_headline('english', '
 Day after day, day after day,
@@ -283,6 +369,8 @@ CREATE TABLE test_tsquery (txtkeyword TEXT, txtsample TEXT);
 Moscow moskva | moscow
 'Sanct Peter'  Peterburg | peter | 'Sanct Peterburg'
 'foo bar qq'   foo & (bar | qq) & city
+1 & (2 <-> 3)  2 <-> 4
+5 <-> 6        5 <-> 7
 \.
 \set ECHO all
 
@@ -320,6 +408,11 @@ SELECT ts_rewrite( 'moscow', 'SELECT keyword, sample FROM test_tsquery');
 SELECT ts_rewrite( 'moscow & hotel', 'SELECT keyword, sample FROM test_tsquery');
 SELECT ts_rewrite( 'bar & new & qq & foo & york', 'SELECT keyword, sample FROM test_tsquery');
 
+SELECT ts_rewrite('1 & (2 <-> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
+SELECT ts_rewrite('1 & (2 <2> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
+SELECT ts_rewrite('5 <-> (1 & (2 <-> 3))', 'SELECT keyword, sample FROM test_tsquery'::text );
+SELECT ts_rewrite('5 <-> (6 | 8)', 'SELECT keyword, sample FROM test_tsquery'::text );
+
 
 SELECT keyword FROM test_tsquery WHERE keyword @> 'new';
 SELECT keyword FROM test_tsquery WHERE keyword @> 'moscow';
@@ -386,3 +479,11 @@ select * from pendtest where 'ipsa:*'::tsquery @@ ts;
 select * from pendtest where 'ips:*'::tsquery @@ ts;
 select * from pendtest where 'ipt:*'::tsquery @@ ts;
 select * from pendtest where 'ipi:*'::tsquery @@ ts;
+
+--check OP_PHRASE on index
+create temp table phrase_index_test(fts tsvector);
+insert into phrase_index_test values('A fat cat has just eaten a rat.');
+create index phrase_index_test_idx on phrase_index_test using gin(fts);
+set enable_seqscan = off;
+select * from phrase_index_test where fts @@ phraseto_tsquery('fat cat');
+set enable_seqscan = on;
index 38b7f65c2539a54b9f8e8813700c53a3917baa74..ecc71c85e68efb8a5900ee49777644b74db0e742 100644 (file)
@@ -58,12 +58,42 @@ SELECT E'1&(''2''&('' 4''&(\\|5 | ''6 \\'' !|&'')))'::tsquery;
 SELECT $$'\\as'$$::tsquery;
 SELECT 'a:* & nbb:*ac | doo:a* | goo'::tsquery;
 
+-- phrase transformation
+SELECT 'a <-> (b|c)'::tsquery;
+SELECT '(a|b) <-> c'::tsquery;
+SELECT '(a|b) <-> (d|c)'::tsquery;
+
+SELECT 'a <-> (b&c)'::tsquery;
+SELECT '(a&b) <-> c'::tsquery;
+SELECT '(a&b) <-> (d&c)'::tsquery;
+
+SELECT 'a <-> !b'::tsquery;
+SELECT '!a <-> b'::tsquery;
+SELECT '!a <-> !b'::tsquery;
+
+SELECT 'a <-> !(b&c)'::tsquery;
+SELECT 'a <-> !(b|c)'::tsquery;
+SELECT  '!(a&b) <-> c'::tsquery;
+SELECT  '!(a|b) <-> c'::tsquery;
+
+SELECT  '(!a|b) <-> c'::tsquery;
+SELECT  '(!a&b) <-> c'::tsquery;
+SELECT  'c <-> (!a|b)'::tsquery;
+SELECT  'c <-> (!a&b)'::tsquery;
+
+SELECT  '(a|b) <-> !c'::tsquery;
+SELECT  '(a&b) <-> !c'::tsquery;
+SELECT  '!c <-> (a|b)'::tsquery;
+SELECT  '!c <-> (a&b)'::tsquery;
+
+--comparisons
 SELECT 'a' < 'b & c'::tsquery as "true";
 SELECT 'a' > 'b & c'::tsquery as "false";
-SELECT 'a | f' < 'b & c'::tsquery as "true";
+SELECT 'a | f' < 'b & c'::tsquery as "false";
 SELECT 'a | ff' < 'b & c'::tsquery as "false";
 SELECT 'a | f | g' < 'b & c'::tsquery as "false";
 
+--concatenation
 SELECT numnode( 'new'::tsquery );
 SELECT numnode( 'new & york'::tsquery );
 SELECT numnode( 'new & york | qwery'::tsquery );
@@ -72,6 +102,11 @@ SELECT 'foo & bar'::tsquery && 'asd';
 SELECT 'foo & bar'::tsquery || 'asd & fg';
 SELECT 'foo & bar'::tsquery || !!'asd & fg'::tsquery;
 SELECT 'foo & bar'::tsquery && 'asd | fg';
+SELECT 'a' <-> 'b & d'::tsquery;
+SELECT 'a & g' <-> 'b & d'::tsquery;
+SELECT 'a & g' <-> 'b | d'::tsquery;
+SELECT 'a & g' <-> 'b <-> d'::tsquery;
+SELECT tsquery_phrase('a <3> g', 'b & d', 10);
 
 -- tsvector-tsquery operations
 
@@ -93,6 +128,23 @@ SELECT 'supernova'::tsvector @@ 'super:*'::tsquery AS "true";
 SELECT 'supeanova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
 SELECT 'supeznova supernova'::tsvector @@ 'super:*'::tsquery AS "true";
 
+--phrase search
+SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 2' AS "true";
+SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 2' AS "true";
+SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <-> 3' AS "false";
+SELECT to_tsvector('simple', '1 2 3 1') @@ '1 <2> 3' AS "true";
+
+SELECT to_tsvector('simple', '1 2 11 3') @@ '1 <-> 3' AS "false";
+SELECT to_tsvector('simple', '1 2 11 3') @@ '1:* <-> 3' AS "true";
+
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
+SELECT to_tsvector('simple', '1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> (2 <-> 3)' AS "false";
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <2> (2 <-> 3)' AS "true";
+SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
+SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
+
+--ranking
 SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
 SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s');
 SELECT ts_rank(' a:1 sa:2C d g'::tsvector, 'a | s:*');
@@ -114,6 +166,27 @@ SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a & s');
 SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a & s');
 SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s');
 
+SELECT ts_rank_cd(' a:1 s:2A d g'::tsvector, 'a <-> s');
+SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a <-> s');
+SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a <-> s');
+SELECT ts_rank_cd(' a:1 s:2 d:2A g'::tsvector, 'a <-> s');
+SELECT ts_rank_cd(' a:1 s:2,3A d:2A g'::tsvector, 'a <2> s:A');
+SELECT ts_rank_cd(' a:1 b:2 s:3A d:2A g'::tsvector, 'a <2> s:A');
+SELECT ts_rank_cd(' a:1 sa:2D sb:2A g'::tsvector, 'a <-> s:*');
+SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:*');
+SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:A');
+SELECT ts_rank_cd(' a:1 sa:2A sb:2D g'::tsvector, 'a <-> s:* <-> sa:B');
+
+SELECT 'a:1 b:2'::tsvector @@ 'a <-> b'::tsquery AS "true";
+SELECT 'a:1 b:2'::tsvector @@ 'a <0> b'::tsquery AS "false";
+SELECT 'a:1 b:2'::tsvector @@ 'a <1> b'::tsquery AS "true";
+SELECT 'a:1 b:2'::tsvector @@ 'a <2> b'::tsquery AS "true";
+SELECT 'a:1 b:3'::tsvector @@ 'a <-> b'::tsquery AS "false";
+SELECT 'a:1 b:3'::tsvector @@ 'a <0> b'::tsquery AS "false";
+SELECT 'a:1 b:3'::tsvector @@ 'a <1> b'::tsquery AS "false";
+SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery AS "true";
+SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "true";
+
 -- tsvector editing operations
 
 SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);