tsquery
------------------------
'fat' & 'rat' & !'cat'
-
-SELECT '(fat | rat) <-> cat'::tsquery;
- tsquery
------------------------------------
- 'fat' <-> 'cat' | 'rat' <-> 'cat'
</programlisting>
-
- The last example demonstrates that <type>tsquery</type> sometimes
- rearranges nested operators into a logically equivalent formulation.
</para>
<para>
text, any more than a <type>tsvector</type> is. A <type>tsquery</type>
contains search terms, which must be already-normalized lexemes, and
may combine multiple terms using AND, OR, NOT, and FOLLOWED BY operators.
- (For details see <xref linkend="datatype-tsquery">.) There are
+ (For syntax details see <xref linkend="datatype-tsquery">.) There are
functions <function>to_tsquery</>, <function>plainto_tsquery</>,
and <function>phraseto_tsquery</>
that are helpful in converting user-written text into a proper
at least one of its arguments must appear, while the <literal>!</> (NOT)
operator specifies that its argument must <emphasis>not</> appear in
order to have a match.
+ For example, the query <literal>fat &amp; ! rat</> matches documents that
+ contain <literal>fat</> but not <literal>rat</>.
</para>
<para>
then <literal>&</literal>, then <literal><-></literal>,
and <literal>!</literal> most tightly.
</para>
+
+ <para>
+ It's worth noticing that the AND/OR/NOT operators mean something subtly
+ different when they are within the arguments of a FOLLOWED BY operator
+ than when they are not, because within FOLLOWED BY the exact position of
+ the match is significant. For example, normally <literal>!x</> matches
+ only documents that do not contain <literal>x</> anywhere.
+ But <literal>!x &lt;-&gt; y</> matches <literal>y</> if it is not
+ immediately after an <literal>x</>; an occurrence of <literal>x</>
+ elsewhere in the document does not prevent a match. Another example is
+ that <literal>x &amp; y</> normally only requires that <literal>x</>
+ and <literal>y</> both appear somewhere in the document, but
+ <literal>(x &amp; y) &lt;-&gt; z</> requires <literal>x</>
+ and <literal>y</> to match at the same place, immediately before
+ a <literal>z</>. Thus this query behaves differently from
+ <literal>x &lt;-&gt; z &amp; y &lt;-&gt; z</>, which will match a
+ document containing two separate sequences <literal>x z</> and
+ <literal>y z</>. (This specific query is useless as written,
+ since <literal>x</> and <literal>y</> could not match at the same place;
+ but with more complex situations such as prefix-match patterns, a query
+ of this form could be useful.)
+ </para>
</sect2>
<sect2 id="textsearch-intro-configurations">
* Evaluate tsquery boolean expression using ternary logic.
*/
static GinTernaryValue
-TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem)
+TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem, bool in_phrase)
{
GinTernaryValue val1,
val2,
switch (curitem->qoperator.oper)
{
case OP_NOT:
- result = TS_execute_ternary(gcv, curitem + 1);
+ /* In phrase search, always return MAYBE since we lack positions */
+ if (in_phrase)
+ return GIN_MAYBE;
+ result = TS_execute_ternary(gcv, curitem + 1, in_phrase);
if (result == GIN_MAYBE)
return result;
return !result;
case OP_PHRASE:
/*
- * GIN doesn't contain any information about positions, treat
+ * GIN doesn't contain any information about positions, so treat
* OP_PHRASE as OP_AND with recheck requirement
*/
- *gcv->need_recheck = true;
+ *(gcv->need_recheck) = true;
+ /* Pass down in_phrase == true in case there's a NOT below */
+ in_phrase = true;
+
/* FALL THRU */
case OP_AND:
- val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left);
+ val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
+ in_phrase);
if (val1 == GIN_FALSE)
return GIN_FALSE;
- val2 = TS_execute_ternary(gcv, curitem + 1);
+ val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
if (val2 == GIN_FALSE)
return GIN_FALSE;
if (val1 == GIN_TRUE && val2 == GIN_TRUE)
return GIN_MAYBE;
case OP_OR:
- val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left);
+ val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left,
+ in_phrase);
if (val1 == GIN_TRUE)
return GIN_TRUE;
- val2 = TS_execute_ternary(gcv, curitem + 1);
+ val2 = TS_execute_ternary(gcv, curitem + 1, in_phrase);
if (val2 == GIN_TRUE)
return GIN_TRUE;
if (val1 == GIN_FALSE && val2 == GIN_FALSE)
res = TS_execute(GETQUERY(query),
&gcv,
- TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_AS_AND,
+ TS_EXEC_CALC_NOT | TS_EXEC_PHRASE_NO_POS,
checkcondition_gin);
}
gcv.map_item_operand = (int *) (extra_data[0]);
gcv.need_recheck = &recheck;
- res = TS_execute_ternary(&gcv, GETQUERY(query));
+ res = TS_execute_ternary(&gcv, GETQUERY(query), false);
if (res == GIN_TRUE && recheck)
res = GIN_MAYBE;
if (ISALLTRUE(key))
PG_RETURN_BOOL(true);
- PG_RETURN_BOOL(TS_execute(
- GETQUERY(query),
+ /* since signature is lossy, cannot specify CALC_NOT here */
+ PG_RETURN_BOOL(TS_execute(GETQUERY(query),
(void *) GETSIGN(key),
- TS_EXEC_PHRASE_AS_AND,
- checkcondition_bit
- ));
+ TS_EXEC_PHRASE_NO_POS,
+ checkcondition_bit));
}
else
{ /* only leaf pages */
chkval.arrb = GETARR(key);
chkval.arre = chkval.arrb + ARRNELEM(key);
- PG_RETURN_BOOL(TS_execute(
- GETQUERY(query),
+ PG_RETURN_BOOL(TS_execute(GETQUERY(query),
(void *) &chkval,
- TS_EXEC_PHRASE_AS_AND | TS_EXEC_CALC_NOT,
- checkcondition_arr
- ));
+ TS_EXEC_PHRASE_NO_POS | TS_EXEC_CALC_NOT,
+ checkcondition_arr));
}
}
curitem->oper == OP_OR ||
curitem->oper == OP_PHRASE);
- if (curitem->oper == OP_PHRASE)
- *needcleanup = true; /* push OP_PHRASE down later */
-
(*pos)++;
/* process RIGHT argument */
findoprnd_recurse(ptr, pos, nnodes, needcleanup);
+
curitem->left = *pos - tmp; /* set LEFT arg's offset */
/* process LEFT argument */
/*
- * Fills in the left-fields previously left unfilled. The input
- * QueryItems must be in polish (prefix) notation.
+ * Fill in the left-fields previously left unfilled.
+ * The input QueryItems must be in polish (prefix) notation.
+ * Also, set *needcleanup to true if there are any QI_VALSTOP nodes.
*/
static void
findoprnd(QueryItem *ptr, int size, bool *needcleanup)
memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
pfree(state.op);
- /* Set left operand pointers for every operator. */
+ /*
+ * Set left operand pointers for every operator. While we're at it,
+ * detect whether there are any QI_VALSTOP nodes.
+ */
findoprnd(ptr, query->size, &needcleanup);
/*
- * QI_VALSTOP nodes should be cleaned and OP_PHRASE should be pushed
- * down
+ * If there are QI_VALSTOP nodes, delete them and simplify the tree.
*/
if (needcleanup)
- return cleanup_fakeval_and_phrase(query);
+ query = cleanup_tsquery_stopwords(query);
return query;
}
*/
findoprnd(item, size, &needcleanup);
+ /* Can't have found any QI_VALSTOP nodes */
+ Assert(!needcleanup);
+
/* Copy operands to output struct */
for (i = 0; i < size; i++)
{
SET_VARSIZE(query, len + datalen);
- if (needcleanup)
- PG_RETURN_TSQUERY(cleanup_fakeval_and_phrase(query));
-
PG_RETURN_TSQUERY(query);
}
QueryItem *valnode;
} NODE;
-/*
- * To simplify walking on query tree and pushing down of phrase operator
- * we define some fake priority here: phrase operator has highest priority
- * of any other operators (and we believe here that OP_PHRASE is a highest
- * code of operations) and value node has ever highest priority.
- * Priority values of other operations don't matter until they are less than
- * phrase operator and value node.
- */
-#define VALUE_PRIORITY (OP_COUNT + 1)
-#define NODE_PRIORITY(x) \
- ( ((x)->valnode->qoperator.type == QI_OPR) ? \
- (x)->valnode->qoperator.oper : VALUE_PRIORITY )
-
/*
* make query tree from plain view of query
*/
return node;
}
-static NODE *
-copyNODE(NODE *node)
-{
- NODE *cnode = palloc(sizeof(NODE));
-
- /* since this function recurses, it could be driven to stack overflow. */
- check_stack_depth();
-
- cnode->valnode = palloc(sizeof(QueryItem));
- *(cnode->valnode) = *(node->valnode);
-
- if (node->valnode->type == QI_OPR)
- {
- cnode->right = copyNODE(node->right);
- if (node->valnode->qoperator.oper != OP_NOT)
- cnode->left = copyNODE(node->left);
- }
-
- return cnode;
-}
-
-static NODE *
-makeNODE(int8 op, NODE *left, NODE *right)
-{
- NODE *node = palloc(sizeof(NODE));
-
- /* zeroing allocation to prevent difference in unused bytes */
- node->valnode = palloc0(sizeof(QueryItem));
-
- node->valnode->qoperator.type = QI_OPR;
- node->valnode->qoperator.oper = op;
-
- node->left = left;
- node->right = right;
-
- return node;
-}
-
-/*
- * Move operation with high priority to the leaves. This guarantees
- * that the phrase operator will be near the bottom of the tree.
- * An idea behind is do not store position of lexemes during execution
- * of ordinary operations (AND, OR, NOT) because it could be expensive.
- * Actual transformation will be performed only on subtrees under the
- * <-> (<n>) operation since it's needed solely for the phrase operator.
- *
- * Rules:
- * a <-> (b | c) => (a <-> b) | (a <-> c)
- * (a | b) <-> c => (a <-> c) | (b <-> c)
- * a <-> !b => a & !(a <-> b)
- * !a <-> b => b & !(a <-> b)
- *
- * Warnings for readers:
- * a <-> b != b <-> a
- *
- * a <n> (b <n> c) != (a <n> b) <n> c since the phrase lengths are:
- * n 2n-1
- */
-static NODE *
-normalize_phrase_tree(NODE *node)
-{
- /* there should be no stop words at this point */
- Assert(node->valnode->type != QI_VALSTOP);
-
- if (node->valnode->type == QI_VAL)
- return node;
-
- /* since this function recurses, it could be driven to stack overflow. */
- check_stack_depth();
-
- Assert(node->valnode->type == QI_OPR);
-
- if (node->valnode->qoperator.oper == OP_NOT)
- {
- NODE *orignode = node;
-
- /* eliminate NOT sequence */
- while (node->valnode->type == QI_OPR &&
- node->valnode->qoperator.oper == node->right->valnode->qoperator.oper)
- {
- node = node->right->right;
- }
-
- if (orignode != node)
- /* current node isn't checked yet */
- node = normalize_phrase_tree(node);
- else
- node->right = normalize_phrase_tree(node->right);
- }
- else if (node->valnode->qoperator.oper == OP_PHRASE)
- {
- int16 distance;
- NODE *X;
-
- node->left = normalize_phrase_tree(node->left);
- node->right = normalize_phrase_tree(node->right);
-
- /*
- * if subtree contains only nodes with higher "priority" then we are
- * done. See comment near NODE_PRIORITY()
- */
- if (NODE_PRIORITY(node) <= NODE_PRIORITY(node->right) &&
- NODE_PRIORITY(node) <= NODE_PRIORITY(node->left))
- return node;
-
- /*
- * We can't swap left-right and works only with left child because of
- * a <-> b != b <-> a
- */
-
- distance = node->valnode->qoperator.distance;
-
- if (node->right->valnode->type == QI_OPR)
- {
- switch (node->right->valnode->qoperator.oper)
- {
- case OP_AND:
- /* a <-> (b & c) => (a <-> b) & (a <-> c) */
- node = makeNODE(OP_AND,
- makeNODE(OP_PHRASE,
- node->left,
- node->right->left),
- makeNODE(OP_PHRASE,
- copyNODE(node->left),
- node->right->right));
- node->left->valnode->qoperator.distance =
- node->right->valnode->qoperator.distance = distance;
- break;
- case OP_OR:
- /* a <-> (b | c) => (a <-> b) | (a <-> c) */
- node = makeNODE(OP_OR,
- makeNODE(OP_PHRASE,
- node->left,
- node->right->left),
- makeNODE(OP_PHRASE,
- copyNODE(node->left),
- node->right->right));
- node->left->valnode->qoperator.distance =
- node->right->valnode->qoperator.distance = distance;
- break;
- case OP_NOT:
- /* a <-> !b => a & !(a <-> b) */
- X = node->right;
- node->right = node->right->right;
- X->right = node;
- node = makeNODE(OP_AND,
- copyNODE(node->left),
- X);
- break;
- case OP_PHRASE:
- /* no-op */
- break;
- default:
- elog(ERROR, "Wrong type of tsquery node: %d",
- node->right->valnode->qoperator.oper);
- }
- }
-
- if (node->left->valnode->type == QI_OPR &&
- node->valnode->qoperator.oper == OP_PHRASE)
- {
- /*
- * if the node is still OP_PHRASE, check the left subtree,
- * otherwise the whole node will be transformed later.
- */
- switch (node->left->valnode->qoperator.oper)
- {
- case OP_AND:
- /* (a & b) <-> c => (a <-> c) & (b <-> c) */
- node = makeNODE(OP_AND,
- makeNODE(OP_PHRASE,
- node->left->left,
- node->right),
- makeNODE(OP_PHRASE,
- node->left->right,
- copyNODE(node->right)));
- node->left->valnode->qoperator.distance =
- node->right->valnode->qoperator.distance = distance;
- break;
- case OP_OR:
- /* (a | b) <-> c => (a <-> c) | (b <-> c) */
- node = makeNODE(OP_OR,
- makeNODE(OP_PHRASE,
- node->left->left,
- node->right),
- makeNODE(OP_PHRASE,
- node->left->right,
- copyNODE(node->right)));
- node->left->valnode->qoperator.distance =
- node->right->valnode->qoperator.distance = distance;
- break;
- case OP_NOT:
- /* !a <-> b => b & !(a <-> b) */
- X = node->left;
- node->left = node->left->right;
- X->right = node;
- node = makeNODE(OP_AND,
- X,
- copyNODE(node->right));
- break;
- case OP_PHRASE:
- /* no-op */
- break;
- default:
- elog(ERROR, "Wrong type of tsquery node: %d",
- node->left->valnode->qoperator.oper);
- }
- }
-
- /* continue transformation */
- node = normalize_phrase_tree(node);
- }
- else /* AND or OR */
- {
- node->left = normalize_phrase_tree(node->left);
- node->right = normalize_phrase_tree(node->right);
- }
-
- return node;
-}
-
/*
* Number of elements in query tree
*/
return size;
}
+/*
+ * Remove QI_VALSTOP (stopword) nodes from TSQuery.
+ */
TSQuery
-cleanup_fakeval_and_phrase(TSQuery in)
+cleanup_tsquery_stopwords(TSQuery in)
{
int32 len,
lenstr,
return out;
}
- /* push OP_PHRASE nodes down */
- root = normalize_phrase_tree(root);
-
/*
* Build TSQuery from plain view
*/
PG_FREE_IF_COPY(a, 0);
PG_FREE_IF_COPY(b, 1);
- PG_RETURN_POINTER(query);
+ PG_RETURN_TSQUERY(query);
}
Datum
PG_FREE_IF_COPY(a, 0);
PG_FREE_IF_COPY(b, 1);
- PG_RETURN_POINTER(cleanup_fakeval_and_phrase(query));
+ PG_RETURN_TSQUERY(query);
}
Datum
*
*-------------------------------------------------------------------------
*/
-
#include "postgres.h"
+#include <limits.h>
+
#include "access/htup_details.h"
#include "catalog/namespace.h"
#include "catalog/pg_type.h"
return res;
}
+/*
+ * Compute output position list for a tsquery operator in phrase mode.
+ *
+ * Merge the position lists in Ldata and Rdata as specified by "emit",
+ * returning the result list into *data. The input position lists must be
+ * sorted and unique, and the output will be as well.
+ *
+ * data: pointer to initially-all-zeroes output struct, or NULL
+ * Ldata, Rdata: input position lists
+ * emit: bitmask of TSPO_XXX flags
+ * Loffset: offset to be added to Ldata positions before comparing/outputting
+ * Roffset: offset to be added to Rdata positions before comparing/outputting
+ * max_npos: maximum possible required size of output position array
+ *
+ * Loffset and Roffset should not be negative, else we risk trying to output
+ * negative positions, which won't fit into WordEntryPos.
+ *
+ * Returns true if any positions were emitted to *data; or if data is NULL,
+ * returns true if any positions would have been emitted.
+ */
+#define TSPO_L_ONLY 0x01 /* emit positions appearing only in L */
+#define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */
+#define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */
+
+static bool
+TS_phrase_output(ExecPhraseData *data,
+ ExecPhraseData *Ldata,
+ ExecPhraseData *Rdata,
+ int emit,
+ int Loffset,
+ int Roffset,
+ int max_npos)
+{
+ int Lindex,
+ Rindex;
+
+ /* Loop until both inputs are exhausted */
+ Lindex = Rindex = 0;
+ while (Lindex < Ldata->npos || Rindex < Rdata->npos)
+ {
+ int Lpos,
+ Rpos;
+ int output_pos = 0; /* stays 0 when this step emits nothing */
+
+ /*
+ * Fetch current values to compare. WEP_GETPOS() is needed because
+ * ExecPhraseData->pos can point to a tsvector's WordEntryPosVector.
+ * The per-side offset is added before comparing, so both lists are
+ * merged in the same (shifted) coordinate system.
+ */
+ if (Lindex < Ldata->npos)
+ Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
+ else
+ {
+ /* L array exhausted, so we're done if R_ONLY isn't set */
+ if (!(emit & TSPO_R_ONLY))
+ break;
+ Lpos = INT_MAX; /* sentinel: no L position left to compare */
+ }
+ if (Rindex < Rdata->npos)
+ Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
+ else
+ {
+ /* R array exhausted, so we're done if L_ONLY isn't set */
+ if (!(emit & TSPO_L_ONLY))
+ break;
+ Rpos = INT_MAX; /* sentinel: no R position left to compare */
+ }
+
+ /* Merge-join the two input lists, advancing whichever side is behind */
+ if (Lpos < Rpos)
+ {
+ /* Lpos is not matched in Rdata, should we output it? */
+ if (emit & TSPO_L_ONLY)
+ output_pos = Lpos;
+ Lindex++;
+ }
+ else if (Lpos == Rpos)
+ {
+ /* Lpos and Rpos match ... should we output it? */
+ if (emit & TSPO_BOTH)
+ output_pos = Rpos;
+ Lindex++;
+ Rindex++;
+ }
+ else /* Lpos > Rpos */
+ {
+ /* Rpos is not matched in Ldata, should we output it? */
+ if (emit & TSPO_R_ONLY)
+ output_pos = Rpos;
+ Rindex++;
+ }
+
+ if (output_pos > 0)
+ {
+ if (data)
+ {
+ /* Store position, first allocating output array if needed */
+ if (data->pos == NULL)
+ {
+ data->pos = (WordEntryPos *)
+ palloc(max_npos * sizeof(WordEntryPos));
+ data->allocated = true;
+ }
+ data->pos[data->npos++] = output_pos;
+ }
+ else
+ {
+ /*
+ * Exact positions not needed, so return true as soon as we
+ * know there is at least one.
+ */
+ return true;
+ }
+ }
+ }
+
+ if (data && data->npos > 0)
+ {
+ /* Let's assert we didn't overrun the array */
+ Assert(data->npos <= max_npos);
+ return true;
+ }
+ return false;
+}
+
/*
* Execute tsquery at or below an OP_PHRASE operator.
*
- * This handles the recursion at levels where we need to care about
- * match locations. In addition to the same arguments used for TS_execute,
- * the caller may pass a preinitialized-to-zeroes ExecPhraseData struct to
- * be filled with lexeme match positions on success. data == NULL if no
- * match data need be returned. (In practice, outside callers pass NULL,
- * and only the internal recursion cases pass a data pointer.)
+ * This handles tsquery execution at recursion levels where we need to care
+ * about match locations.
+ *
+ * In addition to the same arguments used for TS_execute, the caller may pass
+ * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
+ * match position info on success. data == NULL if no position data need be
+ * returned. (In practice, outside callers pass NULL, and only the internal
+ * recursion cases pass a data pointer.)
+ * Note: the function assumes data != NULL for operators other than OP_PHRASE.
+ * This is OK because an outside call always starts from an OP_PHRASE node.
+ *
+ * The detailed semantics of the match data, given that the function returned
+ * "true" (successful match, or possible match), are:
+ *
+ * npos > 0, negate = false:
+ * query is matched at specified position(s) (and only those positions)
+ * npos > 0, negate = true:
+ * query is matched at all positions *except* specified position(s)
+ * npos = 0, negate = false:
+ * query is possibly matched, matching position(s) are unknown
+ * (this should only be returned when TS_EXEC_PHRASE_NO_POS flag is set)
+ * npos = 0, negate = true:
+ * query is matched at all positions
+ *
+ * Successful matches also return a "width" value which is the match width in
+ * lexemes, less one. Hence, "width" is zero for simple one-lexeme matches,
+ * and is the sum of the phrase operator distances for phrase matches. Note
+ * that when width > 0, the listed positions represent the ends of matches not
+ * the starts. (This unintuitive rule is needed to avoid possibly generating
+ * negative positions, which wouldn't fit into the WordEntryPos arrays.)
+ *
+ * When the function returns "false" (no match), it must return npos = 0,
+ * negate = false (which is the state initialized by the caller); but the
+ * "width" output in such cases is undefined.
*/
static bool
TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
- ExecPhraseData *data,
- TSExecuteCallback chkcond)
+ TSExecuteCallback chkcond,
+ ExecPhraseData *data)
{
+ ExecPhraseData Ldata,
+ Rdata;
+ bool lmatch,
+ rmatch;
+ int Loffset,
+ Roffset,
+ maxwidth;
+
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
if (curitem->type == QI_VAL)
- {
return chkcond(arg, (QueryOperand *) curitem, data);
- }
- else
+
+ switch (curitem->qoperator.oper)
{
- ExecPhraseData Ldata = {0, false, NULL},
- Rdata = {0, false, NULL};
- WordEntryPos *Lpos,
- *LposStart,
- *Rpos,
- *pos_iter = NULL;
+ case OP_NOT:
- Assert(curitem->qoperator.oper == OP_PHRASE);
+ /*
+ * Because a "true" result with no specific positions is taken as
+ * uncertain, we need no special care here for !TS_EXEC_CALC_NOT.
+ * If it's a false positive, the right things happen anyway.
+ *
+ * Also, we need not touch data->width, since a NOT operation does
+ * not change the match width.
+ */
+ if (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
+ {
+ if (data->npos > 0)
+ {
+ /* we have some positions, invert negate flag */
+ data->negate = !data->negate;
+ return true;
+ }
+ else if (data->negate)
+ {
+ /* change "match everywhere" to "match nowhere" */
+ data->negate = false;
+ return false;
+ }
+ /* match positions are, and remain, uncertain */
+ return true;
+ }
+ else
+ {
+ /* change "match nowhere" to "match everywhere" */
+ Assert(data->npos == 0 && !data->negate);
+ data->negate = true;
+ return true;
+ }
- if (!TS_phrase_execute(curitem + curitem->qoperator.left,
- arg, flags, &Ldata, chkcond))
- return false;
+ case OP_PHRASE:
+ case OP_AND:
+ memset(&Ldata, 0, sizeof(Ldata));
+ memset(&Rdata, 0, sizeof(Rdata));
- if (!TS_phrase_execute(curitem + 1, arg, flags, &Rdata, chkcond))
- return false;
+ if (!TS_phrase_execute(curitem + curitem->qoperator.left,
+ arg, flags, chkcond, &Ldata))
+ return false;
- /*
- * If either operand has no position information, then we normally
- * return false. But if TS_EXEC_PHRASE_AS_AND flag is set then we
- * return true, treating OP_PHRASE as if it were OP_AND.
- */
- if (Ldata.npos == 0 || Rdata.npos == 0)
- return (flags & TS_EXEC_PHRASE_AS_AND) ? true : false;
+ if (!TS_phrase_execute(curitem + 1,
+ arg, flags, chkcond, &Rdata))
+ return false;
- /*
- * Prepare output position array if needed.
- */
- if (data)
- {
/*
- * We can recycle the righthand operand's result array if it was
- * palloc'd, else must allocate our own. The number of matches
- * couldn't be more than the smaller of the two operands' matches.
+ * If either operand has no position information, then we can't
+ * return position data, only a "possible match" result. "Possible
+ * match" answers are only wanted when TS_EXEC_PHRASE_NO_POS flag
+ * is set, otherwise return false.
*/
- if (!Rdata.allocated)
- data->pos = palloc(sizeof(WordEntryPos) * Min(Ldata.npos, Rdata.npos));
- else
- data->pos = Rdata.pos;
+ if ((Ldata.npos == 0 && !Ldata.negate) ||
+ (Rdata.npos == 0 && !Rdata.negate))
+ return (flags & TS_EXEC_PHRASE_NO_POS) ? true : false;
- data->allocated = true;
- data->npos = 0;
- pos_iter = data->pos;
- }
+ if (curitem->qoperator.oper == OP_PHRASE)
+ {
+ /*
+ * Compute Loffset and Roffset suitable for phrase match, and
+ * compute overall width of whole phrase match.
+ */
+ Loffset = curitem->qoperator.distance + Rdata.width;
+ Roffset = 0;
+ if (data)
+ data->width = curitem->qoperator.distance +
+ Ldata.width + Rdata.width;
+ }
+ else
+ {
+ /*
+ * For OP_AND, set output width and alignment like OP_OR (see
+ * comment below)
+ */
+ maxwidth = Max(Ldata.width, Rdata.width);
+ Loffset = maxwidth - Ldata.width;
+ Roffset = maxwidth - Rdata.width;
+ if (data)
+ data->width = maxwidth;
+ }
- /*
- * Find matches by distance. WEP_GETPOS() is needed because
- * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
- *
- * Note that the output positions are those of the matching RIGHT
- * operands.
- */
- Rpos = Rdata.pos;
- LposStart = Ldata.pos;
- while (Rpos < Rdata.pos + Rdata.npos)
- {
- /*
- * We need to check all possible distances, so reset Lpos to
- * guaranteed not yet satisfied position.
- */
- Lpos = LposStart;
- while (Lpos < Ldata.pos + Ldata.npos)
+ if (Ldata.negate && Rdata.negate)
{
- if (WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) ==
- curitem->qoperator.distance)
- {
- /* MATCH! */
- if (data)
- {
- /* Store position for upper phrase operator */
- *pos_iter = WEP_GETPOS(*Rpos);
- pos_iter++;
-
- /*
- * Set left start position to next, because current
- * one could not satisfy distance for any other right
- * position
- */
- LposStart = Lpos + 1;
- break;
- }
- else
- {
- /*
- * We are at the root of the phrase tree and hence we
- * don't have to identify all the match positions.
- * Just report success.
- */
- return true;
- }
+ /* !L & !R: treat as !(L | R) */
+ (void) TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
+ Loffset, Roffset,
+ Ldata.npos + Rdata.npos);
+ if (data)
+ data->negate = true;
+ return true;
+ }
+ else if (Ldata.negate)
+ {
+ /* !L & R */
+ return TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_R_ONLY,
+ Loffset, Roffset,
+ Rdata.npos);
+ }
+ else if (Rdata.negate)
+ {
+ /* L & !R */
+ return TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_L_ONLY,
+ Loffset, Roffset,
+ Ldata.npos);
+ }
+ else
+ {
+ /* straight AND */
+ return TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_BOTH,
+ Loffset, Roffset,
+ Min(Ldata.npos, Rdata.npos));
+ }
- }
- else if (WEP_GETPOS(*Rpos) <= WEP_GETPOS(*Lpos) ||
- WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) <
- curitem->qoperator.distance)
- {
- /*
- * Go to the next Rpos, because Lpos is ahead or on less
- * distance than required by current operator
- */
- break;
+ case OP_OR:
+ memset(&Ldata, 0, sizeof(Ldata));
+ memset(&Rdata, 0, sizeof(Rdata));
- }
+ lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
+ arg, flags, chkcond, &Ldata);
+ rmatch = TS_phrase_execute(curitem + 1,
+ arg, flags, chkcond, &Rdata);
- Lpos++;
- }
+ if (!lmatch && !rmatch)
+ return false;
- Rpos++;
- }
+ /*
+ * If a valid operand has no position information, then we can't
+ * return position data, only a "possible match" result. "Possible
+ * match" answers are only wanted when TS_EXEC_PHRASE_NO_POS flag
+ * is set, otherwise return false.
+ */
+ if ((lmatch && Ldata.npos == 0 && !Ldata.negate) ||
+ (rmatch && Rdata.npos == 0 && !Rdata.negate))
+ return (flags & TS_EXEC_PHRASE_NO_POS) ? true : false;
- if (data)
- {
- data->npos = pos_iter - data->pos;
+ /*
+ * Cope with undefined output width from failed submatch. (This
+ * takes less code than trying to ensure that all failure returns
+ * set data->width to zero.)
+ */
+ if (!lmatch)
+ Ldata.width = 0;
+ if (!rmatch)
+ Rdata.width = 0;
- if (data->npos > 0)
+ /*
+ * For OP_AND and OP_OR, report the width of the wider of the two
+ * inputs, and align the narrower input's positions to the right
+ * end of that width. This rule deals at least somewhat
+ * reasonably with cases like "x <-> (y | z <-> q)".
+ */
+ maxwidth = Max(Ldata.width, Rdata.width);
+ Loffset = maxwidth - Ldata.width;
+ Roffset = maxwidth - Rdata.width;
+ data->width = maxwidth;
+
+ if (Ldata.negate && Rdata.negate)
+ {
+ /* !L | !R: treat as !(L & R) */
+ (void) TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_BOTH,
+ Loffset, Roffset,
+ Min(Ldata.npos, Rdata.npos));
+ data->negate = true;
return true;
- }
+ }
+ else if (Ldata.negate)
+ {
+ /* !L | R: treat as !(L & !R) */
+ (void) TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_L_ONLY,
+ Loffset, Roffset,
+ Ldata.npos);
+ data->negate = true;
+ return true;
+ }
+ else if (Rdata.negate)
+ {
+ /* L | !R: treat as !(!L & R) */
+ (void) TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_R_ONLY,
+ Loffset, Roffset,
+ Rdata.npos);
+ data->negate = true;
+ return true;
+ }
+ else
+ {
+ /* straight OR */
+ return TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
+ Loffset, Roffset,
+ Ldata.npos + Rdata.npos);
+ }
+
+ default:
+ elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
}
+ /* not reachable, but keep compiler quiet */
return false;
}
return TS_execute(curitem + 1, arg, flags, chkcond);
case OP_PHRASE:
-
- /*
- * do not check TS_EXEC_PHRASE_AS_AND here because chkcond() could
- * do something more if it's called from TS_phrase_execute()
- */
- return TS_phrase_execute(curitem, arg, flags, NULL, chkcond);
+ return TS_phrase_execute(curitem, arg, flags, chkcond, NULL);
default:
elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
* struct ExecPhraseData is passed to a TSExecuteCallback function if we need
* lexeme position data (because of a phrase-match operator in the tsquery).
* The callback should fill in position data when it returns true (success).
- * If it cannot return position data, it may ignore its "data" argument, but
- * then the caller of TS_execute() must pass the TS_EXEC_PHRASE_AS_AND flag
+ * If it cannot return position data, it may leave "data" unchanged, but
+ * then the caller of TS_execute() must pass the TS_EXEC_PHRASE_NO_POS flag
* and must arrange for a later recheck with position data available.
*
* The reported lexeme positions must be sorted and unique. Callers must only
* portion of a tsvector value. If "allocated" is true then the pos array
* is palloc'd workspace and caller may free it when done.
*
+ * "negate" means that the pos array contains positions where the query does
+ * not match, rather than positions where it does. "width" is positive when
+ * the match is wider than one lexeme. Neither of these fields normally needs
+ * to be touched by TSExecuteCallback functions; they are used for
+ * phrase-search processing within TS_execute.
+ *
* All fields of the ExecPhraseData struct are initially zeroed by caller.
*/
typedef struct ExecPhraseData
{
int npos; /* number of positions reported in pos[] */
bool allocated; /* pos points to palloc'd data (caller may free)? */
+ bool negate; /* pos[] lists where query is NOT matched */
WordEntryPos *pos; /* ordered, non-duplicate lexeme positions */
+ int width; /* match width in lexemes, less 1 (0 = one lexeme) */
} ExecPhraseData;
/*
* val: lexeme to test for presence of
* data: to be filled with lexeme positions; NULL if position data not needed
*
- * Return TRUE if lexeme is present in data, else FALSE
+ * Return TRUE if lexeme is present in data, else FALSE. If data is not
+ * NULL, it should be filled with lexeme positions, but the function can leave
+ * it as zeroes if position data is not available.
*/
typedef bool (*TSExecuteCallback) (void *arg, QueryOperand *val,
ExecPhraseData *data);
/*
* If TS_EXEC_CALC_NOT is not set, then NOT expressions are automatically
* evaluated to be true. Useful in cases where NOT cannot be accurately
- * computed (GiST) or it isn't important (ranking).
+ * computed (GiST) or it isn't important (ranking). From TS_execute's
+ * perspective, !CALC_NOT means that the TSExecuteCallback function might
+ * return false-positive indications of a lexeme's presence.
*/
#define TS_EXEC_CALC_NOT (0x01)
/*
- * Treat OP_PHRASE as OP_AND. Used when positional information is not
- * accessible, like in consistent methods of GIN/GiST indexes; rechecking
- * must occur later.
+ * If TS_EXEC_PHRASE_NO_POS is set, allow OP_PHRASE to be executed lossily
+ * in the absence of position information: a TRUE result indicates that the
+ * phrase might be present. Without this flag, OP_PHRASE always returns
+ * false if lexeme position information is not available.
*/
-#define TS_EXEC_PHRASE_AS_AND (0x02)
+#define TS_EXEC_PHRASE_NO_POS (0x02)
+/* Obsolete spelling of TS_EXEC_PHRASE_NO_POS: */
+#define TS_EXEC_PHRASE_AS_AND TS_EXEC_PHRASE_NO_POS
extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags,
TSExecuteCallback chkcond);
* TSQuery Utilities
*/
extern QueryItem *clean_NOT(QueryItem *ptr, int32 *len);
-extern TSQuery cleanup_fakeval_and_phrase(TSQuery in);
+extern TSQuery cleanup_tsquery_stopwords(TSQuery in);
typedef struct QTNode
{
(1 row)
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b <-> sky');
- to_tsquery
------------------------------------------------------------------
- 'foot':B <-> 'sky' & 'ball':B <-> 'sky' & 'klubber':B <-> 'sky'
+ to_tsquery
+-------------------------------------------------
+ ( 'foot':B & 'ball':B & 'klubber':B ) <-> 'sky'
(1 row)
SELECT phraseto_tsquery('hunspell_tst', 'footballyklubber sky');
- phraseto_tsquery
------------------------------------------------------------
- 'foot' <-> 'sky' & 'ball' <-> 'sky' & 'klubber' <-> 'sky'
+ phraseto_tsquery
+-------------------------------------------
+ ( 'foot' & 'ball' & 'klubber' ) <-> 'sky'
(1 row)
-- Test ispell dictionary with hunspell affix with FLAG long in configuration
-- Check stop word deletion, a and s are stop-words
SELECT to_tsquery('english', '!(a & !b) & c');
- to_tsquery
-------------
- 'b' & 'c'
+ to_tsquery
+-------------
+ !!'b' & 'c'
(1 row)
SELECT to_tsquery('english', '!(a & !b)');
to_tsquery
------------
- 'b'
+ !!'b'
(1 row)
SELECT to_tsquery('english', '(1 <-> 2) <-> a');
(1 row)
SELECT ts_rewrite('5 <-> (1 & (2 <-> 3))', 'SELECT keyword, sample FROM test_tsquery'::text );
- ts_rewrite
----------------------------------------
- '5' <-> '1' & '5' <-> ( '2' <-> '3' )
+ ts_rewrite
+-------------------------
+ '5' <-> ( '2' <-> '4' )
(1 row)
SELECT ts_rewrite('5 <-> (6 | 8)', 'SELECT keyword, sample FROM test_tsquery'::text );
- ts_rewrite
----------------------------
- '5' <-> '7' | '5' <-> '8'
+ ts_rewrite
+-----------------------
+ '5' <-> ( '6' | '8' )
(1 row)
-- Check empty substitution
'citi' & 'foo' & ( 'bar' | 'qq' ) & ( 'nyc' | 'big' & 'appl' | 'new' & 'york' )
(1 row)
+SELECT ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
+ ts_rewrite
+-----------------------------------------
+ ( 'bar' | 'baz' ) <-> ( 'bar' | 'baz' )
+(1 row)
+
+SELECT to_tsvector('foo bar') @@
+ ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
+ ?column?
+----------
+ f
+(1 row)
+
+SELECT to_tsvector('bar baz') @@
+ ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
+ ?column?
+----------
+ t
+(1 row)
+
RESET enable_seqscan;
--test GUC
SET default_text_search_config=simple;
!!'a' & !!'b'
(1 row)
--- phrase transformation
-SELECT 'a <-> (b|c)'::tsquery;
- tsquery
----------------------------
- 'a' <-> 'b' | 'a' <-> 'c'
-(1 row)
-
-SELECT '(a|b) <-> c'::tsquery;
- tsquery
----------------------------
- 'a' <-> 'c' | 'b' <-> 'c'
-(1 row)
-
-SELECT '(a|b) <-> (d|c)'::tsquery;
- tsquery
--------------------------------------------------------
- 'a' <-> 'd' | 'b' <-> 'd' | 'a' <-> 'c' | 'b' <-> 'c'
-(1 row)
-
-SELECT 'a <-> (b&c)'::tsquery;
- tsquery
----------------------------
- 'a' <-> 'b' & 'a' <-> 'c'
-(1 row)
-
-SELECT '(a&b) <-> c'::tsquery;
- tsquery
----------------------------
- 'a' <-> 'c' & 'b' <-> 'c'
-(1 row)
-
-SELECT '(a&b) <-> (d&c)'::tsquery;
- tsquery
--------------------------------------------------------
- 'a' <-> 'd' & 'b' <-> 'd' & 'a' <-> 'c' & 'b' <-> 'c'
-(1 row)
-
-SELECT 'a <-> !b'::tsquery;
- tsquery
-------------------------
- 'a' & !( 'a' <-> 'b' )
-(1 row)
-
-SELECT '!a <-> b'::tsquery;
- tsquery
-------------------------
- !( 'a' <-> 'b' ) & 'b'
-(1 row)
-
-SELECT '!a <-> !b'::tsquery;
- tsquery
-------------------------------------
- !'a' & !( !( 'a' <-> 'b' ) & 'b' )
-(1 row)
-
-SELECT 'a <-> !(b&c)'::tsquery;
- tsquery
---------------------------------------
- 'a' & !( 'a' <-> 'b' & 'a' <-> 'c' )
-(1 row)
-
-SELECT 'a <-> !(b|c)'::tsquery;
- tsquery
---------------------------------------
- 'a' & !( 'a' <-> 'b' | 'a' <-> 'c' )
-(1 row)
-
-SELECT '!(a&b) <-> c'::tsquery;
- tsquery
---------------------------------------
- !( 'a' <-> 'c' & 'b' <-> 'c' ) & 'c'
-(1 row)
-
-SELECT '!(a|b) <-> c'::tsquery;
- tsquery
---------------------------------------
- !( 'a' <-> 'c' | 'b' <-> 'c' ) & 'c'
-(1 row)
-
-SELECT '(!a|b) <-> c'::tsquery;
- tsquery
---------------------------------------
- !( 'a' <-> 'c' ) & 'c' | 'b' <-> 'c'
-(1 row)
-
-SELECT '(!a&b) <-> c'::tsquery;
- tsquery
---------------------------------------
- !( 'a' <-> 'c' ) & 'c' & 'b' <-> 'c'
-(1 row)
-
-SELECT 'c <-> (!a|b)'::tsquery;
- tsquery
---------------------------------------
- 'c' & !( 'c' <-> 'a' ) | 'c' <-> 'b'
-(1 row)
-
-SELECT 'c <-> (!a&b)'::tsquery;
- tsquery
---------------------------------------
- 'c' & !( 'c' <-> 'a' ) & 'c' <-> 'b'
-(1 row)
-
-SELECT '(a|b) <-> !c'::tsquery;
- tsquery
-------------------------------------------------
- ( 'a' | 'b' ) & !( 'a' <-> 'c' | 'b' <-> 'c' )
-(1 row)
-
-SELECT '(a&b) <-> !c'::tsquery;
- tsquery
---------------------------------------------
- 'a' & 'b' & !( 'a' <-> 'c' & 'b' <-> 'c' )
-(1 row)
-
-SELECT '!c <-> (a|b)'::tsquery;
- tsquery
--------------------------------------------------
- !( 'c' <-> 'a' ) & 'a' | !( 'c' <-> 'b' ) & 'b'
-(1 row)
-
-SELECT '!c <-> (a&b)'::tsquery;
- tsquery
--------------------------------------------------
- !( 'c' <-> 'a' ) & 'a' & !( 'c' <-> 'b' ) & 'b'
-(1 row)
-
--comparisons
SELECT 'a' < 'b & c'::tsquery as "true";
true
(1 row)
SELECT 'a' <-> 'b & d'::tsquery;
- ?column?
----------------------------
- 'a' <-> 'b' & 'a' <-> 'd'
+ ?column?
+-----------------------
+ 'a' <-> ( 'b' & 'd' )
(1 row)
SELECT 'a & g' <-> 'b & d'::tsquery;
- ?column?
--------------------------------------------------------
- 'a' <-> 'b' & 'g' <-> 'b' & 'a' <-> 'd' & 'g' <-> 'd'
+ ?column?
+---------------------------------
+ ( 'a' & 'g' ) <-> ( 'b' & 'd' )
(1 row)
SELECT 'a & g' <-> 'b | d'::tsquery;
- ?column?
--------------------------------------------------------
- 'a' <-> 'b' & 'g' <-> 'b' | 'a' <-> 'd' & 'g' <-> 'd'
+ ?column?
+---------------------------------
+ ( 'a' & 'g' ) <-> ( 'b' | 'd' )
(1 row)
SELECT 'a & g' <-> 'b <-> d'::tsquery;
- ?column?
----------------------------------------------------
- 'a' <-> ( 'b' <-> 'd' ) & 'g' <-> ( 'b' <-> 'd' )
+ ?column?
+-----------------------------------
+ ( 'a' & 'g' ) <-> ( 'b' <-> 'd' )
(1 row)
SELECT tsquery_phrase('a <3> g', 'b & d', 10);
- tsquery_phrase
----------------------------------------------
- 'a' <3> 'g' <10> 'b' & 'a' <3> 'g' <10> 'd'
+ tsquery_phrase
+--------------------------------
+ 'a' <3> 'g' <10> ( 'b' & 'd' )
(1 row)
-- tsvector-tsquery operations
t
(1 row)
-SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> (2 <-> 3)' AS "false";
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> (2 <-> 3)' AS "true";
+ true
+------
+ t
+(1 row)
+
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <2> (2 <-> 3)' AS "false";
false
-------
f
(1 row)
-SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <2> (2 <-> 3)' AS "true";
+SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
true
------
t
(1 row)
-SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
+SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
true
------
t
(1 row)
-SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
+-- without position data, phrase search does not match
+SELECT strip(to_tsvector('simple', '1 2 3 4')) @@ '1 <-> 2 <-> 3' AS "false";
+ false
+-------
+ f
+(1 row)
+
+select to_tsvector('simple', 'q x q y') @@ 'q <-> (x & y)' AS "false";
+ false
+-------
+ f
+(1 row)
+
+select to_tsvector('simple', 'q x') @@ 'q <-> (x | y <-> z)' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'q y') @@ 'q <-> (x | y <-> z)' AS "false";
+ false
+-------
+ f
+(1 row)
+
+select to_tsvector('simple', 'q y z') @@ 'q <-> (x | y <-> z)' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'q y x') @@ 'q <-> (x | y <-> z)' AS "false";
+ false
+-------
+ f
+(1 row)
+
+select to_tsvector('simple', 'q x y') @@ 'q <-> (x | y <-> z)' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'q x') @@ '(x | y <-> z) <-> q' AS "false";
+ false
+-------
+ f
+(1 row)
+
+select to_tsvector('simple', 'x q') @@ '(x | y <-> z) <-> q' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'x y q') @@ '(x | y <-> z) <-> q' AS "false";
+ false
+-------
+ f
+(1 row)
+
+select to_tsvector('simple', 'x y z') @@ '(x | y <-> z) <-> q' AS "false";
+ false
+-------
+ f
+(1 row)
+
+select to_tsvector('simple', 'x y z q') @@ '(x | y <-> z) <-> q' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'y z q') @@ '(x | y <-> z) <-> q' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'y y q') @@ '(x | y <-> z) <-> q' AS "false";
+ false
+-------
+ f
+(1 row)
+
+select to_tsvector('simple', 'y y q') @@ '(!x | y <-> z) <-> q' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'x y q') @@ '(!x | y <-> z) <-> q' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'y y q') @@ '(x | y <-> !z) <-> q' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'x q') @@ '(x | y <-> !z) <-> q' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'x q') @@ '(!x | y <-> z) <-> q' AS "false";
+ false
+-------
+ f
+(1 row)
+
+select to_tsvector('simple', 'z q') @@ '(!x | y <-> z) <-> q' AS "true";
+ true
+------
+ t
+(1 row)
+
+select to_tsvector('simple', 'x y q y') @@ '!x <-> y' AS "true";
true
------
t
f
(1 row)
+SELECT 'a:1 b:3'::tsvector @@ 'a <0> a:*'::tsquery AS "true";
+ true
+------
+ t
+(1 row)
+
-- tsvector editing operations
SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
strip
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'moscow & hotel') AS query;
SELECT ts_rewrite( query, 'SELECT keyword, sample FROM test_tsquery' ) FROM to_tsquery('english', 'bar & new & qq & foo & york') AS query;
+SELECT ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
+SELECT to_tsvector('foo bar') @@
+ ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
+SELECT to_tsvector('bar baz') @@
+ ts_rewrite(tsquery_phrase('foo', 'foo'), 'foo', 'bar | baz');
+
RESET enable_seqscan;
--test GUC
SELECT '!!a & b'::tsquery;
SELECT '!!a & !!b'::tsquery;
--- phrase transformation
-SELECT 'a <-> (b|c)'::tsquery;
-SELECT '(a|b) <-> c'::tsquery;
-SELECT '(a|b) <-> (d|c)'::tsquery;
-
-SELECT 'a <-> (b&c)'::tsquery;
-SELECT '(a&b) <-> c'::tsquery;
-SELECT '(a&b) <-> (d&c)'::tsquery;
-
-SELECT 'a <-> !b'::tsquery;
-SELECT '!a <-> b'::tsquery;
-SELECT '!a <-> !b'::tsquery;
-
-SELECT 'a <-> !(b&c)'::tsquery;
-SELECT 'a <-> !(b|c)'::tsquery;
-SELECT '!(a&b) <-> c'::tsquery;
-SELECT '!(a|b) <-> c'::tsquery;
-
-SELECT '(!a|b) <-> c'::tsquery;
-SELECT '(!a&b) <-> c'::tsquery;
-SELECT 'c <-> (!a|b)'::tsquery;
-SELECT 'c <-> (!a&b)'::tsquery;
-
-SELECT '(a|b) <-> !c'::tsquery;
-SELECT '(a&b) <-> !c'::tsquery;
-SELECT '!c <-> (a|b)'::tsquery;
-SELECT '!c <-> (a&b)'::tsquery;
-
--comparisons
SELECT 'a' < 'b & c'::tsquery as "true";
SELECT 'a' > 'b & c'::tsquery as "false";
SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
SELECT to_tsvector('simple', '1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
-SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> (2 <-> 3)' AS "false";
-SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <2> (2 <-> 3)' AS "true";
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <-> (2 <-> 3)' AS "true";
+SELECT to_tsvector('simple', '1 2 3 4') @@ '1 <2> (2 <-> 3)' AS "false";
SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '(1 <-> 2) <-> 3' AS "true";
SELECT to_tsvector('simple', '1 2 1 2 3 4') @@ '1 <-> 2 <-> 3' AS "true";
+-- without position data, phrase search does not match
+SELECT strip(to_tsvector('simple', '1 2 3 4')) @@ '1 <-> 2 <-> 3' AS "false";
+
+select to_tsvector('simple', 'q x q y') @@ 'q <-> (x & y)' AS "false";
+select to_tsvector('simple', 'q x') @@ 'q <-> (x | y <-> z)' AS "true";
+select to_tsvector('simple', 'q y') @@ 'q <-> (x | y <-> z)' AS "false";
+select to_tsvector('simple', 'q y z') @@ 'q <-> (x | y <-> z)' AS "true";
+select to_tsvector('simple', 'q y x') @@ 'q <-> (x | y <-> z)' AS "false";
+select to_tsvector('simple', 'q x y') @@ 'q <-> (x | y <-> z)' AS "true";
+select to_tsvector('simple', 'q x') @@ '(x | y <-> z) <-> q' AS "false";
+select to_tsvector('simple', 'x q') @@ '(x | y <-> z) <-> q' AS "true";
+select to_tsvector('simple', 'x y q') @@ '(x | y <-> z) <-> q' AS "false";
+select to_tsvector('simple', 'x y z') @@ '(x | y <-> z) <-> q' AS "false";
+select to_tsvector('simple', 'x y z q') @@ '(x | y <-> z) <-> q' AS "true";
+select to_tsvector('simple', 'y z q') @@ '(x | y <-> z) <-> q' AS "true";
+select to_tsvector('simple', 'y y q') @@ '(x | y <-> z) <-> q' AS "false";
+select to_tsvector('simple', 'y y q') @@ '(!x | y <-> z) <-> q' AS "true";
+select to_tsvector('simple', 'x y q') @@ '(!x | y <-> z) <-> q' AS "true";
+select to_tsvector('simple', 'y y q') @@ '(x | y <-> !z) <-> q' AS "true";
+select to_tsvector('simple', 'x q') @@ '(x | y <-> !z) <-> q' AS "true";
+select to_tsvector('simple', 'x q') @@ '(!x | y <-> z) <-> q' AS "false";
+select to_tsvector('simple', 'z q') @@ '(!x | y <-> z) <-> q' AS "true";
+select to_tsvector('simple', 'x y q y') @@ '!x <-> y' AS "true";
--ranking
SELECT ts_rank(' a:1 s:2C d g'::tsvector, 'a | s');
SELECT 'a:1 b:3'::tsvector @@ 'a <1> b'::tsquery AS "false";
SELECT 'a:1 b:3'::tsvector @@ 'a <2> b'::tsquery AS "true";
SELECT 'a:1 b:3'::tsvector @@ 'a <3> b'::tsquery AS "false";
+SELECT 'a:1 b:3'::tsvector @@ 'a <0> a:*'::tsquery AS "true";
-- tsvector editing operations