]> granicus.if.org Git - postgresql/commitdiff
Tsvector editing functions
authorTeodor Sigaev <teodor@sigaev.ru>
Fri, 11 Mar 2016 16:22:36 +0000 (19:22 +0300)
committerTeodor Sigaev <teodor@sigaev.ru>
Fri, 11 Mar 2016 16:22:36 +0000 (19:22 +0300)
Adds several tsvector editing functions: convert tsvector to/from text array,
set weight for given lexemes, delete lexeme(s), unnest, filter lexemes
with given weights

Author: Stas Kelvich with some editorization by me
Reviewers: Tomas Vondra, Teodor Sigaev

doc/src/sgml/func.sgml
doc/src/sgml/textsearch.sgml
src/backend/utils/adt/tsvector_op.c
src/include/catalog/pg_proc.h
src/include/tsearch/ts_type.h
src/test/regress/expected/tstypes.out
src/test/regress/sql/tstypes.sql

index 4b5ee8135f021fe5935f1a85f6290dca21d462c7..000489d961b53703750b047bff6b962938bcdad4 100644 (file)
@@ -9211,13 +9211,26 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
          <indexterm>
           <primary>setweight</primary>
          </indexterm>
-         <literal><function>setweight(<type>tsvector</>, <type>"char"</>)</function></literal>
+         <literal><function>setweight(<replaceable class="PARAMETER">vector</replaceable> <type>tsvector</>, <replaceable class="PARAMETER">weight</replaceable> <type>"char"</>)</function></literal>
         </entry>
         <entry><type>tsvector</type></entry>
-        <entry>assign weight to each element of <type>tsvector</></entry>
+        <entry>assign <replaceable class="PARAMETER">weight</replaceable> to each element of <replaceable class="PARAMETER">vector</replaceable></entry>
         <entry><literal>setweight('fat:2,4 cat:3 rat:5B'::tsvector, 'A')</literal></entry>
         <entry><literal>'cat':3A 'fat':2A,4A 'rat':5A</literal></entry>
        </row>
+       <row>
+        <entry>
+         <indexterm>
+          <primary>setweight</primary>
+          <secondary>setweight by filter</secondary>
+         </indexterm>
+         <literal><function>setweight(<replaceable class="PARAMETER">vector</replaceable> <type>tsvector</>, <replaceable class="PARAMETER">weight</replaceable> <type>"char"</>, <replaceable class="PARAMETER">lexemes</replaceable> <type>text[]</>)</function></literal>
+        </entry>
+        <entry><type>tsvector</type></entry>
+        <entry>assign <replaceable class="PARAMETER">weight</replaceable> to elements of <replaceable class="PARAMETER">vector</replaceable> that are listed in <replaceable class="PARAMETER">lexemes</replaceable> array</entry>
+        <entry><literal>setweight('fat:2,4 cat:3 rat:5B'::tsvector, 'A', '{cat,rat}')</literal></entry>
+        <entry><literal>'cat':3A 'fat':2,4 'rat':5A</literal></entry>
+       </row>
        <row>
         <entry>
          <indexterm>
@@ -9230,6 +9243,80 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
         <entry><literal>strip('fat:2,4 cat:3 rat:5A'::tsvector)</literal></entry>
         <entry><literal>'cat' 'fat' 'rat'</literal></entry>
        </row>
+       <row>
+        <entry>
+         <indexterm>
+          <primary>delete</primary>
+          <secondary>delete lexeme</secondary>
+         </indexterm>
+         <literal><function>delete(<replaceable class="PARAMETER">vector</replaceable> <type>tsvector</>, <replaceable class="PARAMETER">lexeme</replaceable> <type>text</>)</function></literal>
+        </entry>
+        <entry><type>tsvector</type></entry>
+        <entry>remove given <replaceable class="PARAMETER">lexeme</replaceable> from <replaceable class="PARAMETER">vector</replaceable></entry>
+        <entry><literal>delete('fat:2,4 cat:3 rat:5A'::tsvector, 'fat')</literal></entry>
+        <entry><literal>'cat':3 'rat':5A</literal></entry>
+       </row>
+       <row>
+        <entry>
+         <indexterm>
+          <primary>delete</primary>
+          <secondary>delete lexemes array</secondary>
+         </indexterm>
+         <literal><function>delete(<replaceable class="PARAMETER">vector</replaceable> <type>tsvector</>, <replaceable class="PARAMETER">lexemes</replaceable> <type>text[]</>)</function></literal>
+        </entry>
+        <entry><type>tsvector</type></entry>
+        <entry>remove any occurrence of lexemes in <replaceable class="PARAMETER">lexemes</replaceable> array from <replaceable class="PARAMETER">vector</replaceable></entry>
+        <entry><literal>delete('fat:2,4 cat:3 rat:5A'::tsvector, ARRAY['fat','rat'])</literal></entry>
+        <entry><literal>'cat':3</literal></entry>
+       </row>
+       <row>
+        <entry>
+         <indexterm>
+          <primary>unnest</primary>
+         </indexterm>
+         <literal><function>unnest(<type>tsvector</>, OUT <replaceable class="PARAMETER">lexeme</> <type>text</>, OUT <replaceable class="PARAMETER">positions</> <type>smallint[]</>, OUT <replaceable class="PARAMETER">weights</> <type>text[]</>)</function></literal>
+        </entry>
+        <entry><type>setof record</type></entry>
+        <entry>expand a tsvector to a set of rows</entry>
+        <entry><literal>unnest('fat:2,4 cat:3 rat:5A'::tsvector)</literal></entry>
+        <entry><literal>(cat,{3},{D}) ...</literal></entry>
+       </row>
+       <row>
+        <entry>
+         <indexterm>
+          <primary>tsvector_to_array</primary>
+         </indexterm>
+         <literal><function>tsvector_to_array(<type>tsvector</>)</function></literal>
+        </entry>
+        <entry><type>text[]</type></entry>
+        <entry>convert <type>tsvector</> to array of lexemes</entry>
+        <entry><literal>tsvector_to_array('fat:2,4 cat:3 rat:5A'::tsvector)</literal></entry>
+        <entry><literal>{cat,fat,rat}</literal></entry>
+       </row>
+       <row>
+        <entry>
+         <indexterm>
+          <primary>array_to_tsvector</primary>
+         </indexterm>
+         <literal><function>array_to_tsvector(<type>text[]</>)</function></literal>
+        </entry>
+        <entry><type>tsvector</type></entry>
+        <entry>convert array of lexemes to <type>tsvector</type></entry>
+        <entry><literal>array_to_tsvector('{fat,cat,rat}'::text[])</literal></entry>
+        <entry><literal>'fat' 'cat' 'rat'</literal></entry>
+       </row>
+       <row>
+        <entry>
+         <indexterm>
+          <primary>filter</primary>
+         </indexterm>
+         <literal><function>filter(<replaceable class="PARAMETER">vector</replaceable> <type>tsvector</>, <replaceable class="PARAMETER">weights</replaceable> <type>"char"[]</>)</function></literal>
+        </entry>
+        <entry><type>tsvector</type></entry>
+        <entry>Select only elements with given <replaceable class="PARAMETER">weights</replaceable> from <replaceable class="PARAMETER">vector</replaceable></entry>
+        <entry><literal>filter('fat:2,4 cat:3b rat:5A'::tsvector, '{a,b}')</literal></entry>
+        <entry><literal>'cat':3B 'rat':5A</literal></entry>
+       </row>
        <row>
         <entry>
          <indexterm>
index ff9997606850bd517c55f73687b9d07c25e9f7e7..ea3abc9e15a3e6401a83ba18ad900aa27e687bcf 100644 (file)
@@ -1326,6 +1326,10 @@ FROM (SELECT id, body, q, ts_rank_cd(ti, q) AS rank
 
    </variablelist>
 
+   <para>
+    A full list of <type>tsvector</>-related functions is available in <xref linkend="textsearch-functions-table">.
+   </para>
+
   </sect2>
 
   <sect2 id="textsearch-manipulate-tsquery">
index a3f1c36187d7d84a2b951ba50738f033268b6268..6a01276ca269a17801a3ad80080a1bd1786d1ca6 100644 (file)
@@ -14,6 +14,7 @@
 
 #include "postgres.h"
 
+#include "access/htup_details.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_type.h"
 #include "commands/trigger.h"
@@ -65,6 +66,7 @@ typedef struct
 #define STATHDRSIZE (offsetof(TSVectorStat, data))
 
 static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
+static int tsvector_bsearch(TSVector tsin, char *lexin, int lexin_len);
 
 /*
  * Order: haspos, len, word, for all positions (pos, weight)
@@ -251,6 +253,90 @@ tsvector_setweight(PG_FUNCTION_ARGS)
        PG_RETURN_POINTER(out);
 }
 
+/*
+ * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
+ *
+ * Assign weight char_weight to every position of those elements of tsin
+ * that are listed in lexemes.  Listed lexemes that are not found in tsin,
+ * or that are stored without position data, are skipped.
+ *
+ * Returns a modified copy of tsin.  Raises an error for an unrecognized
+ * weight letter or for a NULL element in lexemes.
+ */
+Datum
+tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
+{
+       TSVector        tsin = PG_GETARG_TSVECTOR(0);
+       char            char_weight = PG_GETARG_CHAR(1);
+       ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(2);
+
+       TSVector        tsout;
+       int                     i,
+                               j,
+                               nlexemes,
+                               weight;
+       WordEntry  *entry;
+       Datum      *dlexemes;
+       bool       *nulls;
+
+       /* Map the weight letter (either case) to its internal numeric code. */
+       switch (char_weight)
+       {
+               case 'A': case 'a':
+                       weight = 3;
+                       break;
+               case 'B': case 'b':
+                       weight = 2;
+                       break;
+               case 'C': case 'c':
+                       weight = 1;
+                       break;
+               case 'D': case 'd':
+                       weight = 0;
+                       break;
+               default:
+                       /* internal error */
+                       elog(ERROR, "unrecognized weight: %c", char_weight);
+       }
+
+       /* Work on a private copy so the input datum is never modified. */
+       tsout = (TSVector) palloc(VARSIZE(tsin));
+       memcpy(tsout, tsin, VARSIZE(tsin));
+       entry = ARRPTR(tsout);
+
+       deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
+                                         &dlexemes, &nulls, &nlexemes);
+
+       /*
+        * Assuming that lexemes array is significantly shorter than tsvector
+        * we can iterate through lexemes performing binary search
+        * of each lexeme from lexemes in tsvector.
+        */
+       for (i = 0; i < nlexemes; i++)
+       {
+               char   *lex;
+               int             lex_len,
+                               lex_pos;
+
+               if (nulls[i])
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("lexeme array may not contain nulls")));
+
+               /*
+                * Array elements can carry a short (1-byte) varlena header, so the
+                * payload must be located with VARDATA_ANY to agree with
+                * VARSIZE_ANY_EXHDR; plain VARDATA assumes a 4-byte header.
+                */
+               lex = VARDATA_ANY(dlexemes[i]);
+               lex_len = VARSIZE_ANY_EXHDR(dlexemes[i]);
+               lex_pos = tsvector_bsearch(tsout, lex, lex_len);
+
+               /* Rewrite the weight of each position of a matching lexeme. */
+               if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
+               {
+                       WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
+                       while (j--)
+                       {
+                               WEP_SETWEIGHT(*p, weight);
+                               p++;
+                       }
+               }
+       }
+
+       PG_FREE_IF_COPY(tsin, 0);
+       PG_FREE_IF_COPY(lexemes, 2);
+
+       PG_RETURN_POINTER(tsout);
+}
+
 #define compareEntry(pa, a, pb, b) \
        tsCompareString((pa) + (a)->pos, (a)->len,      \
                                        (pb) + (b)->pos, (b)->len,      \
@@ -291,6 +377,483 @@ add_pos(TSVector src, WordEntry *srcptr,
        return *clen - startlen;
 }
 
+/*
+ * Binary-search the entry array of a TSVector for the given lexeme.
+ *
+ * lexeme/lexeme_len describe the string to look for; it is compared
+ * against each probed entry with tsCompareString, so the search relies on
+ * the entries being ordered consistently with that comparison.
+ *
+ * Returns the index of the matching entry, or -1 if there is none.
+ */
+static int
+tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
+{
+       WordEntry  *entries = ARRPTR(tsv);
+       char       *strings = STRPTR(tsv);
+       int                     lo = 0;
+       int                     hi = tsv->size;
+
+       /* Invariant: a match, if any, lies in the half-open range [lo, hi). */
+       while (lo < hi)
+       {
+               int                     mid = (lo + hi) / 2;
+               WordEntry  *probe = entries + mid;
+               int                     cmp;
+
+               cmp = tsCompareString(lexeme, lexeme_len,
+                                                         strings + probe->pos, probe->len,
+                                                         false);
+
+               if (cmp == 0)
+                       return mid;             /* found it */
+
+               if (cmp < 0)
+                       hi = mid;
+               else
+                       lo = mid + 1;
+       }
+
+       return -1;
+}
+
+/*
+ * qsort comparator: orders two int32 values ascending.
+ * Returns -1, 0 or 1.
+ */
+static int
+compareint(const void *va, const void *vb)
+{
+       const int32 lhs = *((const int32 *) va);
+       const int32 rhs = *((const int32 *) vb);
+
+       if (lhs < rhs)
+               return -1;
+       if (lhs > rhs)
+               return 1;
+       return 0;
+}
+
+/*
+ * Internal routine to delete lexemes from TSVector by array of offsets.
+ *
+ * int *indices_to_delete -- array of lexeme offsets to delete; it is sorted
+ *                           and de-duplicated in place by this function
+ * int indices_count -- size of that array
+ *
+ * Every offset is expected to lie in [0, tsv->size).
+ *
+ * Returns new TSVector without given lexemes along with their positions
+ * and weights.
+ */
+static TSVector
+tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
+                                                  int indices_count)
+{
+       TSVector        tsout;
+       WordEntry  *arrin = ARRPTR(tsv),
+                          *arrout;
+       char       *data = STRPTR(tsv),
+                          *dataout;
+       int                     i, j, k,
+                               curoff;
+
+       /*
+        * Sort the filter array to simplify the membership check later, then
+        * squeeze out duplicates.  The caller may name the same lexeme more
+        * than once; counting it twice would make the output size computed
+        * below too small and mis-place the output string area.
+        */
+       if (indices_count > 1)
+       {
+               int                     kp = 0;
+
+               qsort(indices_to_delete, indices_count, sizeof(int), compareint);
+               for (k = 1; k < indices_count; k++)
+               {
+                       if (indices_to_delete[k] != indices_to_delete[kp])
+                               indices_to_delete[++kp] = indices_to_delete[k];
+               }
+               indices_count = kp + 1;
+       }
+
+       /*
+        * Here we overestimate tsout size, since we don't know the exact size
+        * occupied by positions and weights. We will set the exact size later
+        * after a pass through TSVector.
+        */
+       tsout = (TSVector) palloc0(VARSIZE(tsv));
+       arrout = ARRPTR(tsout);
+       tsout->size = tsv->size - indices_count;
+
+       /*
+        * Copy tsv to tsout skipping lexemes that are listed in
+        * indices_to_delete.  STRPTR depends on tsout->size, which is why the
+        * size is set before dataout is computed.
+        */
+       curoff = 0;
+       dataout = STRPTR(tsout);
+       for (i = j = k = 0; i < tsv->size; i++)
+       {
+               /*
+                * Here we should check whether current i is present in
+                * indices_to_delete or not. Since indices_to_delete is already
+                * sorted and unique we advance its index only when we have a match.
+                */
+               if (k < indices_count && i == indices_to_delete[k]){
+                       k++;
+                       continue;
+               }
+
+               /* Copy lexeme, its positions and weights */
+               memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
+               arrout[j].haspos = arrin[i].haspos;
+               arrout[j].len = arrin[i].len;
+               arrout[j].pos = curoff;
+               curoff += arrin[i].len;
+               if (arrin[i].haspos)
+               {
+                       /* position count (uint16) followed by the position entries */
+                       int len = POSDATALEN(tsv, arrin+i) * sizeof(WordEntryPos) +
+                                         sizeof(uint16);
+                       curoff = SHORTALIGN(curoff);
+                       memcpy(dataout + curoff,
+                                  STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
+                                  len);
+                       curoff += len;
+               }
+
+               j++;
+       }
+
+       /*
+        * After the pass through TSVector k should equal exactly indices_count.
+        * If it doesn't then the caller provided us with indices outside of
+        * [0, tsv->size) range and estimation of tsout's size is wrong.
+        */
+       Assert(k == indices_count);
+
+       SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
+       return tsout;
+}
+
+/*
+ * delete(tsvector, text): remove a single lexeme from a tsvector.
+ *
+ * If the lexeme does not occur in the input, the input tsvector itself is
+ * returned; otherwise a new tsvector without that lexeme is built.
+ */
+Datum
+tsvector_delete_str(PG_FUNCTION_ARGS)
+{
+       TSVector        tsin = PG_GETARG_TSVECTOR(0);
+       text       *tlexeme = PG_GETARG_TEXT_P(1);
+       TSVector        tsout;
+       int                     skip_index;
+
+       skip_index = tsvector_bsearch(tsin,
+                                                                 VARDATA(tlexeme),
+                                                                 VARSIZE_ANY_EXHDR(tlexeme));
+
+       /* Lexeme is not present: hand the input back unchanged. */
+       if (skip_index == -1)
+               PG_RETURN_POINTER(tsin);
+
+       tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
+
+       PG_FREE_IF_COPY(tsin, 0);
+       PG_FREE_IF_COPY(tlexeme, 1);
+       PG_RETURN_POINTER(tsout);
+}
+
+/*
+ * Delete given array of lexemes from tsvector.
+ * Implementation of user-level delete(tsvector, text[]).
+ *
+ * Array elements that do not occur in the tsvector are ignored.  A NULL
+ * array element raises an error.
+ */
+Datum
+tsvector_delete_arr(PG_FUNCTION_ARGS)
+{
+       TSVector        tsin = PG_GETARG_TSVECTOR(0),
+                               tsout;
+       ArrayType  *lexemes = PG_GETARG_ARRAYTYPE_P(1);
+       int                     i, nlex,
+                               skip_count,
+                          *skip_indices;
+       Datum      *dlexemes;
+       bool       *nulls;
+
+       deconstruct_array(lexemes, TEXTOID, -1, false, 'i',
+                                         &dlexemes, &nulls, &nlex);
+
+       /*
+        * In typical use case array of lexemes to delete is relatively small.
+        * So here we optimize things for that scenario: iterate through the
+        * array performing binary search of each of its lexemes in tsvector.
+        */
+       skip_indices = palloc0(nlex * sizeof(int));
+       for (i = skip_count = 0; i < nlex; i++)
+       {
+               char *lex;
+               int lex_len,
+                       lex_pos;
+
+               if (nulls[i])
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("lexeme array may not contain nulls")));
+
+               /*
+                * Array elements can carry a short (1-byte) varlena header, so the
+                * payload must be located with VARDATA_ANY to agree with
+                * VARSIZE_ANY_EXHDR; plain VARDATA assumes a 4-byte header.
+                */
+               lex = VARDATA_ANY(dlexemes[i]);
+               lex_len = VARSIZE_ANY_EXHDR(dlexemes[i]);
+               lex_pos = tsvector_bsearch(tsin, lex, lex_len);
+
+               /* Remember the entry offset of every lexeme that was found. */
+               if (lex_pos >= 0)
+                       skip_indices[skip_count++] = lex_pos;
+       }
+
+       tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
+
+       pfree(skip_indices);
+       PG_FREE_IF_COPY(tsin, 0);
+       PG_FREE_IF_COPY(lexemes, 1);
+
+       PG_RETURN_POINTER(tsout);
+}
+
+/*
+ * Expand a tsvector into a set of rows, one row per lexeme, with columns:
+ *     lexeme: the lexeme as text
+ *     positions: int2 array of the lexeme's positions
+ *     weights: text array of one-letter weights, parallel to positions
+ * For a lexeme stored without position data, positions and weights are NULL.
+ */
+Datum
+tsvector_unnest(PG_FUNCTION_ARGS)
+{
+       FuncCallContext    *funcctx;
+       TSVector                        tsin;
+       WordEntry                  *entry;
+       HeapTuple                       tuple;
+       int                                     idx;
+       Datum                           values[3];
+       bool                            nulls[] = {false, false, false};
+
+       if (SRF_IS_FIRSTCALL())
+       {
+               MemoryContext   callercxt;
+               TupleDesc               rowdesc;
+
+               funcctx = SRF_FIRSTCALL_INIT();
+
+               /* Everything set up here must live across all calls. */
+               callercxt = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+               rowdesc = CreateTemplateTupleDesc(3, false);
+               TupleDescInitEntry(rowdesc, (AttrNumber) 1, "lexeme",
+                                                  TEXTOID, -1, 0);
+               TupleDescInitEntry(rowdesc, (AttrNumber) 2, "positions",
+                                                  INT2ARRAYOID, -1, 0);
+               TupleDescInitEntry(rowdesc, (AttrNumber) 3, "weights",
+                                                  TEXTARRAYOID, -1, 0);
+               funcctx->tuple_desc = BlessTupleDesc(rowdesc);
+
+               /* Keep a private copy of the argument as the per-query state. */
+               funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
+
+               MemoryContextSwitchTo(callercxt);
+       }
+
+       funcctx = SRF_PERCALL_SETUP();
+       tsin = (TSVector) funcctx->user_fctx;
+
+       /* All lexemes have been emitted: release our copy and finish. */
+       if (!(funcctx->call_cntr < tsin->size))
+       {
+               pfree(tsin);
+               SRF_RETURN_DONE(funcctx);
+       }
+
+       /* The call counter doubles as the index of the entry to emit. */
+       idx = funcctx->call_cntr;
+       entry = ARRPTR(tsin) + idx;
+
+       values[0] = PointerGetDatum(
+               cstring_to_text_with_len(STRPTR(tsin) + entry->pos, entry->len));
+
+       if (!entry->haspos)
+       {
+               nulls[1] = true;
+               nulls[2] = true;
+       }
+       else
+       {
+               WordEntryPosVector *posv = _POSVECPTR(tsin, entry);
+               Datum      *positions = palloc(posv->npos * sizeof(Datum));
+               Datum      *weights = palloc(posv->npos * sizeof(Datum));
+               int                     n;
+
+               /*
+                * Internally tsvector stores position and weight in the same
+                * uint16 (2 bits for weight, 14 for position). Split each one
+                * into the two output arrays.
+                */
+               for (n = 0; n < posv->npos; n++)
+               {
+                       char            letter = 'D' - WEP_GETWEIGHT(posv->pos[n]);
+
+                       positions[n] = Int16GetDatum(WEP_GETPOS(posv->pos[n]));
+                       weights[n] = PointerGetDatum(
+                               cstring_to_text_with_len(&letter, 1));
+               }
+
+               values[1] = PointerGetDatum(
+                       construct_array(positions, posv->npos, INT2OID, 2, true, 's'));
+               values[2] = PointerGetDatum(
+                       construct_array(weights, posv->npos, TEXTOID, -1, false, 'i'));
+       }
+
+       tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+       SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+}
+
+/*
+ * Convert tsvector to array of lexemes.
+ *
+ * Returns a text[] holding one element per lexeme, in the order the
+ * lexemes are stored in the tsvector.  Positions and weights are dropped.
+ */
+Datum
+tsvector_to_array(PG_FUNCTION_ARGS)
+{
+       TSVector        tsin = PG_GETARG_TSVECTOR(0);
+       WordEntry  *arrin = ARRPTR(tsin);
+       Datum      *elements;
+       int                     i;
+       ArrayType  *array;
+
+       /*
+        * The number of lexemes is data-dependent, so keep the scratch array in
+        * palloc'd memory rather than in a variable-length array on the stack.
+        */
+       elements = palloc(tsin->size * sizeof(Datum));
+
+       for (i = 0; i < tsin->size; i++)
+       {
+               elements[i] = PointerGetDatum(
+                       cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos, arrin[i].len)
+               );
+       }
+
+       array = construct_array(elements, tsin->size, TEXTOID, -1, false, 'i');
+
+       pfree(elements);
+       PG_FREE_IF_COPY(tsin, 0);
+       PG_RETURN_POINTER(array);
+}
+
+/*
+ * qsort comparator for Datums holding text lexemes; orders them with
+ * tsCompareString, the same comparison tsvector_bsearch uses.
+ */
+static int
+compare_text_lexemes(const void *va, const void *vb)
+{
+       Datum           a = *((const Datum *) va);
+       Datum           b = *((const Datum *) vb);
+
+       return tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
+                                                  VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
+                                                  false);
+}
+
+/*
+ * Build tsvector from array of lexemes.
+ *
+ * The resulting entries carry no positions.  The lexemes are sorted and
+ * de-duplicated first, because the lookups in this file (tsvector_bsearch)
+ * binary-search the entry array and so need it to be ordered.  A NULL
+ * array element raises an error.
+ */
+Datum
+array_to_tsvector(PG_FUNCTION_ARGS)
+{
+       ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
+       TSVector        tsout;
+       Datum      *dlexemes;
+       WordEntry  *arrout;
+       bool       *nulls;
+       int                     nitems,
+                               i,
+                               j,
+                               tslen,
+                               datalen = 0;
+       char       *cur;
+
+       deconstruct_array(v, TEXTOID, -1, false, 'i', &dlexemes, &nulls, &nitems);
+
+       /* Reject NULL elements before any element is dereferenced. */
+       for (i = 0; i < nitems; i++)
+       {
+               if (nulls[i])
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("lexeme array may not contain nulls")));
+       }
+
+       /* Sort, then keep only the first of each run of equal lexemes. */
+       if (nitems > 1)
+       {
+               qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
+               j = 0;
+               for (i = 1; i < nitems; i++)
+               {
+                       if (compare_text_lexemes(&dlexemes[j], &dlexemes[i]) < 0)
+                               dlexemes[++j] = dlexemes[i];
+               }
+               nitems = ++j;
+       }
+
+       /* Total string space needed by the surviving lexemes. */
+       for (i = 0; i < nitems; i++)
+               datalen += VARSIZE_ANY_EXHDR(dlexemes[i]);
+
+       tslen = CALCDATASIZE(nitems, datalen);
+       tsout = (TSVector) palloc0(tslen);
+       SET_VARSIZE(tsout, tslen);
+       tsout->size = nitems;
+       arrout = ARRPTR(tsout);
+       cur = STRPTR(tsout);
+
+       for (i = 0; i < nitems; i++)
+       {
+               /*
+                * Array elements can carry a short (1-byte) varlena header, so use
+                * VARDATA_ANY to stay consistent with VARSIZE_ANY_EXHDR.
+                */
+               char *lex = VARDATA_ANY(dlexemes[i]);
+               int lex_len = VARSIZE_ANY_EXHDR(dlexemes[i]);
+
+               memcpy(cur, lex, lex_len);
+               arrout[i].haspos = 0;
+               arrout[i].len = lex_len;
+               arrout[i].pos = cur - STRPTR(tsout);
+               cur += lex_len;
+       }
+
+       PG_FREE_IF_COPY(v, 0);
+       PG_RETURN_POINTER(tsout);
+}
+
+/*
+ * Leave only elements with given weights from tsvector.
+ *
+ * For every lexeme only the positions whose weight is listed in the weights
+ * array are kept.  Lexemes without position data, and lexemes left with no
+ * positions after filtering, are dropped from the result.  Raises an error
+ * for a NULL or unrecognized element in the weights array.
+ */
+Datum
+tsvector_filter(PG_FUNCTION_ARGS)
+{
+       TSVector        tsin = PG_GETARG_TSVECTOR(0),
+                               tsout;
+       ArrayType  *weights = PG_GETARG_ARRAYTYPE_P(1);
+       WordEntry  *arrin = ARRPTR(tsin),
+                          *arrout;
+       char       *datain = STRPTR(tsin),
+                          *dataout;
+       Datum      *dweights;
+       bool       *nulls;
+       int                     nweights;
+       int                     i, j;
+       /* cur_pos is a byte offset into the output data: it must not be a char */
+       int                     cur_pos = 0;
+       char            mask = 0;
+
+       deconstruct_array(weights, CHAROID, 1, true, 'c',
+                                         &dweights, &nulls, &nweights);
+
+       /* Build a bitmask with bit (1 << weight code) set for each weight. */
+       for (i = 0; i < nweights; i++)
+       {
+               char char_weight;
+
+               if (nulls[i])
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("weight array may not contain nulls")));
+
+               char_weight = DatumGetChar(dweights[i]);
+               switch (char_weight)
+               {
+                       case 'A': case 'a':
+                               mask = mask | 8;
+                               break;
+                       case 'B': case 'b':
+                               mask = mask | 4;
+                               break;
+                       case 'C': case 'c':
+                               mask = mask | 2;
+                               break;
+                       case 'D': case 'd':
+                               mask = mask | 1;
+                               break;
+                       default:
+                               /* internal error */
+                               elog(ERROR, "unrecognized weight: %c", char_weight);
+               }
+       }
+
+       /*
+        * Allocate as much as the input occupies; the output can only be
+        * smaller.  The size is provisionally set to the input's so that
+        * STRPTR(tsout) leaves room for the largest possible entry array.
+        */
+       tsout = (TSVector) palloc0(VARSIZE(tsin));
+       tsout->size = tsin->size;
+       arrout = ARRPTR(tsout);
+       dataout = STRPTR(tsout);
+
+       for (i = j = 0; i < tsin->size; i++)
+       {
+               WordEntryPosVector *posvin,
+                                                  *posvout;
+               int npos = 0;
+               int k;
+
+               if (!arrin[i].haspos)
+                       continue;
+
+               posvin  = _POSVECPTR(tsin, arrin + i);
+               posvout = (WordEntryPosVector *)
+                                               (dataout + SHORTALIGN(cur_pos + arrin[i].len));
+
+               /* Copy over only the positions whose weight is in the mask. */
+               for (k = 0; k < posvin->npos; k++)
+               {
+                       if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
+                               posvout->pos[npos++] = posvin->pos[k];
+               }
+
+               if (!npos) /* no satisfactory positions found, so skip that lexeme */
+                       continue;
+
+               arrout[j].haspos = true;
+               arrout[j].len = arrin[i].len;
+               arrout[j].pos = cur_pos;
+
+               memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
+               posvout->npos = npos;
+               cur_pos += SHORTALIGN(arrin[i].len);
+               cur_pos += POSDATALEN(tsout, arrout+j) * sizeof(WordEntryPos) +
+                                  sizeof(uint16);
+               j++;
+       }
+
+       /*
+        * Now that the real number of entries is known, the string area starts
+        * earlier than where it was written; slide the data down to it.
+        */
+       tsout->size = j;
+       if (dataout != STRPTR(tsout))
+               memmove(STRPTR(tsout), dataout, cur_pos);
+
+       SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
+
+       PG_FREE_IF_COPY(tsin, 0);
+       PG_RETURN_POINTER(tsout);
+}
 
 Datum
 tsvector_concat(PG_FUNCTION_ARGS)
index 451bad7b4e55d9abbf89bf985082f31366d5c089..5c71bce07a72c700ddaa11193b195ed794e37631 100644 (file)
@@ -4498,8 +4498,22 @@ DESCR("number of lexemes");
 DATA(insert OID = 3623 (  strip                                        PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 3614 "3614" _null_ _null_ _null_ _null_ _null_ tsvector_strip _null_ _null_ _null_ ));
 DESCR("strip position information");
 DATA(insert OID = 3624 (  setweight                            PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3614 "3614 18" _null_ _null_ _null_ _null_ _null_ tsvector_setweight _null_ _null_ _null_ ));
-DESCR("set weight of lexeme's entries");
-DATA(insert OID = 3625 (  tsvector_concat              PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 3614 "3614 3614" _null_ _null_ _null_ _null_ _null_ tsvector_concat _null_ _null_ _null_ ));
+DESCR("set given weight for whole tsvector");
+DATA(insert OID = 3320 (  setweight                            PGNSP PGUID 12 1 0  0 0 f f f f t f i s 3 0 3614 "3614 18 1009" _null_ _null_ _null_ _null_ _null_ tsvector_setweight_by_filter _null_ _null_ _null_ ));
+DESCR("set given weight for given lexemes");
+DATA(insert OID = 3625 (  tsvector_concat              PGNSP PGUID 12 1 0  0 0 f f f f t f i s 2 0 3614 "3614 3614" _null_ _null_ _null_ _null_ _null_ tsvector_concat _null_ _null_ _null_ ));
+DATA(insert OID = 3321 (  delete                               PGNSP PGUID 12 1 0  0 0 f f f f t f i s 2 0 3614 "3614 25" _null_ _null_ _null_ _null_ _null_ tsvector_delete_str _null_ _null_ _null_ ));
+DESCR("delete lexeme");
+DATA(insert OID = 3323 (  delete                               PGNSP PGUID 12 1 0  0 0 f f f f t f i s 2 0 3614 "3614 1009" _null_ _null_ _null_ _null_ _null_ tsvector_delete_arr _null_ _null_ _null_ ));
+DESCR("delete given lexemes");
+DATA(insert OID = 3322 (  unnest                               PGNSP PGUID 12 1 10 0 0 f f f f t t i s 1 0 2249 "3614" "{3614,25,1005,1009}" "{i,o,o,o}" "{tsvector,lexeme,positions,weights}"  _null_ _null_ tsvector_unnest _null_ _null_ _null_ ));
+DESCR("expand tsvector to set of rows");
+DATA(insert OID = 3326 (  tsvector_to_array            PGNSP PGUID 12 1 0  0 0 f f f f t f i s 1 0 1009 "3614" _null_ _null_ _null_ _null_ _null_ tsvector_to_array _null_ _null_ _null_ ));
+DESCR("convert to lexeme's array");
+DATA(insert OID = 3327 (  array_to_tsvector            PGNSP PGUID 12 1 0  0 0 f f f f t f i s 1 0 3614 "1009" _null_ _null_ _null_ _null_ _null_ array_to_tsvector _null_ _null_ _null_ ));
+DESCR("build tsvector from lexeme's array");
+DATA(insert OID = 3319 (  filter                               PGNSP PGUID 12 1 0  0 0 f f f f t f i s 2 0 3614 "3614 1002" _null_ _null_ _null_ _null_ _null_ tsvector_filter _null_ _null_ _null_ ));
+DESCR("returns tsvector that contain only postings with given weights");
 
 DATA(insert OID = 3634 (  ts_match_vq                  PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3614 3615" _null_ _null_ _null_ _null_ _null_ ts_match_vq _null_ _null_ _null_ ));
 DATA(insert OID = 3635 (  ts_match_qv                  PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "3615 3614" _null_ _null_ _null_ _null_ _null_ ts_match_qv _null_ _null_ _null_ ));
index dc6067a93253fe8b8e968575db90d223a6b5afaf..bc99524dc082575338534419e8eeb56a1fc193ab 100644 (file)
@@ -141,7 +141,14 @@ extern Datum tsvector_cmp(PG_FUNCTION_ARGS);
 extern Datum tsvector_length(PG_FUNCTION_ARGS);
 extern Datum tsvector_strip(PG_FUNCTION_ARGS);
 extern Datum tsvector_setweight(PG_FUNCTION_ARGS);
+extern Datum tsvector_setweight_by_filter(PG_FUNCTION_ARGS);
 extern Datum tsvector_concat(PG_FUNCTION_ARGS);
+extern Datum tsvector_delete_str(PG_FUNCTION_ARGS);
+extern Datum tsvector_delete_arr(PG_FUNCTION_ARGS);
+extern Datum tsvector_unnest(PG_FUNCTION_ARGS);
+extern Datum tsvector_to_array(PG_FUNCTION_ARGS);
+extern Datum array_to_tsvector(PG_FUNCTION_ARGS);
+extern Datum tsvector_filter(PG_FUNCTION_ARGS);
 extern Datum tsvector_update_trigger_byid(PG_FUNCTION_ARGS);
 extern Datum tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS);
 
index 6284fb6181340dd806089384c8d74e9392869132..a386a46361a8eee68ce6cc8599ecd60918c95223 100644 (file)
@@ -83,18 +83,6 @@ SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
  'a':3A,4B 'b':2A 'ba':1237
 (1 row)
 
-SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
-                        setweight                         
-----------------------------------------------------------
- 'a':1C,3C 'asd':1C 'w':5C,6C,12C,13C 'zxc':81C,222C,567C
-(1 row)
-
-SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
-     strip     
----------------
- 'a' 'asd' 'w'
-(1 row)
-
 --Base tsquery test
 SELECT '1'::tsquery;
  tsquery 
@@ -625,3 +613,212 @@ SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s');
         0.1
 (1 row)
 
+-- tsvector editing operations
+SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+     strip     
+---------------
+ 'a' 'asd' 'w'
+(1 row)
+
+SELECT strip('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+                    strip                     
+----------------------------------------------
+ 'base' 'hidden' 'rebel' 'spaceship' 'strike'
+(1 row)
+
+SELECT strip('base hidden rebel spaceship strike'::tsvector);
+                    strip                     
+----------------------------------------------
+ 'base' 'hidden' 'rebel' 'spaceship' 'strike'
+(1 row)
+
+SELECT delete(to_tsvector('english', 'Rebel spaceships, striking from a hidden base'), 'spaceship');
+                  delete                  
+------------------------------------------
+ 'base':7 'hidden':6 'rebel':1 'strike':3
+(1 row)
+
+SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'base');
+                            delete                            
+--------------------------------------------------------------
+ 'hidden':6 'rebel':1 'spaceship':2,33A,34B,35C,36 'strike':3
+(1 row)
+
+SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'bas');
+                                delete                                 
+-----------------------------------------------------------------------
+ 'base':7 'hidden':6 'rebel':1 'spaceship':2,33A,34B,35C,36 'strike':3
+(1 row)
+
+SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'bases');
+                                delete                                 
+-----------------------------------------------------------------------
+ 'base':7 'hidden':6 'rebel':1 'spaceship':2,33A,34B,35C,36 'strike':3
+(1 row)
+
+SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'spaceship');
+                  delete                  
+------------------------------------------
+ 'base':7 'hidden':6 'rebel':1 'strike':3
+(1 row)
+
+SELECT delete('base hidden rebel spaceship strike'::tsvector, 'spaceship');
+              delete              
+----------------------------------
+ 'base' 'hidden' 'rebel' 'strike'
+(1 row)
+
+SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceship','rebel']);
+             delete             
+--------------------------------
+ 'base':7 'hidden':6 'strike':3
+(1 row)
+
+SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceships','rebel']);
+                           delete                            
+-------------------------------------------------------------
+ 'base':7 'hidden':6 'spaceship':2,33A,34B,35C,36 'strike':3
+(1 row)
+
+SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceshi','rebel']);
+                           delete                            
+-------------------------------------------------------------
+ 'base':7 'hidden':6 'spaceship':2,33A,34B,35C,36 'strike':3
+(1 row)
+
+SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceship','leya','rebel']);
+             delete             
+--------------------------------
+ 'base':7 'hidden':6 'strike':3
+(1 row)
+
+SELECT delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel']);
+          delete          
+--------------------------
+ 'base' 'hidden' 'strike'
+(1 row)
+
+SELECT delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', NULL]);
+ERROR:  lexeme array may not contain nulls
+SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+                   unnest                    
+---------------------------------------------
+ (base,{7},{D})
+ (hidden,{6},{D})
+ (rebel,{1},{D})
+ (spaceship,"{2,33,34,35,36}","{D,A,B,C,D}")
+ (strike,{3},{D})
+(5 rows)
+
+SELECT unnest('base hidden rebel spaceship strike'::tsvector);
+    unnest     
+---------------
+ (base,,)
+ (hidden,,)
+ (rebel,,)
+ (spaceship,,)
+ (strike,,)
+(5 rows)
+
+SELECT * FROM unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+  lexeme   |    positions    |   weights   
+-----------+-----------------+-------------
+ base      | {7}             | {D}
+ hidden    | {6}             | {D}
+ rebel     | {1}             | {D}
+ spaceship | {2,33,34,35,36} | {D,A,B,C,D}
+ strike    | {3}             | {D}
+(5 rows)
+
+SELECT * FROM unnest('base hidden rebel spaceship strike'::tsvector);
+  lexeme   | positions | weights 
+-----------+-----------+---------
+ base      |           | 
+ hidden    |           | 
+ rebel     |           | 
+ spaceship |           | 
+ strike    |           | 
+(5 rows)
+
+SELECT lexeme, positions[1] from unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+  lexeme   | positions 
+-----------+-----------
+ base      |         7
+ hidden    |         6
+ rebel     |         1
+ spaceship |         2
+ strike    |         3
+(5 rows)
+
+SELECT tsvector_to_array('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+          tsvector_to_array           
+--------------------------------------
+ {base,hidden,rebel,spaceship,strike}
+(1 row)
+
+SELECT tsvector_to_array('base hidden rebel spaceship strike'::tsvector);
+          tsvector_to_array           
+--------------------------------------
+ {base,hidden,rebel,spaceship,strike}
+(1 row)
+
+SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
+              array_to_tsvector               
+----------------------------------------------
+ 'base' 'hidden' 'rebel' 'spaceship' 'strike'
+(1 row)
+
+SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
+ERROR:  lexeme array may not contain nulls
+SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+                        setweight                         
+----------------------------------------------------------
+ 'a':1C,3C 'asd':1C 'w':5C,6C,12C,13C 'zxc':81C,222C,567C
+(1 row)
+
+SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c');
+                        setweight                         
+----------------------------------------------------------
+ 'a':1C,3C 'asd':1C 'w':5C,6C,12C,13C 'zxc':81C,222C,567C
+(1 row)
+
+SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a}');
+                      setweight                       
+------------------------------------------------------
+ 'a':1C,3C 'asd':1C 'w':5,6,12B,13A 'zxc':81,222A,567
+(1 row)
+
+SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a}');
+                      setweight                       
+------------------------------------------------------
+ 'a':1C,3C 'asd':1C 'w':5,6,12B,13A 'zxc':81,222A,567
+(1 row)
+
+SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a,zxc}');
+                       setweight                        
+--------------------------------------------------------
+ 'a':1C,3C 'asd':1C 'w':5,6,12B,13A 'zxc':81C,222C,567C
+(1 row)
+
+SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', '{a,zxc}');
+            setweight            
+---------------------------------
+ 'a' 'asd' 'w':5,6,12B,13A 'zxc'
+(1 row)
+
+SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', NULL]);
+ERROR:  lexeme array may not contain nulls
+SELECT filter('base:7A empir:17 evil:15 first:11 galact:16 hidden:6A rebel:1A spaceship:2A strike:3A victori:12 won:9'::tsvector, '{a}');
+                           filter                            
+-------------------------------------------------------------
+ 'base':7A 'hidden':6A 'rebel':1A 'spaceship':2A 'strike':3A
+(1 row)
+
+SELECT filter('base hidden rebel spaceship strike'::tsvector, '{a}');
+ filter 
+--------
+(1 row)
+
+SELECT filter('base hidden rebel spaceship strike'::tsvector, '{a,b,NULL}');
+ERROR:  weight array may not contain nulls
index fd7c7024f5e547018323b1766eb229afb8c541d9..db62c5460da34baa4d542ef6030e97031c915832 100644 (file)
@@ -14,8 +14,6 @@ SELECT $$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector;
 SELECT tsvectorin(tsvectorout($$'\\as' ab\c ab\\c AB\\\c ab\\\\c$$::tsvector));
 SELECT '''w'':4A,3B,2C,1D,5 a:8';
 SELECT 'a:3A b:2a'::tsvector || 'ba:1234 a:1B';
-SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
-SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
 
 --Base tsquery test
 SELECT '1'::tsquery;
@@ -115,3 +113,48 @@ SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a | s');
 SELECT ts_rank_cd(' a:1 s:2C d g'::tsvector, 'a & s');
 SELECT ts_rank_cd(' a:1 s:2B d g'::tsvector, 'a & s');
 SELECT ts_rank_cd(' a:1 s:2 d g'::tsvector, 'a & s');
+
+-- tsvector editing operations
+
+SELECT strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector);
+SELECT strip('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+SELECT strip('base hidden rebel spaceship strike'::tsvector);
+
+SELECT delete(to_tsvector('english', 'Rebel spaceships, striking from a hidden base'), 'spaceship');
+SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'base');
+SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'bas');
+SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'bases');
+SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, 'spaceship');
+SELECT delete('base hidden rebel spaceship strike'::tsvector, 'spaceship');
+
+SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceship','rebel']);
+SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceships','rebel']);
+SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceshi','rebel']);
+SELECT delete('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector, ARRAY['spaceship','leya','rebel']);
+SELECT delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel']);
+SELECT delete('base hidden rebel spaceship strike'::tsvector, ARRAY['spaceship','leya','rebel', NULL]);
+
+SELECT unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+SELECT unnest('base hidden rebel spaceship strike'::tsvector);
+SELECT * FROM unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+SELECT * FROM unnest('base hidden rebel spaceship strike'::tsvector);
+SELECT lexeme, positions[1] from unnest('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+
+SELECT tsvector_to_array('base:7 hidden:6 rebel:1 spaceship:2,33A,34B,35C,36D strike:3'::tsvector);
+SELECT tsvector_to_array('base hidden rebel spaceship strike'::tsvector);
+
+SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship','strike']);
+SELECT array_to_tsvector(ARRAY['base','hidden','rebel','spaceship', NULL]);
+
+SELECT setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c');
+SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c');
+SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a}');
+SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a}');
+SELECT setweight('a:1,3A asd:1C w:5,6,12B,13A zxc:81,222A,567'::tsvector, 'c', '{a,zxc}');
+SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', '{a,zxc}');
+SELECT setweight('a asd w:5,6,12B,13A zxc'::tsvector, 'c', ARRAY['a', 'zxc', NULL]);
+
+SELECT filter('base:7A empir:17 evil:15 first:11 galact:16 hidden:6A rebel:1A spaceship:2A strike:3A victori:12 won:9'::tsvector, '{a}');
+SELECT filter('base hidden rebel spaceship strike'::tsvector, '{a}');
+SELECT filter('base hidden rebel spaceship strike'::tsvector, '{a,b,NULL}');
+