granicus.if.org Git - postgresql/commitdiff
Improve make_tsvector() to handle empty input, and simplify its callers.
author Tom Lane <tgl@sss.pgh.pa.us>
Tue, 18 Jul 2017 17:13:47 +0000 (13:13 -0400)
committer Tom Lane <tgl@sss.pgh.pa.us>
Tue, 18 Jul 2017 17:13:47 +0000 (13:13 -0400)
It seemed a bit silly that each caller of make_tsvector() was laboriously
special-casing the situation where no lexemes were found, when it would
be easy and much more bullet-proof to make make_tsvector() handle that.

src/backend/tsearch/to_tsany.c
src/backend/utils/adt/tsvector_op.c

index b410a49908add20e0ccf2a15dc14f9f1ab2a8336..35d9ab276cfd7a76a638ff6f50fbb9d4c2cf3e55 100644 (file)
@@ -149,6 +149,8 @@ uniqueWORD(ParsedWord *a, int32 l)
 
 /*
  * make value of tsvector, given parsed text
+ *
+ * Note: frees prs->words and subsidiary data.
  */
 TSVector
 make_tsvector(ParsedText *prs)
@@ -162,7 +164,11 @@ make_tsvector(ParsedText *prs)
        char       *str;
        int                     stroff;
 
-       prs->curwords = uniqueWORD(prs->words, prs->curwords);
+       /* Merge duplicate words */
+       if (prs->curwords > 0)
+               prs->curwords = uniqueWORD(prs->words, prs->curwords);
+
+       /* Determine space needed */
        for (i = 0; i < prs->curwords; i++)
        {
                lenstr += prs->words[i].len;
@@ -217,7 +223,10 @@ make_tsvector(ParsedText *prs)
                        ptr->haspos = 0;
                ptr++;
        }
-       pfree(prs->words);
+
+       if (prs->words)
+               pfree(prs->words);
+
        return in;
 }
 
@@ -231,26 +240,19 @@ to_tsvector_byid(PG_FUNCTION_ARGS)
 
        prs.lenwords = VARSIZE_ANY_EXHDR(in) / 6;       /* just estimation of word's
                                                                                                 * number */
-       if (prs.lenwords == 0)
+       if (prs.lenwords < 2)
                prs.lenwords = 2;
        prs.curwords = 0;
        prs.pos = 0;
        prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
 
        parsetext(cfgId, &prs, VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in));
+
        PG_FREE_IF_COPY(in, 1);
 
-       if (prs.curwords)
-               out = make_tsvector(&prs);
-       else
-       {
-               pfree(prs.words);
-               out = palloc(CALCDATASIZE(0, 0));
-               SET_VARSIZE(out, CALCDATASIZE(0, 0));
-               out->size = 0;
-       }
+       out = make_tsvector(&prs);
 
-       PG_RETURN_POINTER(out);
+       PG_RETURN_TSVECTOR(out);
 }
 
 Datum
@@ -281,21 +283,10 @@ jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
 
        iterate_jsonb_string_values(jb, &state, add_to_tsvector);
 
-       if (prs.curwords > 0)
-               result = make_tsvector(&prs);
-       else
-       {
-               /*
-                * There weren't any string elements in jsonb, so we need to return an
-                * empty vector
-                */
-               result = palloc(CALCDATASIZE(0, 0));
-               SET_VARSIZE(result, CALCDATASIZE(0, 0));
-               result->size = 0;
-       }
-
        PG_FREE_IF_COPY(jb, 1);
 
+       result = make_tsvector(&prs);
+
        PG_RETURN_TSVECTOR(result);
 }
 
@@ -327,21 +318,10 @@ json_to_tsvector_byid(PG_FUNCTION_ARGS)
 
        iterate_json_string_values(json, &state, add_to_tsvector);
 
-       if (prs.curwords > 0)
-               result = make_tsvector(&prs);
-       else
-       {
-               /*
-                * There weren't any string elements in json, so we need to return an
-                * empty vector
-                */
-               result = palloc(CALCDATASIZE(0, 0));
-               SET_VARSIZE(result, CALCDATASIZE(0, 0));
-               result->size = 0;
-       }
-
        PG_FREE_IF_COPY(json, 1);
 
+       result = make_tsvector(&prs);
+
        PG_RETURN_TSVECTOR(result);
 }
 
index 2d7407c29cbde9671fc43ecbcbd462149ce9ee7e..822520299ed525633f474858ac816fe0e464ca4e 100644 (file)
@@ -2579,28 +2579,15 @@ tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
        }
 
        /* make tsvector value */
-       if (prs.curwords)
-       {
-               datum = PointerGetDatum(make_tsvector(&prs));
-               isnull = false;
-               rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
-                                                                                        1, &tsvector_attr_num,
-                                                                                        &datum, &isnull);
-               pfree(DatumGetPointer(datum));
-       }
-       else
-       {
-               TSVector        out = palloc(CALCDATASIZE(0, 0));
-
-               SET_VARSIZE(out, CALCDATASIZE(0, 0));
-               out->size = 0;
-               datum = PointerGetDatum(out);
-               isnull = false;
-               rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
-                                                                                        1, &tsvector_attr_num,
-                                                                                        &datum, &isnull);
-               pfree(prs.words);
-       }
+       datum = TSVectorGetDatum(make_tsvector(&prs));
+       isnull = false;
+
+       /* and insert it into tuple */
+       rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
+                                                                                1, &tsvector_attr_num,
+                                                                                &datum, &isnull);
+
+       pfree(DatumGetPointer(datum));
 
        return PointerGetDatum(rettuple);
 }