From 04a2c7f412d01da8100de79b13df4fd39e15ce25 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 18 Jul 2017 13:13:47 -0400 Subject: [PATCH] Improve make_tsvector() to handle empty input, and simplify its callers. It seemed a bit silly that each caller of make_tsvector() was laboriously special-casing the situation where no lexemes were found, when it would be easy and much more bullet-proof to make make_tsvector() handle that. --- src/backend/tsearch/to_tsany.c | 58 ++++++++++------------------- src/backend/utils/adt/tsvector_op.c | 31 +++++---------- 2 files changed, 28 insertions(+), 61 deletions(-) diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c index b410a49908..35d9ab276c 100644 --- a/src/backend/tsearch/to_tsany.c +++ b/src/backend/tsearch/to_tsany.c @@ -149,6 +149,8 @@ uniqueWORD(ParsedWord *a, int32 l) /* * make value of tsvector, given parsed text + * + * Note: frees prs->words and subsidiary data. */ TSVector make_tsvector(ParsedText *prs) @@ -162,7 +164,11 @@ make_tsvector(ParsedText *prs) char *str; int stroff; - prs->curwords = uniqueWORD(prs->words, prs->curwords); + /* Merge duplicate words */ + if (prs->curwords > 0) + prs->curwords = uniqueWORD(prs->words, prs->curwords); + + /* Determine space needed */ for (i = 0; i < prs->curwords; i++) { lenstr += prs->words[i].len; @@ -217,7 +223,10 @@ make_tsvector(ParsedText *prs) ptr->haspos = 0; ptr++; } - pfree(prs->words); + + if (prs->words) + pfree(prs->words); + return in; } @@ -231,26 +240,19 @@ to_tsvector_byid(PG_FUNCTION_ARGS) prs.lenwords = VARSIZE_ANY_EXHDR(in) / 6; /* just estimation of word's * number */ - if (prs.lenwords == 0) + if (prs.lenwords < 2) prs.lenwords = 2; prs.curwords = 0; prs.pos = 0; prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords); parsetext(cfgId, &prs, VARDATA_ANY(in), VARSIZE_ANY_EXHDR(in)); + PG_FREE_IF_COPY(in, 1); - if (prs.curwords) - out = make_tsvector(&prs); - else - { - pfree(prs.words); - out = palloc(CALCDATASIZE(0, 0)); - SET_VARSIZE(out, CALCDATASIZE(0, 0)); - out->size = 0; - } + out = make_tsvector(&prs); - PG_RETURN_POINTER(out); + PG_RETURN_TSVECTOR(out); } Datum @@ -281,21 +283,10 @@ jsonb_to_tsvector_byid(PG_FUNCTION_ARGS) iterate_jsonb_string_values(jb, &state, add_to_tsvector); - if (prs.curwords > 0) - result = make_tsvector(&prs); - else - { - /* - * There weren't any string elements in jsonb, so we need to return an - * empty vector - */ - result = palloc(CALCDATASIZE(0, 0)); - SET_VARSIZE(result, CALCDATASIZE(0, 0)); - result->size = 0; - } - PG_FREE_IF_COPY(jb, 1); + result = make_tsvector(&prs); + PG_RETURN_TSVECTOR(result); } @@ -327,21 +318,10 @@ json_to_tsvector_byid(PG_FUNCTION_ARGS) iterate_json_string_values(json, &state, add_to_tsvector); - if (prs.curwords > 0) - result = make_tsvector(&prs); - else - { - /* - * There weren't any string elements in json, so we need to return an - * empty vector - */ - result = palloc(CALCDATASIZE(0, 0)); - SET_VARSIZE(result, CALCDATASIZE(0, 0)); - result->size = 0; - } - PG_FREE_IF_COPY(json, 1); + result = make_tsvector(&prs); + PG_RETURN_TSVECTOR(result); } diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index 2d7407c29c..822520299e 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -2579,28 +2579,15 @@ tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column) } /* make tsvector value */ - if (prs.curwords) - { - datum = PointerGetDatum(make_tsvector(&prs)); - isnull = false; - rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att, - 1, &tsvector_attr_num, - &datum, &isnull); - pfree(DatumGetPointer(datum)); - } - else - { - TSVector out = palloc(CALCDATASIZE(0, 0)); - - SET_VARSIZE(out, CALCDATASIZE(0, 0)); - out->size = 0; - datum = PointerGetDatum(out); - isnull = false; - rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att, - 1, &tsvector_attr_num, - &datum, &isnull); - pfree(prs.words); - } + datum = TSVectorGetDatum(make_tsvector(&prs)); + isnull = false; + + /* and insert it into tuple */ + rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att, + 1, &tsvector_attr_num, + &datum, &isnull); + + pfree(DatumGetPointer(datum)); return PointerGetDatum(rettuple); } -- 2.40.0