granicus.if.org Git - postgresql/commitdiff
Fix potential memory clobber in tsvector_concat().
author: Tom Lane <tgl@sss.pgh.pa.us>
Fri, 26 Aug 2011 20:51:34 +0000 (16:51 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Fri, 26 Aug 2011 20:51:34 +0000 (16:51 -0400)
tsvector_concat() allocated its result workspace using the "conservative"
estimate of the sum of the two input tsvectors' sizes.  Unfortunately that
wasn't so conservative as all that, because it supposed that the number of
pad bytes required could not grow.  Which it can, as per test case from
Jesper Krogh, if there's a mix of lexemes with positions and lexemes
without them in the input data.  The fix is to assume that we might add
a not-previously-present pad byte for each and every lexeme in the two
inputs; which really is conservative, but it doesn't seem worthwhile to
try to be more precise.

This is an aboriginal bug in tsvector_concat, so back-patch to all
versions containing it.

src/backend/utils/adt/tsvector_op.c

index 181bfee9e308653bd423a07ea7df6de6c70d93db..be7978561f7f592027a8a7b39a8bcdc75c1e30ab 100644 (file)
@@ -341,11 +341,14 @@ tsvector_concat(PG_FUNCTION_ARGS)
                                j,
                                i1,
                                i2,
-                               dataoff;
+                               dataoff,
+                               output_bytes,
+                               output_size;
        char       *data,
                           *data1,
                           *data2;
 
+       /* Get max position in in1; we'll need this to offset in2's positions */
        ptr = ARRPTR(in1);
        i = in1->size;
        while (i--)
@@ -369,10 +372,23 @@ tsvector_concat(PG_FUNCTION_ARGS)
        data2 = STRPTR(in2);
        i1 = in1->size;
        i2 = in2->size;
-       /* conservative estimate of space needed */
-       out = (TSVector) palloc0(VARSIZE(in1) + VARSIZE(in2));
-       SET_VARSIZE(out, VARSIZE(in1) + VARSIZE(in2));
+
+       /*
+        * Conservative estimate of space needed.  We might need all the data
+        * in both inputs, and conceivably add a pad byte before position data
+        * for each item where there was none before.
+        */
+       output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
+
+       out = (TSVector) palloc0(output_bytes);
+       SET_VARSIZE(out, output_bytes);
+
+       /*
+        * We must make out->size valid so that STRPTR(out) is sensible.  We'll
+        * collapse out any unused space at the end.
+        */
        out->size = in1->size + in2->size;
+
        ptr = ARRPTR(out);
        data = STRPTR(out);
        dataoff = 0;
@@ -514,10 +530,18 @@ tsvector_concat(PG_FUNCTION_ARGS)
                                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                                 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
 
-       out->size = ptr - ARRPTR(out);
-       SET_VARSIZE(out, CALCDATASIZE(out->size, dataoff));
+       /*
+        * Adjust sizes (asserting that we didn't overrun the original estimates)
+        * and collapse out any unused array entries.
+        */
+       output_size = ptr - ARRPTR(out);
+       Assert(output_size <= out->size);
+       out->size = output_size;
        if (data != STRPTR(out))
                memmove(STRPTR(out), data, dataoff);
+       output_bytes = CALCDATASIZE(out->size, dataoff);
+       Assert(output_bytes <= VARSIZE(out));
+       SET_VARSIZE(out, output_bytes);
 
        PG_FREE_IF_COPY(in1, 0);
        PG_FREE_IF_COPY(in2, 1);