granicus.if.org Git - postgresql/commitdiff
Fix inadequately-sized output buffer in contrib/unaccent.
author: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 1 Jul 2014 15:22:53 +0000 (11:22 -0400)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Tue, 1 Jul 2014 15:22:53 +0000 (11:22 -0400)
The output buffer size in unaccent_lexize() was calculated as input string
length times pg_database_encoding_max_length(), which effectively assumes
that replacement strings aren't more than one character.  While that was
all that we previously documented it to support, the code actually has
always allowed replacement strings of arbitrary length; so if you tried
to make use of longer strings, you were at risk of buffer overrun.  To fix,
use an expansible StringInfo buffer instead of trying to determine the
maximum space needed a priori.

This would be a security issue if unaccent rules files could be installed
by unprivileged users; but fortunately they can't, so in the back branches
the problem can be labeled as improper configuration by a superuser.
Nonetheless, a memory stomp isn't a nice way of reacting to improper
configuration, so let's back-patch the fix.

contrib/unaccent/unaccent.c

index 262d5ec15f8eab63f0e3c707e2f88a593568b92a..eabf01baf8293d4b6f584cc1d16770d7a254e08f 100644 (file)
@@ -15,6 +15,7 @@
 
 #include "catalog/namespace.h"
 #include "commands/defrem.h"
+#include "lib/stringinfo.h"
 #include "tsearch/ts_cache.h"
 #include "tsearch/ts_locale.h"
 #include "tsearch/ts_public.h"
@@ -265,46 +266,48 @@ unaccent_lexize(PG_FUNCTION_ARGS)
        SuffixChar *rootSuffixTree = (SuffixChar *) PG_GETARG_POINTER(0);
        char       *srcchar = (char *) PG_GETARG_POINTER(1);
        int32           len = PG_GETARG_INT32(2);
-       char       *srcstart,
-                          *trgchar = NULL;
-       int                     charlen;
-       TSLexeme   *res = NULL;
-       SuffixChar *node;
+       char       *srcstart = srcchar;
+       TSLexeme   *res;
+       StringInfoData buf;
+
+       /* we allocate storage for the buffer only if needed */
+       buf.data = NULL;
 
-       srcstart = srcchar;
        while (srcchar - srcstart < len)
        {
+               SuffixChar *node;
+               int                     charlen;
+
                charlen = pg_mblen(srcchar);
 
                node = findReplaceTo(rootSuffixTree, (unsigned char *) srcchar, charlen);
                if (node && node->replaceTo)
                {
-                       if (!res)
+                       if (buf.data == NULL)
                        {
-                               /* allocate res only if it's needed */
-                               res = palloc0(sizeof(TSLexeme) * 2);
-                               res->lexeme = trgchar = palloc(len * pg_database_encoding_max_length() + 1 /* \0 */ );
-                               res->flags = TSL_FILTER;
+                               /* initialize buffer */
+                               initStringInfo(&buf);
+                               /* insert any data we already skipped over */
                                if (srcchar != srcstart)
-                               {
-                                       memcpy(trgchar, srcstart, srcchar - srcstart);
-                                       trgchar += (srcchar - srcstart);
-                               }
+                                       appendBinaryStringInfo(&buf, srcstart, srcchar - srcstart);
                        }
-                       memcpy(trgchar, node->replaceTo, node->replacelen);
-                       trgchar += node->replacelen;
-               }
-               else if (res)
-               {
-                       memcpy(trgchar, srcchar, charlen);
-                       trgchar += charlen;
+                       appendBinaryStringInfo(&buf, node->replaceTo, node->replacelen);
                }
+               else if (buf.data != NULL)
+                       appendBinaryStringInfo(&buf, srcchar, charlen);
 
                srcchar += charlen;
        }
 
-       if (res)
-               *trgchar = '\0';
+       /* return a result only if we made at least one substitution */
+       if (buf.data != NULL)
+       {
+               res = (TSLexeme *) palloc0(sizeof(TSLexeme) * 2);
+               res->lexeme = buf.data;
+               res->flags = TSL_FILTER;
+       }
+       else
+               res = NULL;
 
        PG_RETURN_POINTER(res);
 }