granicus.if.org Git - postgresql/commitdiff
Fix not-terribly-safe coding in NIImportOOAffixes() and NIImportAffixes().
author: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 7 Mar 2016 00:20:55 +0000 (19:20 -0500)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 7 Mar 2016 00:20:55 +0000 (19:20 -0500)
There were two places in spell.c that supposed that they could search
for a location in a string produced by lowerstr() and then transpose
the offset into the original string.  But this fails completely if
lowerstr() transforms any characters into characters of different byte
length, as can happen in Turkish UTF8 for instance.

We'd added some comments about this coding in commit 51e78ab4ff328296,
but failed to realize that it was not merely confusing but wrong.

Coverity complained about this code years ago, but in such an opaque
fashion that nobody understood what it was on about.  I'm not entirely
sure that this issue *is* what it's on about, actually, but perhaps
this patch will shut it up -- and in any case the problem is clear.

Back-patch to all supported branches.

src/backend/tsearch/spell.c

index 7c24c47495742e499063099776a50d7c955182d7..20097275d0fd6f60f4de4b5db0117637a6ed2cc6 100644 (file)
@@ -219,6 +219,19 @@ findchar(char *str, int c)
        return NULL;
 }
 
+static char *
+findchar2(char *str, int c1, int c2)   /* locate the first position in str matching c1 or c2 */
+{
+       while (*str)    /* walk the string until the terminating NUL */
+       {
+               if (t_iseq(str, c1) || t_iseq(str, c2))
+                       return str;     /* pointer into str at the first match */
+               str += pg_mblen(str);   /* advance by pg_mblen()'s length; presumably one whole multibyte character -- confirm */
+       }
+
+       return NULL;    /* reached end of string: neither c1 nor c2 is present */
+}
+
 
 /* backward string compare for suffix tree operations */
 static int
@@ -1262,18 +1275,13 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
 
                        if (flag == 0)
                                goto nextline;
+                       /* Get flags after '/' (flags are case sensitive) */
+                       if ((ptr = strchr(repl, '/')) != NULL)
+                               aflg |= getFlagValues(Conf, getFlags(Conf, ptr + 1));
+                       /* Get lowercased version of string before '/' */
                        prepl = lowerstr_ctx(Conf, repl);
-                       /* Find position of '/' in lowercased string "prepl" */
                        if ((ptr = strchr(prepl, '/')) != NULL)
-                       {
-                               /*
-                                * Here we use non-lowercased string "repl". We need position
-                                * of '/' in "repl".
-                                */
                                *ptr = '\0';
-                               ptr = repl + (ptr - prepl) + 1;
-                               aflg |= getFlagValues(Conf, getFlags(Conf, ptr));
-                       }
                        pfind = lowerstr_ctx(Conf, find);
                        pmask = lowerstr_ctx(Conf, mask);
                        if (t_iseq(find, '0'))
@@ -1343,12 +1351,10 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
 
                if (STRNCMP(pstr, "compoundwords") == 0)
                {
-                       /* Find position in lowercased string "pstr" */
-                       s = findchar(pstr, 'l');
+                       /* Find case-insensitive L flag in non-lowercased string */
+                       s = findchar2(recoded, 'l', 'L');
                        if (s)
                        {
-                               /* Here we use non-lowercased string "recoded" */
-                               s = recoded + (s - pstr);
                                while (*s && !t_isspace(s))
                                        s += pg_mblen(s);
                                while (*s && t_isspace(s))